%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Search/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Search/Result.php |
<?php namespace CirrusSearch\Search; use CirrusSearch\InterwikiSearcher; use CirrusSearch\Util; use CirrusSearch\Searcher; use MediaWiki\Logger\LoggerFactory; use MWTimestamp; use SearchResult; use Title; /** * An individual search result from Elasticsearch. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html */ class Result extends SearchResult { /** @var int */ private $namespace; /** @var string */ private $titleSnippet = ''; /** @var Title|null */ private $redirectTitle = null; /** @var string */ private $redirectSnippet = ''; /** @var Title|null */ private $sectionTitle = null; /** @var string */ private $sectionSnippet = ''; /** @var string */ private $categorySnippet = ''; /** @var string */ private $textSnippet; /** @var bool */ private $isFileMatch = false; /* @var string */ private $interwiki = ''; /** @var string */ private $interwikiNamespace = ''; /** @var int */ private $wordCount; /** @var int */ private $byteSize; /** @var string */ private $timestamp; /** @var string */ private $docId; /** @var float */ private $score; /** @var array */ private $explanation; /** * Build the result. * * @param \Elastica\ResultSet $results containing all search results * @param \Elastica\Result $result containing the given search result * @param string $interwiki Interwiki prefix, if any * @param \Elastica\Result $result containing information about the result this class should represent */ public function __construct( $results, $result, $interwiki = '' ) { if ( $interwiki ) { $this->setInterwiki( $result, $interwiki ); } $this->docId = $result->getId(); $this->namespace = $result->namespace; $this->mTitle = $this->makeTitle( $result->namespace, $result->title ); if ( $this->getTitle()->getNamespace() == NS_FILE ) { $this->mImage = wfFindFile( $this->mTitle ); } $fields = $result->getFields(); // Not all results requested a word count. Just pretend we have none if so $this->wordCount = isset( $fields['text.word_count'] ) ? $fields['text.word_count'][ 0 ] : 0; $this->byteSize = $result->text_bytes; $this->timestamp = new MWTimestamp( $result->timestamp ); $highlights = $result->getHighlights(); if ( isset( $highlights[ 'title' ] ) ) { $nstext = $this->getTitle()->getNamespace() === 0 ? '' : Util::getNamespaceText( $this->getTitle() ) . ':'; $this->titleSnippet = $nstext . $this->escapeHighlightedText( $highlights[ 'title' ][ 0 ] ); } elseif ( $this->mTitle->isExternal() ) { // Interwiki searches are weird. They won't have title highlights by design, but // if we don't return a title snippet we'll get weird display results. $this->titleSnippet = $this->mTitle->getText(); } if ( !isset( $highlights[ 'title' ] ) && isset( $highlights[ 'redirect.title' ] ) ) { // Make sure to find the redirect title before escaping because escaping breaks it.... $redirects = $result->redirect; $this->redirectTitle = $this->findRedirectTitle( $highlights[ 'redirect.title' ][ 0 ], $redirects ); $this->redirectSnippet = $this->escapeHighlightedText( $highlights[ 'redirect.title' ][ 0 ] ); } $this->textSnippet = $this->escapeHighlightedText( $this->pickTextSnippet( $highlights ) ); if ( isset( $highlights[ 'heading' ] ) ) { $this->sectionSnippet = $this->escapeHighlightedText( $highlights[ 'heading' ][ 0 ] ); $this->sectionTitle = $this->findSectionTitle(); } if ( isset( $highlights[ 'category' ] ) ) { $this->categorySnippet = $this->escapeHighlightedText( $highlights[ 'category' ][ 0 ] ); } $this->score = $result->getScore(); $this->explanation = $result->getExplanation(); } /** * @param string[] $highlights * @return string */ private function pickTextSnippet( $highlights ) { // This can get skipped if there the page was sent to Elasticsearch without text. // This could be a bug or it could be that the page simply doesn't have any text. $mainSnippet = ''; if ( isset( $highlights[ 'text' ] ) ) { $mainSnippet = $highlights[ 'text' ][ 0 ]; if ( $this->containsMatches( $mainSnippet ) ) { return $mainSnippet; } } if ( isset( $highlights[ 'auxiliary_text' ] ) ) { $auxSnippet = $highlights[ 'auxiliary_text' ][ 0 ]; if ( $this->containsMatches( $auxSnippet ) ) { return $auxSnippet; } } if ( isset( $highlights[ 'file_text' ] ) ) { $fileSnippet = $highlights[ 'file_text' ][ 0 ]; if ( $this->containsMatches( $fileSnippet ) ) { $this->isFileMatch = true; return $fileSnippet; } } if ( isset( $highlights[ 'source_text.plain' ] ) ) { $sourceSnippet = $highlights[ 'source_text.plain' ][ 0 ]; if ( $this->containsMatches( $sourceSnippet ) ) { return $sourceSnippet; } } return $mainSnippet; } /** * Don't bother hitting the revision table and loading extra stuff like * that into memory like the parent does, just return if we've got an idea * about page existence. * * Protects against things like bug 61464, where a page clearly doesn't * exist anymore but we've got something stuck in the index. * * @return bool */ public function isMissingRevision() { return !$this->mTitle->isKnown(); } /** * Escape highlighted text coming back from Elasticsearch. * * @param string $snippet highlighted snippet returned from elasticsearch * @return string $snippet with html escaped _except_ highlighting pre and post tags */ private function escapeHighlightedText( $snippet ) { static $highlightPreEscaped = null, $highlightPostEscaped = null; if ( $highlightPreEscaped === null ) { $highlightPreEscaped = htmlspecialchars( Searcher::HIGHLIGHT_PRE ); $highlightPostEscaped = htmlspecialchars( Searcher::HIGHLIGHT_POST ); } return str_replace( [ $highlightPreEscaped, $highlightPostEscaped ], [ Searcher::HIGHLIGHT_PRE, Searcher::HIGHLIGHT_POST ], htmlspecialchars( $snippet ) ); } /** * Checks if a snippet contains matches by looking for HIGHLIGHT_PRE. * * @param string $snippet highlighted snippet returned from elasticsearch * @return boolean true if $snippet contains matches, false otherwise */ private function containsMatches( $snippet ) { return strpos( $snippet, Searcher::HIGHLIGHT_PRE ) !== false; } /** * Build the redirect title from the highlighted redirect snippet. * * @param string $snippet Highlighted redirect snippet * @param array[]|null $redirects Array of redirects stored as arrays with 'title' and 'namespace' keys * @return Title|null object representing the redirect */ private function findRedirectTitle( $snippet, $redirects ) { $title = $this->stripHighlighting( $snippet ); // Grab the redirect that matches the highlighted title with the lowest namespace. // That is pretty arbitrary but it prioritizes 0 over others. $best = null; if ( $redirects !== null ) { foreach ( $redirects as $redirect ) { if ( $redirect[ 'title' ] === $title && ( $best === null || $best[ 'namespace' ] > $redirect ) ) { $best = $redirect; } } } if ( $best === null ) { LoggerFactory::getInstance( 'CirrusSearch' )->warning( "Search backend highlighted a redirect ({title}) but didn't return it.", [ 'title' => $title ] ); return null; } return $this->makeTitle( $best[ 'namespace' ], $best[ 'title' ] ); } /** * @return Title */ private function findSectionTitle() { return $this->getTitle()->createFragmentTarget( Title::escapeFragmentForURL( $this->stripHighlighting( $this->sectionSnippet ) ) ); } /** * @param string $highlighted * @return string */ private function stripHighlighting( $highlighted ) { $markers = [ Searcher::HIGHLIGHT_PRE, Searcher::HIGHLIGHT_POST ]; return str_replace( $markers, '', $highlighted ); } /** * Set interwiki and interwikiNamespace properties * * @param \Elastica\Result $result containing the given search result * @param string $interwiki Interwiki prefix, if any */ private function setInterwiki( $result, $interwiki ) { $resultIndex = $result->getIndex(); $indexBase = InterwikiSearcher::getIndexForInterwiki( $interwiki ); $pos = strpos( $resultIndex, $indexBase ); if ( $pos === 0 && $resultIndex[strlen( $indexBase )] == '_' ) { $this->interwiki = $interwiki; $this->interwikiNamespace = $result->namespace_text ? $result->namespace_text : ''; } } /** * @return string */ public function getTitleSnippet() { return $this->titleSnippet; } /** * @return Title|null */ public function getRedirectTitle() { return $this->redirectTitle; } /** * @return string */ public function getRedirectSnippet() { return $this->redirectSnippet; } /** * @param array * @return string|null */ public function getTextSnippet( $terms ) { return $this->textSnippet; } /** * @return string */ public function getSectionSnippet() { return $this->sectionSnippet; } /** * @return Title|null */ public function getSectionTitle() { return $this->sectionTitle; } /** * @return string */ public function getCategorySnippet() { return $this->categorySnippet; } /** * @return int */ public function getWordCount() { return $this->wordCount; } /** * @return int */ public function getByteSize() { return $this->byteSize; } /** * @return string */ public function getTimestamp() { return $this->timestamp->getTimestamp( TS_MW ); } /** * @return bool */ public function isFileMatch() { return $this->isFileMatch; } /** * @return string */ public function getInterwikiPrefix() { return $this->interwiki; } /** * @return string */ public function getInterwikiNamespaceText() { return $this->interwikiNamespace; } /** * @return string */ public function getDocId() { return $this->docId; } /** * @return float the score */ public function getScore() { return $this->score; } /** * @return array lucene score explanation */ public function getExplanation() { return $this->explanation; } /** * Create a title. When making interwiki titles we should be providing the * namespace text as a portion of the text, rather than a namespace id, * because namespace id's are not consistent across wiki's. This * additionally prevents the local wiki from localizing the namespace text * when it should be using the localized name of the remote wiki. * * Unfortunately we don't always have the remote namespace text, such as * when handling redirects. Do the best we can in this case and take the * less-than ideal results when we don't. * * @param int $namespace * @param string $text * @return Title */ private function makeTitle( $namespace, $text ) { if ( $this->interwikiNamespace && $namespace === $this->namespace ) { return Title::makeTitle( 0, $this->interwikiNamespace . ':' . $text, '', $this->interwiki ); } else { return Title::makeTitle( $namespace, $text, '', $this->interwiki ); } } }