%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Searcher.php |
<?php namespace CirrusSearch; use Elastica; use CirrusSearch; use CirrusSearch\Extra\Query\SourceRegex; use CirrusSearch\Query\QueryHelper; use CirrusSearch\Search\Escaper; use CirrusSearch\Search\Filters; use CirrusSearch\Search\FullTextResultsType; use CirrusSearch\Search\ResultsType; use CirrusSearch\Search\RescoreBuilder; use CirrusSearch\Search\SearchContext; use CirrusSearch\Search\SearchTextQueryBuilderFactory; use GeoData\Coord; use Language; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; use MWNamespace; use ObjectCache; use SearchResultSet; use Status; use Title; use UsageException; use User; /** * Performs searches using Elasticsearch. Note that each instance of this class * is single use only. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html */ class Searcher extends ElasticsearchIntermediary { const SUGGESTION_HIGHLIGHT_PRE = '<em>'; const SUGGESTION_HIGHLIGHT_POST = '</em>'; const HIGHLIGHT_PRE = '<span class="searchmatch">'; const HIGHLIGHT_POST = '</span>'; const HIGHLIGHT_REGEX = '/<span class="searchmatch">.*?<\/span>/'; const MORE_LIKE_THESE_NONE = 0; const MORE_LIKE_THESE_ONLY_WIKIBASE = 1; /** * Maximum title length that we'll check in prefix and keyword searches. * Since titles can be 255 bytes in length we're setting this to 255 * characters. */ const MAX_TITLE_SEARCH = 255; /** * Maximum length, in characters, allowed in queries sent to searchText. */ const MAX_TEXT_SEARCH = 300; /** * Maximum offset + limit depth allowed. As in the deepest possible result * to return. Too deep will cause very slow queries. 10,000 feels plenty * deep. This should be <= index.max_result_window in elasticsearch. */ const MAX_OFFSET_LIMIT = 10000; /** * @var integer search offset */ private $offset; /** * @var integer maximum number of result */ private $limit; /** * @var Language language of the wiki */ private $language; /** * @var ResultsType|null type of results. null defaults to FullTextResultsType */ private $resultsType; /** * @var string sort type */ private $sort = 'relevance'; /** * @var string index base name to use */ private $indexBaseName; /** * @var Escaper escapes queries */ private $escaper; /** * @var boolean just return the array that makes up the query instead of searching */ private $returnQuery = false; /** * @var boolean return raw Elasticsearch result instead of processing it */ private $returnResult = false; /** * @var boolean return explanation with results */ private $returnExplain = false; /** * Search environment configuration * @var SearchConfig */ protected $config; /** * @var SearchContext */ protected $searchContext; /** * Constructor * @param Connection $conn * @param int $offset Offset the results by this much * @param int $limit Limit the results to this many * @param SearchConfig|null $config Configuration settings * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces. * @param User|null $user user for which this search is being performed. Attached to slow request logs. * @param string|boolean $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName */ public function __construct( Connection $conn, $offset, $limit, SearchConfig $config = null, array $namespaces = null, User $user = null, $index = false ) { if ( is_null( $config ) ) { // @todo connection has an embedded config ... reuse that? somehow should // at least ensure they are the same. $config = MediaWikiServices::getInstance() ->getConfigFactory() ->makeConfig( 'CirrusSearch' ); } parent::__construct( $conn, $user, $config->get( 'CirrusSearchSlowSearch' ), $config->get( 'CirrusSearchExtraBackendLatency' ) ); $this->config = $config; $this->offset = $offset; if ( $offset + $limit > self::MAX_OFFSET_LIMIT ) { $this->limit = self::MAX_OFFSET_LIMIT - $offset; } else { $this->limit = $limit; } $this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME ); $this->language = $config->get( 'ContLang' ); $this->escaper = new Escaper( $config->get( 'LanguageCode' ), $config->get( 'CirrusSearchAllowLeadingWildcard' ) ); $this->searchContext = new SearchContext( $this->config, $namespaces ); } /** * @param ResultsType $resultsType results type to return */ public function setResultsType( $resultsType ) { $this->resultsType = $resultsType; } /** * @param boolean $returnQuery just return the array that makes up the query instead of searching */ public function setReturnQuery( $returnQuery ) { $this->returnQuery = $returnQuery; } /** * @param boolean $dumpResult return raw Elasticsearch result instead of processing it */ public function setDumpResult( $dumpResult ) { $this->returnResult = $dumpResult; } /** * @param boolean $returnExplain return query explanation */ public function setReturnExplain( $returnExplain ) { $this->returnExplain = $returnExplain; } /** * Set the type of sort to perform. Must be 'relevance', 'title_asc', 'title_desc'. * @param string $sort sort type */ public function setSort( $sort ) { $this->sort = $sort; } /** * Should this search limit results to the local wiki? If not called the default is false. * @param boolean $limitSearchToLocalWiki should the results be limited? */ public function limitSearchToLocalWiki( $limitSearchToLocalWiki ) { $this->searchContext->setLimitSearchToLocalWiki( $limitSearchToLocalWiki ); } /** * Perform a "near match" title search which is pretty much a prefix match without the prefixes. * @param string $search text by which to search * @return Status status containing results defined by resultsType on success */ public function nearMatchTitleSearch( $search ) { $this->checkTitleSearchRequestLength( $search ); // Elasticsearch seems to have trouble extracting the proper terms to highlight // from the default query we make so we feed it exactly the right query to highlight. $highlightQuery = new \Elastica\Query\MultiMatch(); $highlightQuery->setQuery( $search ); $highlightQuery->setFields( [ 'title.near_match', 'redirect.title.near_match', 'title.near_match_asciifolding', 'redirect.title.near_match_asciifolding', ] ); if ( $this->config->getElement( 'CirrusSearchAllFields', 'use' ) ) { // Instead of using the highlight query we need to make one like it that uses the all_near_match field. $allQuery = new \Elastica\Query\MultiMatch(); $allQuery->setQuery( $search ); $allQuery->setFields( [ 'all_near_match', 'all_near_match.asciifolding' ] ); $this->searchContext->addFilter( $allQuery ); } else { $this->searchContext->addFilter( $highlightQuery ); } $this->searchContext->setHighlightQuery( $highlightQuery ); $this->searchContext->setSearchType( 'near_match' ); return $this->search( $search ); } /** * Perform a prefix search. * @param string $search text by which to search * @return Status status containing results defined by resultsType on success */ public function prefixSearch( $search ) { $this->checkTitleSearchRequestLength( $search ); $this->searchContext->setSearchType( 'prefix' ); if ( strlen( $search ) > 0 ) { if ( $this->config->get( 'CirrusSearchPrefixSearchStartsWithAnyWord' ) ) { $match = new \Elastica\Query\Match(); $match->setField( 'title.word_prefix', [ 'query' => $search, 'analyzer' => 'plain', 'operator' => 'and', ] ); $this->searchContext->addFilter( $match ); } else { // Elasticsearch seems to have trouble extracting the proper terms to highlight // from the default query we make so we feed it exactly the right query to highlight. $query = new \Elastica\Query\MultiMatch(); $query->setQuery( $search ); $weights = $this->config->get( 'CirrusSearchPrefixWeights' ); $query->setFields( [ 'title.prefix^' . $weights[ 'title' ], 'redirect.title.prefix^' . $weights[ 'redirect' ], 'title.prefix_asciifolding^' . $weights[ 'title_asciifolding' ], 'redirect.title.prefix_asciifolding^' . $weights[ 'redirect_asciifolding' ], ] ); $this->searchContext->setMainQuery( $query ); } } /** @suppress PhanDeprecatedFunction */ $this->searchContext->setBoostLinks( true ); return $this->search( $search ); } /** * @param string $suggestPrefix prefix to be prepended to suggestions */ public function addSuggestPrefix( $suggestPrefix ) { $this->searchContext->addSuggestPrefix( $suggestPrefix ); } /** * Search articles with provided term. * @param string $term term to search * @param boolean $showSuggestion should this search suggest alternative searches that might be better? * @return Status status containing results defined by resultsType on success */ public function searchText( $term, $showSuggestion ) { $checkLengthStatus = $this->checkTextSearchRequestLength( $term ); if ( !$checkLengthStatus->isOK() ) { return $checkLengthStatus; } // save original term for logging $originalTerm = $term; $term = Util::stripQuestionMarks( $term, $this->config->get( 'CirrusSearchStripQuestionMarks' ) ); // Transform Mediawiki specific syntax to filters and extra (pre-escaped) query string $this->searchContext->setSearchType( 'full_text' ); $builderProfile = $this->config->get( 'CirrusSearchFullTextQueryBuilderProfile' ); $builderSettings = $this->config->getElement( 'CirrusSearchFullTextQueryBuilderProfiles', $builderProfile ); $qb = new $builderSettings['builder_class']( $this->config, $this->escaper, [ // Handle title prefix notation new Query\PrefixFeature( $this->connection ), // Handle prefer-recent keyword new Query\PreferRecentFeature( $this->config ), // Handle local keyword new Query\LocalFeature(), // Handle insource keyword using regex new Query\RegexInSourceFeature( $this->config ), // Handle neartitle, nearcoord keywords, and their boosted alternates new Query\GeoFeature(), // Handle boost-templates keyword new Query\BoostTemplatesFeature(), // Handle hastemplate keyword new Query\HasTemplateFeature(), // Handle linksto keyword new Query\LinksToFeature(), // Handle incategory keyword new Query\InCategoryFeature( $this->config ), // Handle non-regex insource keyword new Query\SimpleInSourceFeature( $this->escaper ), // Handle intitle keyword new Query\InTitleFeature( $this->escaper ), ], $builderSettings['settings'] ); $showSuggestion = $showSuggestion && $this->offset == 0 && $this->config->get( 'CirrusSearchEnablePhraseSuggest' ); $qb->build( $this->searchContext, $term, $showSuggestion ); if ( !$this->searchContext->areResultsPossible() ) { return Status::newGood( new SearchResultSet( true ) ); } $result = $this->search( $originalTerm ); if ( !$result->isOK() && $this->isParseError( $result ) ) { if ( $qb->buildDegraded( $this->searchContext ) ) { // If that doesn't work we're out of luck but it should. There no guarantee it'll work properly // with the syntax we've built above but it'll do _something_ and we'll still work on fixing all // the parse errors that come in. $result = $this->search( $term ); } } return $result; } /** * Find articles that contain similar text to the provided title array. * @param Title[] $titles array of titles of articles to search for * @param int $options bitset of options: * MORE_LIKE_THESE_NONE * MORE_LIKE_THESE_ONLY_WIKIBASE - filter results to only those containing wikibase items * @return Status<ResultSet> */ public function moreLikeTheseArticles( array $titles, $options = Searcher::MORE_LIKE_THESE_NONE ) { sort( $titles, SORT_STRING ); $docIds = []; $likeDocs = []; foreach ( $titles as $title ) { $docId = $this->config->makeId( $title->getArticleID() ); $docIds[] = $docId; $likeDocs[] = [ '_id' => $docId ]; } // If no fields has been set we return no results. // This can happen if the user override this setting with field names that // are not allowed in $this->config->get( 'CirrusSearchMoreLikeThisAllowedFields (see Hooks.php) if( !$this->config->get( 'CirrusSearchMoreLikeThisFields' ) ) { return Status::newGood( new SearchResultSet( true ) /* empty */ ); } // more like this queries are quite expensive and are suspected to be // triggering latency spikes. This allows redirecting more like this // queries to a different cluster $cluster = $this->config->get( 'CirrusSearchMoreLikeThisCluster' ); if ( $cluster ) { $this->connection = Connection::getPool( $this->config, $cluster ); } $this->searchContext->addSyntaxUsed( 'more_like' ); $this->searchContext->setSearchType( 'more_like' ); $moreLikeThisFields = $this->config->get( 'CirrusSearchMoreLikeThisFields' ); $moreLikeThisUseFields = $this->config->get( 'CirrusSearchMoreLikeThisUseFields' ); sort( $moreLikeThisFields ); $query = new \Elastica\Query\MoreLikeThis(); $query->setParams( $this->config->get( 'CirrusSearchMoreLikeThisConfig' ) ); $query->setFields( $moreLikeThisFields ); // The 'all' field cannot be retrieved from _source // We have to extract the text content before. if( in_array( 'all', $moreLikeThisFields ) ) { $moreLikeThisUseFields = false; } if ( !$moreLikeThisUseFields && $moreLikeThisFields != [ 'text' ] ) { // Run a first pass to extract the text field content because we want to compare it // against other fields. $text = []; $found = $this->get( $docIds, [ 'text' ] ); if ( !$found->isOK() ) { return $found; } $found = $found->getValue(); if ( count( $found ) === 0 ) { // If none of the pages are in the index we can't find articles like them return Status::newGood( new SearchResultSet() /* empty */ ); } foreach ( $found as $foundArticle ) { $text[] = $foundArticle->text; } sort( $text, SORT_STRING ); $likeDocs = array_merge( $likeDocs, $text ); } /** @suppress PhanTypeMismatchArgument library is mis-annotated */ $query->setLike( $likeDocs ); $this->searchContext->setMainQuery( $query ); if ( $options & Searcher::MORE_LIKE_THESE_ONLY_WIKIBASE ) { $this->searchContext->addFilter( new \Elastica\Query\Exists( 'wikibase_item' ) ); } // highlight snippets are not great so it's worth running a match all query // to save cpu cycles $this->searchContext->setHighlightQuery( new \Elastica\Query\MatchAll() ); return $this->search( implode( ', ', $titles ), $this->config->get( 'CirrusSearchMoreLikeThisTTL' ) ); } /** * Get the page with $docId. Note that the result is a status containing _all_ pages found. * It is possible to find more then one page if the page is in multiple indexes. * @param string[] $docIds array of document ids * @param string[]|true|false $sourceFiltering source filtering to apply * @return Status containing pages found, containing an empty array if not found, * or an error if there was an error */ public function get( array $docIds, $sourceFiltering ) { $indexType = $this->connection->pickIndexTypeForNamespaces( $this->searchContext->getNamespaces() ); // The worst case would be to have all ids duplicated in all available indices. // We set the limit accordingly $size = count ( $this->connection->getAllIndexSuffixesForNamespaces( $this->searchContext->getNamespaces() )); $size *= count( $docIds ); return Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, function() use ( $docIds, $sourceFiltering, $indexType, $size ) { try { $this->start( "get of {indexType}.{docIds}", [ 'indexType' => $indexType, 'docIds' => $docIds, 'queryType' => 'get', ] ); // Shard timeout not supported on get requests so we just use the client side timeout $this->connection->setTimeout( $this->getTimeout() ); // We use a search query instead of _get/_mget, these methods are // theorically well suited for this kind of job but they are not // supported on aliases with multiple indices (content/general) $pageType = $this->connection->getPageType( $this->indexBaseName, $indexType ); $query = new \Elastica\Query( new \Elastica\Query\Ids( null, $docIds ) ); $query->setParam( '_source', $sourceFiltering ); $query->addParam( 'stats', 'get' ); // We ignore limits provided to the searcher // otherwize we could return fewer results than // the ids requested. $query->setFrom( 0 ); $query->setSize( $size ); $resultSet = $pageType->search( $query, [ 'search_type' => 'query_then_fetch' ] ); return $this->success( $resultSet->getResults() ); } catch ( \Elastica\Exception\NotFoundException $e ) { // NotFoundException just means the field didn't exist. // It is up to the caller to decide if that is an error. return $this->success( [] ); } catch ( \Elastica\Exception\ExceptionInterface $e ) { return $this->failure( $e ); } }); } /** * @param string $name * @return Status */ public function findNamespace( $name ) { return Util::doPoolCounterWork( 'CirrusSearch-NamespaceLookup', $this->user, function() use ( $name ) { try { $this->start( "lookup namespace for {namespaceName}", [ 'namespaceName' => $name, 'query' => $name, 'queryType' => 'namespace', ] ); $queryOptions = [ 'search_type' => 'query_then_fetch', 'timeout' => $this->getTimeout(), ]; $this->connection->setTimeout( $queryOptions['timeout'] ); $pageType = $this->connection->getNamespaceType( $this->indexBaseName ); $match = new \Elastica\Query\Match(); $match->setField( 'name', $name ); $query = new \Elastica\Query( $match ); $query->setParam( '_source', false ); $query->addParam( 'stats', 'namespace' ); $resultSet = $pageType->search( $query, $queryOptions ); // @todo check for partial results due to timeout? return $this->success( $resultSet->getResults() ); } catch ( \Elastica\Exception\ExceptionInterface $e ) { return $this->failure( $e ); } }); } /** * Powers full-text-like searches including prefix search. * * @param string $for * @param int $cacheTTL Cache results into ObjectCache for $cacheTTL seconds * @return Status results from the query transformed by the resultsType */ private function search( $for, $cacheTTL = 0 ) { if ( $this->limit <= 0 && ! $this->returnQuery ) { if ( $this->returnResult ) { return Status::newGood( [ 'description' => 'Canceled due to offset out of bounds', 'path' => '', 'result' => [], ] ); } else { return Status::newGood( $this->resultsType->createEmptyResult() ); } } if ( $this->resultsType === null ) { $this->resultsType = new FullTextResultsType( FullTextResultsType::HIGHLIGHT_ALL ); } $query = new Elastica\Query(); $query->setParam( '_source', $this->resultsType->getSourceFiltering() ); $query->setParam( 'fields', $this->resultsType->getFields() ); $extraIndexes = []; $namespaces = $this->searchContext->getNamespaces(); $indexType = $this->connection->pickIndexTypeForNamespaces( $namespaces ); if ( $namespaces ) { $extraIndexes = $this->getAndFilterExtraIndexes(); $this->searchContext->addFilter( new \Elastica\Query\Terms( 'namespace', $namespaces ) ); } $this->installBoosts(); $query->setQuery( $this->searchContext->getQuery() ); $highlight = $this->searchContext->getHighlight( $this->resultsType ); if ( $highlight ) { $query->setHighlight( $highlight ); } if ( $this->searchContext->getSuggest() ) { if ( interface_exists( 'Elastica\\ArrayableInterface' ) ) { // Elastica 2.3.x. For some reason it unwraps our suggest // query when we don't want it to, so wrap it one more time // to make the unwrap do nothing. $query->setParam( 'suggest', [ 'suggest' => $this->searchContext->getSuggest() ] ); } else { $query->setParam( 'suggest', $this->searchContext->getSuggest() ); } $query->addParam( 'stats', 'suggest' ); } if( $this->offset ) { $query->setFrom( $this->offset ); } if( $this->limit ) { $query->setSize( $this->limit ); } if ( $this->sort != 'relevance' ) { // Clear rescores if we aren't using relevance as the search sort because they aren't used. $this->searchContext->clearRescore(); } elseif ( $this->searchContext->hasRescore() ) { $query->setParam( 'rescore', $this->searchContext->getRescore() ); } $query->addParam( 'stats', $this->searchContext->getSearchType() ); switch ( $this->sort ) { case 'relevance': break; // The default case 'title_asc': $query->setSort( [ 'title.keyword' => 'asc' ] ); break; case 'title_desc': $query->setSort( [ 'title.keyword' => 'desc' ] ); break; case 'incoming_links_asc': $query->setSort( [ 'incoming_links' => [ 'order' => 'asc', 'missing' => '_first', ] ] ); break; case 'incoming_links_desc': $query->setSort( [ 'incoming_links' => [ 'order' => 'desc', 'missing' => '_last', ] ] ); break; default: LoggerFactory::getInstance( 'CirrusSearch' )->warning( "Invalid sort type: {sort}", [ 'sort' => $this->sort ] ); } $queryOptions = []; if ( $this->config->get( 'CirrusSearchMoreAccurateScoringMode' ) ) { $queryOptions[ 'search_type' ] = 'dfs_query_then_fetch'; } $queryOptions['timeout'] = $this->getTimeout(); $this->connection->setTimeout( $queryOptions[ 'timeout' ] ); // Setup the search $pageType = $this->connection->getPageType( $this->indexBaseName, $indexType ); $search = $pageType->createSearch( $query, $queryOptions ); foreach ( $extraIndexes as $i ) { $search->addIndex( $i ); } $description = "{queryType} search for '{query}'"; $logContext = [ 'queryType' => $this->searchContext->getSearchType(), 'query' => $for, 'limit' => $this->limit ?: null, // null means not requested, '' means not found. If found // parent::buildLogContext will replace the '' with an // actual suggestion. 'suggestion' => $this->searchContext->getSuggest() ? '' : null, ]; if ( $this->returnQuery ) { return Status::newGood( [ 'description' => $this->formatDescription( $description, $logContext ), 'path' => $search->getPath(), 'params' => $search->getOptions(), 'query' => $query->toArray(), 'options' => $queryOptions, ] ); } if ( $this->returnExplain ) { $query->setExplain( true ); } if ( $this->returnResult || $this->returnExplain ) { // don't cache debugging queries $cacheTTL = 0; } $requestStats = MediaWikiServices::getInstance()->getStatsdDataFactory(); if ( $cacheTTL > 0 ) { $cache = ObjectCache::getLocalClusterInstance(); $key = $cache->makeKey( 'cirrussearch', 'search', md5( $search->getPath() . serialize( $search->getOptions() ) . serialize( $query->toArray() ) . serialize( $this->resultsType ) ) ); $cacheResult = $cache->get( $key ); $type = $this->searchContext->getSearchType(); if ( $cacheResult ) { $requestStats->increment("CirrusSearch.query_cache.$type.hit"); $this->successViaCache( $description, $logContext ); return $cacheResult; } else { $requestStats->increment("CirrusSearch.query_cache.$type.miss"); } } // Perform the search $result = Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, function() use ( $search, $description, $logContext ) { try { $this->start( $description, $logContext ); return $this->success( $search->search() ); } catch ( \Elastica\Exception\ExceptionInterface $e ) { return $this->failure( $e ); } }, function( $error, $key, $userName ) use ( $description, $logContext ) { $forUserName = $userName ? "for {userName} " : ''; LoggerFactory::getInstance( 'CirrusSearch' )->warning( /** @suppress PhanTypeMismatchArgument phan doesn't understand array addition */ "Pool error {$forUserName}on key {key} during $description: {error}", $logContext + [ 'userName' => $userName, 'key' => 'key', 'error' => $error ] ); if ( $error === 'pool-queuefull' ) { if ( strpos( $key, 'nowait:CirrusSearch:_per_user' ) === 0 ) { $loggedIn = $this->user->isLoggedIn() ? 'logged-in' : 'anonymous'; return Status::newFatal( "cirrussearch-too-busy-for-you-{$loggedIn}-error" ); } if ( $this->searchContext->getSearchType() === 'regex' ) { return Status::newFatal( 'cirrussearch-regex-too-busy-error' ); } return Status::newFatal( 'cirrussearch-too-busy-error' ); } return Status::newFatal( 'cirrussearch-backend-error' ); }); if ( $result->isOK() ) { $responseData = $result->getValue()->getResponse()->getData(); if ( $this->returnResult ) { return Status::newGood( [ 'description' => $this->formatDescription( $description, $logContext ), 'path' => $search->getPath(), 'result' => $responseData, ] ); } $result->setResult( true, $this->resultsType->transformElasticsearchResult( $this->searchContext, $result->getValue() ) ); $isPartialResult = false; if ( isset( $responseData['timed_out'] ) && $responseData[ 'timed_out' ] ) { $isPartialResult = true; LoggerFactory::getInstance( 'CirrusSearch' )->warning( "$description timed out and only returned partial results!", $logContext ); if ( $result->getValue()->numRows() === 0 ) { return Status::newFatal( 'cirrussearch-backend-error' ); } else { $result->warning( 'cirrussearch-timed-out' ); } } if ( $cacheTTL > 0 && !$isPartialResult ) { /** @suppress PhanUndeclaredVariable */ $requestStats->increment("CirrusSearch.query_cache.$type.set"); /** @suppress PhanUndeclaredVariable */ $cache->set( $key, $result, $cacheTTL ); } } return $result; } /** * Retrieve the extra indexes for our searchable namespaces, if any * exist. If they do exist, also add our wiki to our notFilters so * we can filter out duplicates properly. * * @return string[] */ protected function getAndFilterExtraIndexes() { if ( $this->searchContext->getLimitSearchToLocalWiki() ) { return []; } $extraIndexes = OtherIndexes::getExtraIndexesForNamespaces( $this->searchContext->getNamespaces() ); if ( $extraIndexes ) { $this->searchContext->addNotFilter( new \Elastica\Query\Term( [ 'local_sites_with_dupe' => $this->indexBaseName ] ) ); } return $extraIndexes; } /** * If there is any boosting to be done munge the the current query to get it right. */ private function installBoosts() { if ( $this->sort !== 'relevance' ) { // Boosts are irrelevant if you aren't sorting by, well, relevance return; } $builder = new RescoreBuilder( $this->searchContext ); $this->searchContext->mergeRescore( $builder->build() ); } /** * @param string $search * @throws UsageException */ private function checkTitleSearchRequestLength( $search ) { $requestLength = mb_strlen( $search ); if ( $requestLength > self::MAX_TITLE_SEARCH ) { throw new UsageException( 'Prefix search request was longer than the maximum allowed length.' . " ($requestLength > " . self::MAX_TITLE_SEARCH . ')', 'request_too_long', 400 ); } } /** * @param string $search * @return Status */ private function checkTextSearchRequestLength( $search ) { $requestLength = mb_strlen( $search ); if ( $requestLength > self::MAX_TEXT_SEARCH && // allow category intersections longer than the maximum strpos( $search, 'incategory:' ) === false ) { return Status::newFatal( 'cirrussearch-query-too-long', $this->language->formatNum( $requestLength ), $this->language->formatNum( self::MAX_TEXT_SEARCH ) ); } return Status::newGood(); } /** * Attempt to suck a leading namespace followed by a colon from the query string. Reaches out to Elasticsearch to * perform normalized lookup against the namespaces. Should be fast but for the network hop. * * @param string &$query */ public function updateNamespacesFromQuery( &$query ) { $colon = strpos( $query, ':' ); if ( $colon === false ) { return; } $namespaceName = substr( $query, 0, $colon ); $status = $this->findNamespace( $namespaceName ); // Failure case is already logged so just handle success case if ( !$status->isOK() ) { return; } $foundNamespace = $status->getValue(); if ( !$foundNamespace ) { return; } $foundNamespace = $foundNamespace[ 0 ]; $query = substr( $query, $colon + 1 ); $this->searchContext->setNamespaces( [ $foundNamespace->getId() ] ); } /** * Perform a quick and dirty replacement for $this->description * when it's not going through monolog. It replaces {foo} with * the value from $context['foo']. * * @param string $input String to perform replacement on * @param array $context patterns and their replacements * @return string $input with replacements from $context performed */ private function formatDescription( $input, $context ) { $pairs = []; foreach ( $context as $key => $value ) { $pairs['{' . $key . '}'] = $value; } return strtr( $input, $pairs ); } /** * @return SearchContext */ public function getSearchContext() { return $this->searchContext; } private function getPoolCounterType() { $poolCounterTypes = array( 'regex' => 'CirrusSearch-Regex', 'prefix' => 'CirrusSearch-Prefix', ); if ( isset( $poolCounterTypes[$this->searchContext->getSearchType()] ) ) { return $poolCounterTypes[$this->searchContext->getSearchType()]; } return 'CirrusSearch-Search'; } private function getTimeout() { if ( $this->searchContext->getSearchType() === 'regex' ) { $type = 'regex'; } else { $type = 'default'; } return $this->config->getElement( 'CirrusSearchSearchShardTimeout', $type ); } }