%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/updateSuggesterIndex.php |
<?php

namespace CirrusSearch\Maintenance;

use CirrusSearch\Connection;
use CirrusSearch\DataSender;
use CirrusSearch\ElasticsearchIntermediary;
use CirrusSearch\Util;
use CirrusSearch\BuildDocument\SuggestBuilder;
use CirrusSearch\BuildDocument\SuggestScoringMethodFactory;
use CirrusSearch\BuildDocument\SuggestScoringMethod;
use CirrusSearch\Maintenance\Validators\AnalyzersValidator;
use CirrusSearch\SearchConfig;
use Elastica;
use Elastica\Index;
use Elastica\Query;
use Elastica\Request;
use Elastica\Status;
use MWElasticUtils;

/**
 * Update the search configuration on the search backend for the title
 * suggest index.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

$IP = getenv( 'MW_INSTALL_PATH' );
if( $IP === false ) {
	$IP = __DIR__ . '/../../..';
}
require_once( "$IP/maintenance/Maintenance.php" );
require_once( __DIR__ . '/../includes/Maintenance/Maintenance.php' );

/**
 * Maintenance script that builds (or recycles) the title suggester index.
 *
 * execute() either rebuilds the index from scratch (rebuild()) or, when
 * canRecycle() allows it, re-indexes into the existing index (recycle()).
 */
class UpdateSuggesterIndex extends Maintenance {
	/**
	 * @var string language code we're building for
	 */
	private $langCode;

	/**
	 * @var int size of the scroll/bulk batches (the 'indexChunkSize' option)
	 */
	private $indexChunkSize;

	/**
	 * @var int number of attempts passed to MWElasticUtils::withRetry()
	 *  and MWElasticUtils::iterateOverScroll()
	 */
	private $indexRetryAttempts;

	/**
	 * @var string index type, set to Connection::TITLE_SUGGEST_TYPE in execute()
	 */
	private $indexTypeName;

	/**
	 * @var string the 'baseName' option or SearchConfig::INDEX_BASE_NAME
	 */
	private $indexBaseName;

	/**
	 * @var string identifier of the index being built; only set by rebuild()
	 */
	private $indexIdentifier;

	/**
	 * @var string the score method name to use.
	 */
	private $scoreMethodName;

	/**
	 * @var SuggestScoringMethod the score function to use.
	 */
	private $scoreMethod;

	/**
	 * @var Index old suggester index that will be deleted at the end of the process
	 */
	private $oldIndex;

	/**
	 * @var int last percentage reported by outputProgress()
	 */
	private $lastProgressPrinted;

	/**
	 * @var boolean optimize the index when done.
	 */
	private $optimizeIndex;

	/**
	 * @var array
	 */
	protected $maxShardsPerNode;

	/**
	 * @var array(String) list of available plugins
	 */
	private $availablePlugins;

	/**
	 * @var boolean index geo contextualized suggestions
	 */
	private $withGeo;

	/**
	 * @var string value sent as master_timeout on index management requests
	 */
	private $masterTimeout;

	/**
	 * @var ConfigUtils
	 */
	private $utils;

	/**
	 * @todo: public (used in closure)
	 * @var SuggestBuilder
	 */
	public $builder;

	/**
	 * @var AnalysisConfigBuilder
	 */
	private $analysisConfigBuilder;

	/**
	 * @var bool set to true by recycle()
	 */
	private $recycle = false;

	/**
	 * @var string[]
	 */
	private $bannedPlugins;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Create a new suggester index. Always operates on a single cluster." );
		$this->addOption( 'baseName', 'What basename to use for all indexes, ' .
			'defaults to wiki id', false, true );
		$this->addOption( 'indexChunkSize', 'Documents per shard to index in a batch. ' .
			'Note when changing the number of shards that the old shard size is used, not the new ' .
			'one. If you see many errors submitting documents in bulk but the automatic retry as ' .
			'singles works then lower this number. Defaults to 100.', false, true );
		$this->addOption( 'indexRetryAttempts', 'Number of times to back off and retry ' .
			'per failure. Note that failures are not common but if Elasticsearch is in the process ' .
			'of moving a shard this can time out. This will retry the attempt after some backoff ' .
			'rather than failing the whole reindex process. Defaults to 5.', false, true );
		$this->addOption( 'optimize', 'Optimize the index to 1 segment. Defaults to false.', false, false );
		$this->addOption( 'with-geo', 'Build geo contextualized suggestions. Defaults to false.', false, false );
		$this->addOption( 'scoringMethod', 'The scoring method to use when computing suggestion weights. ' .
			'Defaults to $wgCirrusSearchCompletionDefaultScore or quality if unset.', false, true );
		$this->addOption( 'masterTimeout', 'The amount of time to wait for the master to respond to mapping ' .
			'updates before failing. Defaults to $wgCirrusSearchMasterTimeout.', false, true );
		$this->addOption( 'replicationTimeout', 'The amount of time (seconds) to wait for the replica shards to initialize. ' .
			'Defaults to 3600 seconds.', false, true );
		$this->addOption( 'allocationIncludeTag', 'Set index.routing.allocation.include.tag on the created index. ' .
			'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.', false, true );
		$this->addOption( 'allocationExcludeTag', 'Set index.routing.allocation.exclude.tag on the created index. ' .
			'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.', false, true );
	}

	/**
	 * Script entry point: read options and globals, then rebuild or
	 * recycle the suggester index.
	 *
	 * Elastica exceptions raised while doing so are reported through
	 * error() with a non-zero second argument.
	 */
	public function execute() {
		global $wgLanguageCode,
			$wgCirrusSearchBannedPlugins,
			$wgCirrusSearchMasterTimeout,
			$wgCirrusSearchMaxShardsPerNode,
			$wgCirrusSearchCompletionDefaultScore;

		$this->disablePoolCountersAndLogging();
		$this->masterTimeout = $this->getOption( 'masterTimeout', $wgCirrusSearchMasterTimeout );
		$this->indexTypeName = Connection::TITLE_SUGGEST_TYPE;

		// Check that all shards and replicas settings are set
		try {
			$this->getShardCount();
			$this->getReplicaCount();
		} catch( \Exception $e ) {
			$this->error( "Failed to get shard count and replica count information: {$e->getMessage()}", 1 );
		}

		$this->indexBaseName = $this->getOption( 'baseName', $this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME ) );
		$this->indexChunkSize = $this->getOption( 'indexChunkSize', 100 );
		// The option is registered as 'indexRetryAttempts'. Earlier code read
		// 'reindexRetryAttempts' here, so the documented option was ignored;
		// the legacy key is still honoured as a fallback.
		$this->indexRetryAttempts = $this->getOption(
			'indexRetryAttempts',
			$this->getOption( 'reindexRetryAttempts', 5 )
		);

		$this->optimizeIndex = $this->getOption( 'optimize', false );
		$this->withGeo = $this->getOption( 'with-geo', false );

		$this->utils = new ConfigUtils( $this->getClient(), $this);

		$this->langCode = $wgLanguageCode;
		$this->bannedPlugins = $wgCirrusSearchBannedPlugins;

		$this->availablePlugins = $this->utils->scanAvailablePlugins( $this->bannedPlugins );
		$this->analysisConfigBuilder = $this->pickAnalyzer( $this->langCode, $this->availablePlugins );

		$this->utils->checkElasticsearchVersion();

		$this->maxShardsPerNode = isset( $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] )
			? $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ]
			: 'unlimited';

		$this->scoreMethodName = $this->getOption( 'scoringMethod', $wgCirrusSearchCompletionDefaultScore );
		$this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod( $this->scoreMethodName );
		$this->builder = new SuggestBuilder( $this->scoreMethod, $this->withGeo );

		try {
			// If the version does not exist it's certainly because nothing has been indexed.
			if ( !MetaStoreIndex::cirrusReady( $this->getConnection() ) ) {
				throw new \Exception("Cirrus meta store does not exist, you must index your data first");
			}

			if ( !$this->canWrite() ) {
				$this->error( 'Index/Cluster is frozen. Giving up.', 1 );
			}

			# check for broken indices and delete them
			$this->checkAndDeleteBrokenIndices();

			if ( !$this->canRecycle() ) {
				$this->rebuild();
			} else {
				$this->recycle();
			}
		} catch ( \Elastica\Exception\Connection\HttpException $e ) {
			$message = $e->getMessage();
			$this->log( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Http error communicating with Elasticsearch: $message.\n", 1 );
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			$type = get_class( $e );
			$message = ElasticsearchIntermediary::extractMessage( $e );
			/** @suppress PhanUndeclaredMethod ExceptionInterface has no methods */
			$trace = $e->getTraceAsString();
			$this->log( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Elasticsearch failed in an unexpected way. This is always a bug in CirrusSearch.\n" .
				"Error type: $type\n" .
				"Message: $message\n" .
				"Trace:\n" . $trace, 1 );
		}
	}

	/**
	 * Check the frozen indices
	 * @return true if the cluster/index is not frozen, false otherwise.
	 */
	private function canWrite() {
		// Reuse DataSender even if we don't send anything with it.
		$sender = new DataSender( $this->getConnection(), $this->getSearchConfig() );
		return $sender->areIndexesAvailableForWrites( [ $this->getIndexTypeName() ] );
	}

	/**
	 * Check for duplicate indices that may have been created
	 * by a previous update that failed.
	 *
	 * Indices that carry the alias are never touched. The others are
	 * deleted only when their stats report a suggest total of zero.
	 */
	private function checkAndDeleteBrokenIndices() {
		$indices = $this->utils->getAllIndicesByType( $this->getIndexTypeName() );
		if ( count( $indices ) < 2 ) {
			return;
		}

		$indexByName = [];
		foreach( $indices as $name ) {
			$indexByName[$name] = $this->getConnection()->getIndex( $name );
		}

		$status = new Status($this->getClient());
		foreach ( $status->getIndicesWithAlias( $this->getIndexTypeName() ) as $aliased ) {
			// do not try to delete indices that are used in aliases
			unset( $indexByName[$aliased->getName()] );
		}

		foreach ( $indexByName as $name => $index ) {
			# double check with stats
			$stats = $index->getStats()->getData();
			// Extra check: if stats report usages we should not try to fix things
			// automatically.
			if ( $stats['_all']['total']['suggest']['total'] == 0 ) {
				$this->log( "Deleting broken index {$index->getName()}\n" );
				$this->deleteIndex( $index );
			} else {
				$this->log( "Broken index {$index->getName()} appears to be in use, please check and delete.\n" );
			}
		}
		# If something went wrong the process will fail when calling pickIndexIdentifierFromOption
	}

	/**
	 * Build a brand new index, point the alias at it and delete the
	 * previous one.
	 */
	private function rebuild() {
		$oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() );
		$this->oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier );
		$this->indexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'now', $this->getIndexTypeName() );

		$this->createIndex();
		$this->indexData();
		if ( $this->optimizeIndex ) {
			$this->optimize();
		}
		$this->enableReplicas();
		$this->getIndex()->refresh();
		$this->validateAlias();
		$this->updateVersions();
		$this->deleteOldIndex();
		$this->log("Done.\n");
	}

	/**
	 * Decide whether the current index can be re-used instead of rebuilt.
	 *
	 * Recycling requires $wgCirrusSearchRecycleCompletionSuggesterIndex to
	 * be truthy, the current index to exist with a refresh interval of -1
	 * and the configured shard count, matching major analysis/mapping
	 * versions in the metastore, and an analysis config accepted by
	 * AnalyzersValidator.
	 *
	 * @return bool true when recycle() may be used
	 */
	private function canRecycle() {
		global $wgCirrusSearchRecycleCompletionSuggesterIndex;
		if ( !$wgCirrusSearchRecycleCompletionSuggesterIndex ) {
			return false;
		}

		$oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() );
		$oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier );
		if ( ! $oldIndex->exists() ) {
			$this->error( 'Index does not exist yet cannot recycle.' );
			return false;
		}

		$refresh = $oldIndex->getSettings()->getRefreshInterval();
		if ( $refresh != '-1' ) {
			$this->error( 'Refresh interval is not -1, cannot recycle.' );
			return false;
		}

		$shards = $oldIndex->getSettings()->get( 'number_of_shards' );
		// We check only the number of shards since it cannot be updated.
		if( $shards != $this->getShardCount() ) {
			$this->error( 'Number of shards mismatch cannot recycle.' );
			return false;
		}

		list( $mMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION );
		list( $aMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION );

		try {
			$versionDoc = MetaStoreIndex::getVersionType( $this->getConnection() )->getDocument( $this->getIndexTypeName() );
		} catch( \Elastica\Exception\NotFoundException $nfe ) {
			$this->error( 'Index missing in mw_cirrus_metastore::version, cannot recycle.' );
			return false;
		}

		if ( $versionDoc->analysis_maj != $aMaj ) {
			$this->error( 'Analysis config version mismatch, cannot recycle.' );
			return false;
		}

		if ( $versionDoc->mapping_maj != $mMaj ) {
			$this->error( 'Mapping config version mismatch, cannot recycle.' );
			return false;
		}

		$validator = new AnalyzersValidator( $oldIndex, $this->analysisConfigBuilder, $this );
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( 'Analysis config differs, cannot recycle.' );
			return false;
		}

		return true;
	}

	/**
	 * Recycle a suggester index:
	 * 1/ index data (delete docs if it already exists)
	 * 2/ expunge deleted docs
	 * 3/ refresh the reader
	 *   - so we can run a quick delete on remaining docs
	 *     (the docs that were actually deleted)
	 *   - drawbacks we load the FST from an un-optimized index
	 * 4/ delete old docs
	 * 5/ optimize
	 * 6/ refresh the reader
	 *
	 * Drawbacks: the FST will be read from disk twice in a short
	 * amount of time.
	 * This is a trade off between cluster operation and disk operation.
	 * Recreating the index may require less disk operations but causes
	 * the cluster to rebalance.
	 * This is certainly the best strategy for small indices (less than 100k docs)
	 * but needs to be carefully tested on bigger indices with high QPS.
	 */
	private function recycle() {
		$this->log( "Recycling index {$this->getIndex()->getName()}\n");
		$this->recycle = true;
		$this->indexData();
		// This is fragile... hopefully most of the docs will be deleted from the old segments
		// and will result in a fast operation.
		// New segments should not be affected.
		// Unfortunately if a failure causes the process to stop
		// the FST will maybe contains duplicates as it cannot (elastic 1.7)
		// filter deleted docs. We will rely on output deduplication
		// but this will certainly affect performances.

		$this->expungeDeletes();
		// Refresh the reader so we can scroll over remaining docs.
		// At this point we may read the new un-optimized FST segments
		// Old ones should be pretty small after expungeDeletes
		$this->getIndex()->refresh();

		// Select every doc that does not carry the batch id of this run.
		$boolNot = new Elastica\Query\BoolQuery();
		$boolNot->addMustNot(
			new Elastica\Query\Term( [ "batch_id" => $this->builder->getBatchId() ] )
		);
		$bool = new Elastica\Query\BoolQuery();
		$bool->addFilter( $boolNot );

		$query = new Elastica\Query();
		$query->setQuery( $bool );
		$query->setFields( [ '_id' ] );
		$scrollOptions = [
			'search_type' => 'scan',
			'scroll' => "15m",
			'size' => $this->indexChunkSize
		];
		$result = $this->getIndex()->search( $query, $scrollOptions );
		$totalDocsInIndex = $result->getResponse()->getData();
		$totalDocsInIndex = $totalDocsInIndex['hits']['total'];
		$totalDocsToDump = $totalDocsInIndex;
		// Counter shared by reference with the scroll callback below.
		$docsDumped = 0;

		$this->log( "Deleting remaining docs from previous batch ($totalDocsInIndex).\n" );
		MWElasticUtils::iterateOverScroll( $this->getIndex(), $result->getResponse()->getScrollId(), '15m',
			function( $results ) use ( &$docsDumped, $totalDocsToDump ) {
				$docIds = [];
				foreach( $results as $result ) {
					$docsDumped++;
					$docIds[] = $result->getId();
				}
				$this->outputProgress( $docsDumped, $totalDocsToDump );
				MWElasticUtils::withRetry( $this->indexRetryAttempts,
					function() use ( $docIds ) {
						$this->getType()->deleteIds( $docIds );
					}
				);
			}, 0, $this->indexRetryAttempts );
		$this->log( "Done.\n" );
		// Old docs should be deleted now we can optimize and flush
		$this->optimize();

		// @todo add support for changing the number of replicas
		// if the setting was changed in cirrus config.
		// Workaround is to change the settings directly on the cluster.

		// Refresh the reader so it now uses the optimized FST,
		// and actually free and delete old segments.
		$this->getIndex()->refresh();
	}

	/**
	 * Delete the index remembered by rebuild(), if it is set and exists.
	 */
	private function deleteOldIndex() {
		if ( $this->oldIndex && $this->oldIndex->exists() ) {
			$this->log("Deleting " . $this->oldIndex->getName() . " ... ");
			// @todo Utilize $this->oldIndex->delete(...) once Elastica library is updated
			// to allow passing the master_timeout
			$this->oldIndex->request(
				'',
				Request::DELETE,
				[],
				[ 'master_timeout' => $this->masterTimeout ]
			);
			$this->output("ok.\n");
		}
	}

	/**
	 * Delete an index
	 * @param \Elastica\Index $index
	 */
	private function deleteIndex( \Elastica\Index $index ) {
		// @todo Utilize $this->oldIndex->delete(...) once Elastica library is updated
		// to allow passing the master_timeout
		$index->request(
			'',
			Request::DELETE,
			[],
			[ 'master_timeout' => $this->masterTimeout ]
		);
	}

	/**
	 * Optimize the index being updated down to a single segment.
	 */
	private function optimize() {
		$this->log("Optimizing index...");
		$this->getIndex()->optimize( [ 'max_num_segments' => 1 ] );
		$this->output("ok.\n");
	}

	/**
	 * Run an optimize restricted to expunging deleted docs, without flush.
	 */
	private function expungeDeletes() {
		$this->log("Purging deleted docs...");
		$this->getIndex()->optimize( [ 'only_expunge_deletes' => true, 'flush' => false ] );
		$this->output("ok.\n");
	}

	/**
	 * Scroll over the source indices, build suggestion docs with
	 * $this->builder and add them to the suggester index.
	 */
	private function indexData() {
		// We build the suggestions by reading CONTENT and GENERAL indices.
		// This does not support extra indices like FILES on commons.
		$sourceIndexTypes = [ Connection::CONTENT_INDEX_TYPE, Connection::GENERAL_INDEX_TYPE ];

		// Indices to use for counting max_docs used by scoring functions
		// Since we work mostly on the content namespace it seems OK to count
		// only docs in the CONTENT index.
		$countIndices = [ Connection::CONTENT_INDEX_TYPE ];

		$query = new Query();
		$query->setFields( [ '_id', '_type', '_source' ] );
		$query->setSource( [
			'include' => $this->builder->getRequiredFields()
		] );

		$pageAndNs = new Elastica\Query\BoolQuery();
		$pageAndNs->addShould( new Elastica\Query\Term( [ "namespace" => NS_MAIN ] ) );
		$pageAndNs->addShould( new Elastica\Query\Term( [ "redirect.namespace" => NS_MAIN ] ) );
		$pageAndNs->addMust( new Elastica\Query\Type( Connection::PAGE_TYPE_NAME ) );
		$bool = new Elastica\Query\BoolQuery();
		$bool->addFilter( $pageAndNs );

		$query->setQuery( $bool );

		// Run a first query to count the number of docs.
		// This is needed for the scoring methods that need
		// to normalize values against wiki size.
		$mSearch = new \Elastica\Multi\Search( $this->getClient() );
		foreach ( $countIndices as $sourceIndexType ) {
			$search = new \Elastica\Search( $this->getClient() );
			$search->addIndex( $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType ) );
			$search->setOption(
				\Elastica\Search::OPTION_SEARCH_TYPE,
				\Elastica\Search::OPTION_SEARCH_TYPE_COUNT
			);
			$mSearch->addSearch( $search );
		}

		$mSearchRes = $mSearch->search();
		$total = 0;
		foreach( $mSearchRes as $res ) {
			$total += $res->getTotalHits();
		}
		$this->log( "Setting max_docs to $total\n" );
		$this->scoreMethod->setMaxDocs( $total );

		foreach( $sourceIndexTypes as $sourceIndexType ) {
			$scrollOptions = [
				'search_type' => 'scan',
				'scroll' => "15m",
				'size' => $this->indexChunkSize
			];

			$sourceIndex = $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType );
			$result = $sourceIndex->search( $query, $scrollOptions );
			$totalDocsInIndex = $result->getResponse()->getData();
			$totalDocsInIndex = $totalDocsInIndex['hits']['total'];
			$totalDocsToDump = $totalDocsInIndex;

			$docsDumped = 0;
			$this->log( "Indexing $totalDocsToDump documents from $sourceIndexType ($totalDocsInIndex in the index) with batchId: {$this->builder->getBatchId()} and scoring method: {$this->scoreMethodName}\n" );

			$destinationType = $this->getIndex()->getType( Connection::TITLE_SUGGEST_TYPE_NAME );

			MWElasticUtils::iterateOverScroll( $sourceIndex, $result->getResponse()->getScrollId(), '15m',
				function( $results ) use ( &$docsDumped, $totalDocsToDump, $destinationType ) {
					$inputDocs = [];
					foreach ( $results as $result ) {
						$docsDumped++;
						$inputDocs[] = [
							'id' => $result->getId(),
							'source' => $result->getSource()
						];
					}

					$suggestDocs = $this->builder->build( $inputDocs );
					$this->outputProgress( $docsDumped, $totalDocsToDump );
					MWElasticUtils::withRetry( $this->indexRetryAttempts,
						function() use ( $destinationType, $suggestDocs ) {
							$destinationType->addDocuments( $suggestDocs );
						}
					);
				}, 0, $this->indexRetryAttempts );
			$this->log( "Indexing from $sourceIndexType index done.\n" );
		}
	}

	/**
	 * Move the alias onto the index being updated in one _aliases request:
	 * one remove action per index currently holding the alias, then one add.
	 */
	public function validateAlias() {
		// @todo utilize the following once Elastica is updated to support passing
		// master_timeout. This is a copy of the Elastica\Index::addAlias() method
		// $this->getIndex()->addAlias( $this->getIndexTypeName(), true );
		$index = $this->getIndex();
		$name = $this->getIndexTypeName();

		$path = '_aliases';
		$data = ['actions' => []];
		$status = new Status($index->getClient());
		foreach ($status->getIndicesWithAlias($name) as $aliased) {
			$data['actions'][] = ['remove' => ['index' => $aliased->getName(), 'alias' => $name]];
		}

		$data['actions'][] = ['add' => ['index' => $index->getName(), 'alias' => $name]];

		$index->getClient()->request($path, Request::POST, $data, [ 'master_timeout' => $this->masterTimeout ] );
	}

	/**
	 * Print the completion percentage, at most once per percentage value
	 * and only for even percentages.
	 *
	 * public because php 5.3 does not support accessing private
	 * methods in a closure.
	 * @param int $docsDumped
	 * @param int $limit
	 */
	public function outputProgress( $docsDumped, $limit ) {
		// A non-positive limit would make the division below fail.
		if ( $docsDumped <= 0 || $limit <= 0 ) {
			return;
		}
		$pctDone = (int) ( ( $docsDumped / $limit ) * 100 );
		if ( $this->lastProgressPrinted == $pctDone ) {
			return;
		}
		$this->lastProgressPrinted = $pctDone;
		if ( ( $pctDone % 2 ) == 0 ) {
			$this->outputIndented( "\t$pctDone% done...\n" );
		}
	}

	/**
	 * Output a message prefixed with the current date and time.
	 *
	 * @param string $message
	 * @param mixed $channel forwarded unchanged to parent::output()
	 */
	public function log( $message, $channel = null ) {
		$date = new \DateTime();
		parent::output( $date->format('Y-m-d H:i:s') . " " . $message, $channel );
	}

	/**
	 * @param string $langCode
	 * @param array $availablePlugins
	 * @return AnalysisConfigBuilder
	 */
	private function pickAnalyzer( $langCode, array $availablePlugins = [] ) {
		$analysisConfigBuilder = new \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder( $langCode, $availablePlugins );
		$this->outputIndented( 'Picking analyzer...' .
			$analysisConfigBuilder->getDefaultTextAnalyzerType() . "\n" );
		return $analysisConfigBuilder;
	}

	/**
	 * Create the new index with its settings and mappings, then wait for
	 * it to go green.
	 *
	 * @throws \Exception if the index already exists
	 */
	private function createIndex() {
		$maxShardsPerNode = $this->maxShardsPerNode === 'unlimited' ? -1 : $this->maxShardsPerNode;

		// This is "create only" for now.
		if ( $this->getIndex()->exists() ) {
			throw new \Exception( "Index already exists." );
		}

		$mappingConfigBuilder = new SuggesterMappingConfigBuilder();

		// We create the index with 0 replicas, this is faster and will
		// stress less nodes with 4 shards and 2 replicas we would
		// stress 12 nodes (moreover with the optimize flag)
		$settings = [
			'number_of_shards' => $this->getShardCount(),
			// hacky but we still use auto_expand_replicas
			// for convenience on small install.
			'auto_expand_replicas' => "0-0",
			'refresh_interval' => -1,
			'analysis' => $this->analysisConfigBuilder->buildConfig(),
			'routing.allocation.total_shards_per_node' => $maxShardsPerNode,
		];

		if ( $this->hasOption( 'allocationIncludeTag' ) ) {
			$this->output( "Using routing.allocation.include.tag: {$this->getOption( 'allocationIncludeTag' )}, " .
				"the index might be stuck in red if the cluster is not properly configured.\n" );
			$settings['routing.allocation.include.tag'] = $this->getOption( 'allocationIncludeTag' );
		}

		if ( $this->hasOption( 'allocationExcludeTag' ) ) {
			$this->output( "Using routing.allocation.exclude.tag: {$this->getOption( 'allocationExcludeTag' )}, " .
				"the index might be stuck in red if the cluster is not properly configured.\n" );
			$settings['routing.allocation.exclude.tag'] = $this->getOption( 'allocationExcludeTag' );
		}

		$args = [
			'settings' => $settings,
			'mappings' => $mappingConfigBuilder->buildConfig()
		];
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$this->getIndex()->request(
			'',
			Request::PUT,
			$args,
			[ 'master_timeout' => $this->masterTimeout ]
		);

		// Index create is async, we have to make sure that the index is ready
		// before sending any docs to it.
		$this->waitForGreen();
	}

	/**
	 * Set auto_expand_replicas to the configured replica count and wait
	 * for the index to go green again.
	 */
	private function enableReplicas() {
		$this->log("Enabling replicas...\n");
		$args = [
			'index' => [
				'auto_expand_replicas' => $this->getReplicaCount(),
			],
		];

		$path = $this->getIndex()->getName() . "/_settings";
		$this->getIndex()->getClient()->request(
			$path,
			Request::PUT,
			$args,
			[ 'master_timeout' => $this->masterTimeout ]
		);

		// The previous call seems to be async, let's wait few sec
		// otherwise replication won't have time to start.
		sleep( 20 );

		// Index will be yellow while replica shards are being allocated.
		$this->waitForGreen( $this->getOption( 'replicationTimeout', 3600 ) );
	}

	/**
	 * Wait for the index being updated to go green; report through
	 * error() with a non-zero second argument when the wait fails.
	 *
	 * @param int $timeout passed to ConfigUtils::waitForGreen()
	 */
	private function waitForGreen( $timeout = 600 ) {
		$this->log( "Waiting for the index to go green...\n" );
		// Wait for the index to go green ( default 10 min)
		if ( !$this->utils->waitForGreen( $this->getIndex()->getName(), $timeout ) ) {
			$this->error( "Failed to wait for green... please check config and delete the {$this->getIndex()->getName()} index if it was created.", 1 );
		}
	}

	/**
	 * @return string Number of replicas this index should have. May be a range such as '0-2'
	 */
	private function getReplicaCount() {
		return $this->getConnection()->getSettings()->getReplicaCount( $this->indexTypeName );
	}

	/**
	 * @return mixed shard count reported by the connection settings for
	 *  this index type
	 */
	private function getShardCount() {
		return $this->getConnection()->getSettings()->getShardCount( $this->indexTypeName );
	}

	/**
	 * Record the analysis/mapping versions and the shard count of the
	 * suggester index in the metastore version type.
	 *
	 * @throws \Exception if the metastore version type does not exist
	 */
	private function updateVersions() {
		$this->log( "Updating tracking indexes..." );
		$index = MetaStoreIndex::getVersionType( $this->getConnection() );
		if ( !$index->exists() ) {
			throw new \Exception("meta store does not exist, you must index your data first");
		}
		list( $aMaj, $aMin ) = explode( '.', \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION );
		list( $mMaj, $mMin ) = explode( '.', \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION );
		$doc = new \Elastica\Document(
			$this->getIndexTypeName(),
			[
				'analysis_maj' => $aMaj,
				'analysis_min' => $aMin,
				'mapping_maj' => $mMaj,
				'mapping_min' => $mMin,
				'shard_count' => $this->getShardCount(),
			]
		);
		$index->addDocument( $doc );
		$this->output("ok.\n");
	}

	/**
	 * @return \Elastica\Index being updated
	 */
	public function getIndex() {
		return $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $this->indexIdentifier );
	}

	/**
	 * @return \Elastica\Type
	 */
	public function getType() {
		return $this->getIndex()->getType( Connection::TITLE_SUGGEST_TYPE_NAME );
	}

	/**
	 * @return Elastica\Client
	 */
	protected function getClient() {
		return $this->getConnection()->getClient();
	}

	/**
	 * @return string name of the index type being updated
	 */
	protected function getIndexTypeName() {
		return $this->getConnection()->getIndexName( $this->indexBaseName, $this->indexTypeName );
	}
}

$maintClass = UpdateSuggesterIndex::class;
require_once RUN_MAINTENANCE_IF_MAIN;