%PDF- %PDF-
| Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/ |
| Current File : //www/varak.net/wiki.varak.net/extensions/CirrusSearch/maintenance/updateOneSearchIndexConfig.php |
<?php
namespace CirrusSearch\Maintenance;
use CirrusSearch\Connection;
use CirrusSearch\ElasticsearchIntermediary;
use CirrusSearch\Maintenance\Metastore;
use CirrusSearch\SearchConfig;
use CirrusSearch\Util;
use Elastica;
/**
* Update the search configuration on the search backend.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*/
// Resolve the MediaWiki install path: use the MW_INSTALL_PATH environment
// variable when it is set, otherwise fall back to three directories above
// this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';

// Pull in the core maintenance framework first, then the CirrusSearch
// specific Maintenance base class that the class below extends.
require_once "$IP/maintenance/Maintenance.php";
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';
/**
 * Update the elasticsearch configuration for this index.
 *
 * Operates on exactly one index type (the required `indexType` option).
 * A full run validates, in order: the index itself (creating it when needed),
 * its analyzers, its mapping, its cache warmers and its aliases, then updates
 * the stored index versions and (for the general index) indexes namespaces.
 * The `just*` options short-circuit the run to a single validation step.
 */
class UpdateOneSearchIndexConfig extends Maintenance {
	/**
	 * @var string index type being updated, taken from the `indexType` option
	 */
	private $indexType;

	/**
	 * @var bool Are we going to blow the index away and start from scratch?
	 */
	private $startOver;

	/**
	 * @var int documents per shard to reindex in a batch
	 */
	private $reindexChunkSize;

	/**
	 * @var int number of times to back off and retry per reindex failure
	 */
	private $reindexRetryAttempts;

	/**
	 * @var string base name used for all indexes
	 */
	private $indexBaseName;

	/**
	 * @var string identifier of the specific index being worked on
	 */
	private $indexIdentifier;

	/**
	 * @var bool is it ok to reindex into this index and remove the old one?
	 */
	private $reindexAndRemoveOk;

	/**
	 * @var bool are there too few replicas in the index we're making?
	 */
	private $tooFewReplicas = false;

	/**
	 * @var int number of processes to use when reindexing
	 */
	private $reindexProcesses;

	/**
	 * @var string language code we're building for
	 */
	private $langCode;

	/**
	 * @var bool prefix search on any term
	 */
	private $prefixSearchStartsWithAny;

	/**
	 * @var bool use suggestions on text fields
	 */
	private $phraseSuggestUseText;

	/**
	 * @var bool print config as it is being checked
	 */
	private $printDebugCheckConfig;

	/**
	 * @var float how much the reindexed copy of an index is allowed to deviate from the
	 *  current copy without triggering a reindex failure
	 */
	private $reindexAcceptableCountDeviation;

	/**
	 * @var AnalysisConfigBuilder the builder for analysis config
	 */
	private $analysisConfigBuilder;

	/**
	 * @var string[] list of available plugins
	 */
	private $availablePlugins;

	/**
	 * @var array plugins that must not be used, from $wgCirrusSearchBannedPlugins
	 */
	protected $bannedPlugins;

	/**
	 * @var bool
	 */
	protected $optimizeIndexForExperimentalHighlighter;

	/**
	 * @var int|string the $wgCirrusSearchMaxShardsPerNode entry for this index type,
	 *  or 'unlimited' when no entry is configured
	 */
	protected $maxShardsPerNode;

	/**
	 * @var int
	 */
	protected $refreshInterval;

	/**
	 * @var string
	 */
	protected $masterTimeout;

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Update the configuration or contents of one search index. This always operates on a single cluster." );
		$this->addOption( 'indexType', 'Index to update. Either content or general.', true, true );
		self::addSharedOptions( $this );
	}

	/**
	 * Register the options this script has in common with other maintenance
	 * scripts on the given maintenance object.
	 *
	 * @param Maintenance $maintenance
	 * @suppress PhanAccessMethodProtected Phan incorrectly thinks we can't call protected methods
	 *  on other Maintenance classes.
	 */
	public static function addSharedOptions( $maintenance ) {
		$maintenance->addOption( 'startOver', 'Blow away the identified index and rebuild it with ' .
			'no data.' );
		$maintenance->addOption( 'indexIdentifier', "Set the identifier of the index to work on. " .
			"You'll need this if you have an index in production serving queries and you have " .
			"to alter some portion of its configuration that cannot safely be done without " .
			"rebuilding it. Once you specify a new indexIdentifier for this wiki you'll have to " .
			"run this script with the same identifier each time. Defaults to 'current' which " .
			"infers the currently in use identifier. You can also use 'now' to set the identifier " .
			"to the current time in seconds which should give you a unique identifier.", false, true );
		// Note the trailing space after "this": without it the help text read "thisoperation".
		$maintenance->addOption( 'reindexAndRemoveOk', "If the alias is held by another index then " .
			"reindex all documents from that index (via the alias) to this one, swing the " .
			"alias to this index, and then remove other index. Updates performed while this " .
			"operation is in progress will be queued up in the job queue. Defaults to false." );
		$maintenance->addOption( 'reindexProcesses', 'Number of processes to use in reindex. ' .
			'Not supported on Windows. Defaults to 1 on Windows and 5 otherwise.', false, true );
		$maintenance->addOption( 'reindexAcceptableCountDeviation', 'How much can the reindexed ' .
			'copy of an index is allowed to deviate from the current copy without triggering a ' .
			'reindex failure. Defaults to 5%.', false, true );
		$maintenance->addOption( 'reindexChunkSize', 'Documents per shard to reindex in a batch. ' .
			'Note when changing the number of shards that the old shard size is used, not the new ' .
			'one. If you see many errors submitting documents in bulk but the automatic retry as ' .
			'singles works then lower this number. Defaults to 100.', false, true );
		$maintenance->addOption( 'reindexRetryAttempts', 'Number of times to back off and retry ' .
			'per failure. Note that failures are not common but if Elasticsearch is in the process ' .
			'of moving a shard this can time out. This will retry the attempt after some backoff ' .
			'rather than failing the whole reindex process. Defaults to 5.', false, true );
		$maintenance->addOption( 'baseName', 'What basename to use for all indexes, ' .
			'defaults to wiki id', false, true );
		$maintenance->addOption( 'debugCheckConfig', 'Print the configuration as it is checked ' .
			'to help debug unexpected configuration mismatches.' );
		$maintenance->addOption( 'justCacheWarmers', 'Just validate that the cache warmers are correct ' .
			'and perform no additional checking. Use when you need to apply new cache warmers but ' .
			"want to be sure that you won't apply any other changes at an inopportune time." );
		// This description used to be a copy of the justCacheWarmers text and
		// talked about cache warmers; it now describes shard allocation.
		$maintenance->addOption( 'justAllocation', 'Just validate the shard allocation settings. Use ' .
			"when you need to apply new shard allocation settings but want to be sure that you won't " .
			'apply any other changes at an inopportune time.' );
		$maintenance->addOption( 'justMapping', 'Just try to update the mapping.' );
	}

	/**
	 * Read options and configuration globals into properties, then run the
	 * requested validation steps. Elastica exceptions raised along the way are
	 * reported through error() with exit code 1.
	 */
	public function execute() {
		global $wgLanguageCode,
			$wgCirrusSearchPhraseSuggestUseText,
			$wgCirrusSearchPrefixSearchStartsWithAnyWord,
			$wgCirrusSearchBannedPlugins,
			$wgCirrusSearchOptimizeIndexForExperimentalHighlighter,
			$wgCirrusSearchMaxShardsPerNode,
			$wgCirrusSearchRefreshInterval,
			$wgCirrusSearchMasterTimeout;

		$this->disablePoolCountersAndLogging();

		$utils = new ConfigUtils( $this->getConnection()->getClient(), $this );

		$this->indexType = $this->getOption( 'indexType' );
		$this->startOver = $this->getOption( 'startOver', false );
		$this->indexBaseName = $this->getOption( 'baseName', $this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME ) );
		$this->reindexAndRemoveOk = $this->getOption( 'reindexAndRemoveOk', false );
		$this->reindexProcesses = $this->getOption( 'reindexProcesses', wfIsWindows() ? 1 : 5 );
		$this->reindexAcceptableCountDeviation = Util::parsePotentialPercent(
			$this->getOption( 'reindexAcceptableCountDeviation', '5%' ) );
		$this->reindexChunkSize = $this->getOption( 'reindexChunkSize', 100 );
		$this->reindexRetryAttempts = $this->getOption( 'reindexRetryAttempts', 5 );
		$this->printDebugCheckConfig = $this->getOption( 'debugCheckConfig', false );
		$this->langCode = $wgLanguageCode;
		$this->prefixSearchStartsWithAny = $wgCirrusSearchPrefixSearchStartsWithAnyWord;
		$this->phraseSuggestUseText = $wgCirrusSearchPhraseSuggestUseText;
		$this->bannedPlugins = $wgCirrusSearchBannedPlugins;
		$this->optimizeIndexForExperimentalHighlighter = $wgCirrusSearchOptimizeIndexForExperimentalHighlighter;
		$this->masterTimeout = $wgCirrusSearchMasterTimeout;
		// Fall back to 'unlimited' when nothing is configured for this index type.
		$this->maxShardsPerNode = isset( $wgCirrusSearchMaxShardsPerNode[ $this->indexType ] ) ? $wgCirrusSearchMaxShardsPerNode[ $this->indexType ] : 'unlimited';
		$this->refreshInterval = $wgCirrusSearchRefreshInterval;

		try {
			$indexTypes = $this->getConnection()->getAllIndexTypes();
			// Strict comparison so the option can only match an index type exactly.
			if ( !in_array( $this->indexType, $indexTypes, true ) ) {
				$this->error( 'indexType option must be one of ' .
					implode( ', ', $indexTypes ), 1 );
			}

			$utils->checkElasticsearchVersion();
			$this->availablePlugins = $utils->scanAvailablePlugins( $this->bannedPlugins );

			// The just* options run a single validation step and stop. Note
			// that they run before indexIdentifier and the analysis config
			// builder are initialised below.
			if ( $this->getOption( 'justCacheWarmers', false ) ) {
				$this->validateCacheWarmers();
				return;
			}

			if ( $this->getOption( 'justAllocation', false ) ) {
				$this->validateShardAllocation();
				return;
			}

			if ( $this->getOption( 'justMapping', false ) ) {
				$this->validateMapping();
				return;
			}

			$this->indexIdentifier = $utils->pickIndexIdentifierFromOption( $this->getOption( 'indexIdentifier', 'current' ), $this->getIndexTypeName() );
			$this->analysisConfigBuilder = $this->pickAnalyzer( $this->langCode, $this->availablePlugins );
			$this->validateIndex();
			$this->validateAnalyzers();
			$this->validateMapping();
			$this->validateCacheWarmers();
			$this->validateAlias();
			$this->updateVersions();
			$this->indexNamespaces();
		} catch ( \Elastica\Exception\Connection\HttpException $e ) {
			$message = $e->getMessage();
			$this->output( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Http error communicating with Elasticsearch: $message.\n", 1 );
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			$type = get_class( $e );
			$message = ElasticsearchIntermediary::extractMessage( $e );
			/** @suppress PhanUndeclaredMethod ExceptionInterface has no methods */
			$trace = $e->getTraceAsString();
			$this->output( "\nUnexpected Elasticsearch failure.\n" );
			$this->error( "Elasticsearch failed in an unexpected way. This is always a bug in CirrusSearch.\n" .
				"Error type: $type\n" .
				"Message: $message\n" .
				"Trace:\n" . $trace, 1 );
		}
	}

	/**
	 * Run the Metastore child script with the options that make it update the
	 * index version for this script's base name.
	 *
	 * @suppress PhanAccessPropertyProtected Phan has a bug where it thinks we can't
	 *  access mOptions because its protected. That would be true but this
	 *  class shares the hierarchy that contains mOptions so php allows it.
	 * @suppress PhanUndeclaredMethod runChild technically returns a
	 *  \Maintenance instance but only \CirrusSearch\Maintenance\Maintenance
	 *  classes have the done method. Just allow it since we know what type of
	 *  maint class is being created
	 */
	private function updateVersions() {
		$child = $this->runChild( Metastore::class );
		$child->mOptions['index-version-basename'] = $this->indexBaseName;
		$child->mOptions['update-index-version'] = true;
		$child->execute();
		$child->done();
	}

	/**
	 * Run the IndexNamespaces child script, but only for the general index.
	 *
	 * @suppress PhanUndeclaredMethod runChild technically returns a
	 *  \Maintenance instance but only \CirrusSearch\Maintenance\Maintenance
	 *  classes have the done method. Just allow it since we know what type of
	 *  maint class is being created
	 */
	private function indexNamespaces() {
		// Only index namespaces if we're doing the general index
		if ( $this->indexType === 'general' ) {
			$child = $this->runChild( 'CirrusSearch\Maintenance\IndexNamespaces' );
			$child->execute();
			$child->done();
		}
	}

	/**
	 * Create the index when required (start over requested, or it does not
	 * exist yet) and then validate its settings.
	 */
	private function validateIndex() {
		// $this->startOver || !$this->getIndex()->exists() are the conditions
		// under which a new index will be created. Evaluate them once so the
		// existence check is not repeated.
		$needsCreation = $this->startOver || !$this->getIndex()->exists();
		$this->tooFewReplicas = $needsCreation && $this->reindexAndRemoveOk;
		if ( $this->startOver ) {
			$this->createIndex( true, "Blowing away index to start over..." );
		} elseif ( $needsCreation ) {
			$this->createIndex( false, "Creating index..." );
		}

		$this->validateIndexSettings();
	}

	/**
	 * Create the index through IndexCreator, printing $msg first and "ok"
	 * on success; a failed status is reported through error() with exit code 1.
	 *
	 * @param bool $rebuild passed through to IndexCreator::createIndex
	 * @param string $msg progress message to print before creating
	 */
	private function createIndex( $rebuild, $msg ) {
		global $wgCirrusSearchAllFields;

		$indexCreator = new \CirrusSearch\Maintenance\IndexCreator(
			$this->getIndex(),
			$this->analysisConfigBuilder
		);

		$this->outputIndented( $msg );

		$status = $indexCreator->createIndex(
			$rebuild,
			$this->maxShardsPerNode,
			$this->getShardCount(),
			$this->getReplicaCount(),
			$this->refreshInterval,
			$this->getMergeSettings(),
			$wgCirrusSearchAllFields['build']
		);

		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		} else {
			$this->output( "ok\n" );
		}
	}

	/**
	 * Build the validators for the index level settings: shard count,
	 * replica range, shard allocation and max shards per node.
	 *
	 * @return \CirrusSearch\Maintenance\Validators\Validator[]
	 */
	private function getIndexSettingsValidators() {
		$validators = [];
		$validators[] = new \CirrusSearch\Maintenance\Validators\NumberOfShardsValidator( $this->getIndex(), $this->getShardCount(), $this );
		$validators[] = new \CirrusSearch\Maintenance\Validators\ReplicaRangeValidator( $this->getIndex(), $this->getReplicaCount(), $this );
		$validators[] = $this->getShardAllocationValidator();
		$validators[] = new \CirrusSearch\Maintenance\Validators\MaxShardsPerNodeValidator( $this->getIndex(), $this->indexType, $this->maxShardsPerNode, $this );
		return $validators;
	}

	/**
	 * Run every index settings validator, reporting a failed status through
	 * error() with exit code 1.
	 */
	private function validateIndexSettings() {
		$validators = $this->getIndexSettingsValidators();
		foreach ( $validators as $validator ) {
			$status = $validator->validate();
			if ( !$status->isOK() ) {
				$this->error( $status->getMessage()->text(), 1 );
			}
		}
	}

	/**
	 * Validate the index's analyzers against the analysis config builder.
	 */
	private function validateAnalyzers() {
		$validator = new \CirrusSearch\Maintenance\Validators\AnalyzersValidator( $this->getIndex(), $this->analysisConfigBuilder, $this );
		$validator->printDebugCheckConfig( $this->printDebugCheckConfig );
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
	}

	/**
	 * Validate the mapping of the page and namespace types against the
	 * mapping config built by getMappingConfig().
	 */
	private function validateMapping() {
		$validator = new \CirrusSearch\Maintenance\Validators\MappingValidator(
			$this->getIndex(),
			$this->masterTimeout,
			$this->optimizeIndexForExperimentalHighlighter,
			$this->availablePlugins,
			$this->getMappingConfig(),
			[ 'page' => $this->getPageType(), 'namespace' => $this->getNamespaceType() ],
			$this
		);
		$validator->printDebugCheckConfig( $this->printDebugCheckConfig );
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
	}

	/**
	 * Validate the type specific alias and then the all-index alias.
	 */
	private function validateAlias() {
		$this->outputIndented( "Validating aliases...\n" );
		// Validate the specific alias first as that can cause reindexing
		// and we want the all index to stay with the old index during reindexing
		$this->validateSpecificAlias();
		$this->validateAllAlias();
	}

	/**
	 * Validate the alias that is just for this index's type.
	 */
	private function validateSpecificAlias() {
		$connection = $this->getConnection();

		// The same connection is passed twice to the Reindexer.
		$reindexer = new Reindexer(
			$this->getSearchConfig(),
			$connection,
			$connection,
			[ $this->getPageType() ],
			[ $this->getOldPageType() ],
			$this->getShardCount(),
			$this->getReplicaCount(),
			$this->getMergeSettings(),
			$this->getMappingConfig(),
			$this
		);

		$validator = new \CirrusSearch\Maintenance\Validators\SpecificAliasValidator(
			$this->getConnection()->getClient(),
			$this->getIndexTypeName(),
			$this->getSpecificIndexName(),
			$this->startOver,
			$reindexer,
			[ $this->reindexProcesses, $this->refreshInterval, $this->reindexRetryAttempts, $this->reindexChunkSize, $this->reindexAcceptableCountDeviation ],
			$this->getIndexSettingsValidators(),
			$this->reindexAndRemoveOk,
			$this->tooFewReplicas,
			$this
		);
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
	}

	/**
	 * Validate the alias that spans all index types, then re-validate the
	 * index settings when the index was flagged as having too few replicas.
	 */
	public function validateAllAlias() {
		$validator = new \CirrusSearch\Maintenance\Validators\IndexAllAliasValidator( $this->getConnection()->getClient(),
			$this->getIndexName(), $this->getSpecificIndexName(), $this->startOver, $this->getIndexTypeName(), $this );
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
		// NOTE(review): presumably a freshly created index is built with
		// reduced replicas while reindexing, so the settings are checked
		// again once the aliases are in place - confirm against IndexCreator.
		if ( $this->tooFewReplicas ) {
			$this->validateIndexSettings();
		}
	}

	/**
	 * Validate the cache warmers configured for this index type. When
	 * $wgCirrusSearchMainPageCacheWarmer is enabled the main page title is
	 * added to the content index's warmers.
	 */
	protected function validateCacheWarmers() {
		global $wgCirrusSearchMainPageCacheWarmer, $wgCirrusSearchCacheWarmers;

		$cacheWarmers = isset( $wgCirrusSearchCacheWarmers[$this->indexType] ) ? $wgCirrusSearchCacheWarmers[$this->indexType] : [];
		// Add the main page to a local copy rather than to the global: writing
		// to the global made every call in the same process append the title
		// again, accumulating duplicate warmers.
		if ( $wgCirrusSearchMainPageCacheWarmer && $this->indexType === 'content' ) {
			$cacheWarmers[] = \Title::newMainPage()->getText();
		}

		$warmers = new \CirrusSearch\Maintenance\Validators\CacheWarmersValidator( $this->indexType, $this->getPageType(), $cacheWarmers, $this );
		$status = $warmers->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
	}

	/**
	 * Build the validator for $wgCirrusSearchIndexAllocation on this index.
	 *
	 * @return \CirrusSearch\Maintenance\Validators\Validator
	 */
	private function getShardAllocationValidator() {
		global $wgCirrusSearchIndexAllocation;
		return new \CirrusSearch\Maintenance\Validators\ShardAllocationValidator( $this->getIndex(), $wgCirrusSearchIndexAllocation, $this );
	}

	/**
	 * Validate only the shard allocation settings.
	 */
	protected function validateShardAllocation() {
		$validator = $this->getShardAllocationValidator();
		$status = $validator->validate();
		if ( !$status->isOK() ) {
			$this->error( $status->getMessage()->text(), 1 );
		}
	}

	/**
	 * Build the analysis config builder for a language and print the default
	 * text analyzer type it picked.
	 *
	 * @param string $langCode
	 * @param array $availablePlugins
	 * @return AnalysisConfigBuilder
	 */
	private function pickAnalyzer( $langCode, array $availablePlugins = [] ) {
		$analysisConfigBuilder = new \CirrusSearch\Maintenance\AnalysisConfigBuilder( $langCode, $availablePlugins );
		$this->outputIndented( 'Picking analyzer...' .
			$analysisConfigBuilder->getDefaultTextAnalyzerType() . "\n" );
		return $analysisConfigBuilder;
	}

	/**
	 * Build the mapping config, translating the prefix search and phrase
	 * suggest settings into MappingConfigBuilder flags.
	 *
	 * @return array
	 */
	protected function getMappingConfig() {
		$builder = new MappingConfigBuilder( $this->optimizeIndexForExperimentalHighlighter );
		$configFlags = 0;
		if ( $this->prefixSearchStartsWithAny ) {
			$configFlags |= MappingConfigBuilder::PREFIX_START_WITH_ANY;
		}
		if ( $this->phraseSuggestUseText ) {
			$configFlags |= MappingConfigBuilder::PHRASE_SUGGEST_USE_TEXT;
		}
		return $builder->buildConfig( $configFlags );
	}

	/**
	 * @return \Elastica\Index being updated
	 */
	public function getIndex() {
		return $this->getConnection()->getIndex( $this->indexBaseName, $this->indexType, $this->indexIdentifier );
	}

	/**
	 * @return string name of the index being updated
	 */
	protected function getSpecificIndexName() {
		return $this->getConnection()->getIndexName( $this->indexBaseName, $this->indexType, $this->indexIdentifier );
	}

	/**
	 * @return string name of the index type being updated
	 */
	protected function getIndexTypeName() {
		return $this->getConnection()->getIndexName( $this->indexBaseName, $this->indexType );
	}

	/**
	 * @return string index name built from the base name only
	 */
	protected function getIndexName() {
		return $this->getConnection()->getIndexName( $this->indexBaseName );
	}

	/**
	 * Get the page type being updated by the search config.
	 *
	 * @return Elastica\Type
	 */
	protected function getPageType() {
		return $this->getIndex()->getType( Connection::PAGE_TYPE_NAME );
	}

	/**
	 * Get the namespace type being updated by the search config.
	 *
	 * @return Elastica\Type
	 */
	protected function getNamespaceType() {
		return $this->getIndex()->getType( Connection::NAMESPACE_TYPE_NAME );
	}

	/**
	 * Get the page type looked up without an index identifier, used as the
	 * old type when reindexing.
	 *
	 * @return Elastica\Type
	 */
	protected function getOldPageType() {
		return $this->getConnection()->getPageType( $this->indexBaseName, $this->indexType );
	}

	/**
	 * Get the merge settings for this index.
	 *
	 * @return array
	 */
	private function getMergeSettings() {
		global $wgCirrusSearchMergeSettings;

		if ( isset( $wgCirrusSearchMergeSettings[ $this->indexType ] ) ) {
			return $wgCirrusSearchMergeSettings[ $this->indexType ];
		}
		// If there aren't configured merge settings for this index type default to the content type.
		return $wgCirrusSearchMergeSettings[ 'content' ];
	}

	/**
	 * @return int Number of shards this index should have
	 */
	private function getShardCount() {
		return $this->getConnection()->getSettings()->getShardCount( $this->indexType );
	}

	/**
	 * @return string Number of replicas this index should have. May be a range such as '0-2'
	 */
	private function getReplicaCount() {
		return $this->getConnection()->getSettings()->getReplicaCount( $this->indexType );
	}
}
// Name the maintenance class defined in this file, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): the constant is presumably defined by the core
// Maintenance.php required at the top of this file, and is expected to run
// $maintClass when this script is the entry point - confirm.
$maintClass = UpdateOneSearchIndexConfig::class;
require_once RUN_MAINTENANCE_IF_MAIN;