%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Maintenance/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Maintenance/MetaStoreIndex.php |
<?php

namespace CirrusSearch\Maintenance;

use CirrusSearch\Connection;
use Elastica\Client;

/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

/**
 * Utility class to manage a multipurpose metadata storage index for cirrus.
 * This store is used to store persistent states related to administrative
 * tasks (index settings upgrade, frozen indices, ...).
 *
 * The store is reached through the self::INDEX_NAME alias; the concrete
 * index behind it is named self::INDEX_NAME . '_' . $suffix.
 */
class MetaStoreIndex {
	/**
	 * @const int major version, increment when adding an incompatible change
	 * to settings or mappings
	 */
	const METASTORE_MAJOR_VERSION = 0;

	/**
	 * @const int minor version increment only when adding a new field to
	 * an existing mapping or a new mapping
	 */
	const METASTORE_MINOR_VERSION = 2;

	/**
	 * @const string the doc id used to store version information related
	 * to the meta store itself. This value is not supposed to be changed.
	 */
	const METASTORE_VERSION_DOCID = 'metastore_version';

	/**
	 * @const string index name
	 */
	const INDEX_NAME = 'mw_cirrus_metastore';

	/**
	 * @const string previous index name (bc code)
	 */
	const OLD_INDEX_NAME = 'mw_cirrus_versions';

	/**
	 * @const string type for storing version tracking info
	 */
	const VERSION_TYPE = 'version';

	/**
	 * @const string type for storing sanitize jobs tracking info
	 */
	const SANITIZE_TYPE = 'sanitize';

	/**
	 * @const string type for storing frozen indices tracking info
	 */
	const FROZEN_TYPE = 'frozen';

	/**
	 * @const string type for storing internal data
	 */
	const INTERNAL_TYPE = 'internal';

	/**
	 * @var Connection
	 */
	private $connection;

	/**
	 * @var \Elastica\Client
	 */
	private $client;

	/**
	 * @var Maintenance|null initiator maintenance script
	 */
	private $out;

	/**
	 * @var string master operation timeout
	 */
	private $masterTimeout;

	/**
	 * @var ConfigUtils
	 */
	private $configUtils;

	/**
	 * @param Connection $connection
	 * @param Maintenance $out maintenance script used for log output
	 * @param string $masterTimeout value sent as the master_timeout
	 *  request parameter (e.g. '10000s')
	 */
	public function __construct( Connection $connection, Maintenance $out, $masterTimeout = '10000s' ) {
		$this->connection = $connection;
		$this->client = $connection->getClient();
		$this->configUtils = new ConfigUtils( $this->client, $out );
		$this->out = $out;
		$this->masterTimeout = $masterTimeout;
	}

	/**
	 * Create the metastore when it is missing, otherwise compare the stored
	 * version with the runtime version and run a major or minor upgrade.
	 *
	 * @throws \Exception when the stored version is newer than the
	 *  runtime version
	 */
	public function createOrUpgradeIfNecessary() {
		$this->fixOldName();
		// If the mw_cirrus_metastore alias still not exists it
		// means we need to create everything from scratch.
		if ( !$this->client->getIndex( self::INDEX_NAME )->exists() ) {
			$this->log( self::INDEX_NAME . " missing creating.\n" );
			$newIndex = $this->createNewIndex();
			$this->switchAliasTo( $newIndex );
			return;
		}

		list( $major, $minor ) = $this->metastoreVersion();
		if ( $major < self::METASTORE_MAJOR_VERSION ) {
			$this->log( self::INDEX_NAME . " major version mismatch upgrading.\n" );
			$this->majorUpgrade();
		} elseif ( $major === self::METASTORE_MAJOR_VERSION &&
			$minor < self::METASTORE_MINOR_VERSION
		) {
			$this->log( self::INDEX_NAME .
				" minor version mismatch trying to upgrade mapping.\n" );
			$this->minorUpgrade();
		} elseif ( $major > self::METASTORE_MAJOR_VERSION ||
			$minor > self::METASTORE_MINOR_VERSION
		) {
			throw new \Exception(
				"Metastore version $major.$minor found, cannot upgrade to a lower version: " .
					self::METASTORE_MAJOR_VERSION . "." . self::METASTORE_MINOR_VERSION
			);
		}
	}

	/**
	 * Create a new metastore index, wait for it to become green and
	 * store the runtime metastore version in it.
	 *
	 * @param string $suffix index suffix
	 * @return \Elastica\Index the newly created index
	 */
	public function createNewIndex( $suffix = 'first' ) {
		$name = self::INDEX_NAME . '_' . $suffix;
		$this->log( "Creating metastore index... $name" );
		// Don't forget to update METASTORE_MAJOR_VERSION when changing something
		// in the settings
		$settings = [
			'number_of_shards' => 1,
			'auto_expand_replicas' => '0-2'
		];
		$args = [
			'settings' => $settings,
			'mappings' => $this->buildMapping(),
		];
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$index = $this->client->getIndex( $name );
		$index->request(
			'',
			\Elastica\Request::PUT,
			$args,
			[ 'master_timeout' => $this->masterTimeout ]
		);
		$this->log( " ok\n" );
		$this->configUtils->waitForGreen( $index->getName(), 3600 );
		$this->storeMetastoreVersion( $index );
		return $index;
	}

	/**
	 * Increment :
	 *   - self:METASTORE_MAJOR_VERSION for incompatible changes
	 *   - self:METASTORE_MINOR_VERSION when adding new field or new mappings
	 * @return array[] the mapping
	 */
	private function buildMapping() {
		return [
			self::VERSION_TYPE => [
				'properties' => [
					'analysis_maj' => [ 'type' => 'long', 'include_in_all' => false ],
					'analysis_min' => [ 'type' => 'long', 'include_in_all' => false ],
					'mapping_maj' => [ 'type' => 'long', 'include_in_all' => false ],
					'mapping_min' => [ 'type' => 'long', 'include_in_all' => false ],
					'shard_count' => [ 'type' => 'long', 'include_in_all' => false ],
				],
			],
			self::FROZEN_TYPE => [
				'properties' => [],
			],
			self::SANITIZE_TYPE => [
				'properties' => [
					'sanitize_job_wiki' => [ 'type' => 'string' ],
					'sanitize_job_created' => [
						'type' => 'date',
						'format' => 'epoch_second',
					],
					'sanitize_job_updated' => [
						'type' => 'date',
						'format' => 'epoch_second',
					],
					'sanitize_job_last_loop' => [
						'type' => 'date',
						'format' => 'epoch_second',
					],
					'sanitize_job_cluster' => [ 'type' => 'string' ],
					'sanitize_job_id_offset' => [ 'type' => 'long' ],
					'sanitize_job_ids_sent' => [ 'type' => 'long' ],
					'sanitize_job_jobs_sent' => [ 'type' => 'long' ],
					'sanitize_job_jobs_sent_total' => [ 'type' => 'long' ],
				],
			],
			self::INTERNAL_TYPE => [
				'properties' => [
					'metastore_major_version' => [ 'type' => 'integer' ],
					'metastore_minor_version' => [ 'type' => 'integer' ],
				],
			],
		];
	}

	/**
	 * Push the current mapping of every type to the existing index and
	 * store the runtime metastore version.
	 */
	private function minorUpgrade() {
		$index = $this->connection->getIndex( self::INDEX_NAME );
		foreach ( $this->buildMapping() as $type => $mapping ) {
			$index->getType( $type )->request(
				'_mapping',
				\Elastica\Request::PUT,
				$mapping,
				[
					'master_timeout' => $this->masterTimeout,
				]
			);
		}
		$this->storeMetastoreVersion( $index );
	}

	/**
	 * Switch the mw_cirrus_metastore alias to this new index name.
	 * The index previously behind the alias, if any, is deleted afterwards.
	 *
	 * @param \Elastica\Index $index
	 * @throws \Exception when the alias already points to $index
	 */
	private function switchAliasTo( $index ) {
		$name = $index->getName();
		$oldIndexName = $this->getAliasedIndexName();

		// Reject a no-op switch before logging or touching anything: going on
		// would end with the deletion of the index we just aliased.
		if ( $oldIndexName === $name ) {
			throw new \Exception(
				"Cannot switch aliases old and new index names are identical: $name"
			);
		}

		if ( $oldIndexName !== null ) {
			$this->log( "Switching " . self::INDEX_NAME . " alias from $oldIndexName to $name.\n" );
		} else {
			$this->log( "Creating " . self::INDEX_NAME . " alias to $name.\n" );
		}

		// Create the alias
		$path = '_aliases';
		$data = [
			'actions' => [
				[
					'add' => [
						'index' => $name,
						'alias' => self::INDEX_NAME,
					]
				],
			]
		];
		if ( $oldIndexName !== null ) {
			$data['actions'][] = [
				'remove' => [
					'index' => $oldIndexName,
					'alias' => self::INDEX_NAME,
				]
			];
		}
		$this->client->request(
			$path,
			\Elastica\Request::POST,
			$data,
			[ 'master_timeout' => $this->masterTimeout ]
		);
		if ( $oldIndexName !== null ) {
			$this->log( "Deleting old index $oldIndexName\n" );
			$this->connection->getIndex( $oldIndexName )->delete();
		}
	}

	/**
	 * @return string|null the current index behind the self::INDEX_NAME
	 * alias or null if the alias does not exist
	 * @throws \Exception when more than one index carries the alias
	 */
	private function getAliasedIndexName() {
		$resp = $this->client->request(
			'_aliases/' . self::INDEX_NAME,
			\Elastica\Request::GET,
			[]
		);
		$indexName = null;
		foreach ( $resp->getData() as $index => $aliases ) {
			if ( isset( $aliases['aliases'][self::INDEX_NAME] ) ) {
				if ( $indexName !== null ) {
					throw new \Exception( "Multiple indices are aliased with " .
						self::INDEX_NAME . ", please fix manually." );
				}
				$indexName = $index;
			}
		}
		return $indexName;
	}

	/**
	 * Create a fresh index, copy every document except the internal type
	 * into it with the reindex module, then move the alias to it.
	 *
	 * @throws \Exception when the reindex module is not available or when
	 *  the reindex response reports failures
	 */
	private function majorUpgrade() {
		$plugins = $this->configUtils->scanAvailableModules();
		// in_array, not array_search: array_search returns the matching key,
		// which is 0 (falsy) when 'reindex' is the first module listed.
		if ( !in_array( 'reindex', $plugins, true ) ) {
			throw new \Exception( "The reindex module is mandatory to upgrade the metastore" );
		}
		$index = $this->createNewIndex( (string)time() );
		// Reindex everything except the internal type, it's not clear
		// yet if we just need to filter the metastore version info or
		// the whole internal type. Currently we only use the internal
		// type for storing the metastore version.
		$reindex = [
			'source' => [
				'index' => self::INDEX_NAME,
				'query' => [
					'bool' => [
						'must_not' => [
							'type' => [ 'value' => self::INTERNAL_TYPE ]
						],
					]
				],
			],
			'dest' => [ 'index' => $index->getName() ],
		];
		// reindex is extremely fast so we can wait for it
		// we might consider using the task manager if this process
		// becomes longer and/or prone to curl timeouts
		$resp = $this->client->request(
			'_reindex',
			\Elastica\Request::POST,
			$reindex,
			[ 'wait_for_completion' => true ]
		);
		// switchAliasTo() deletes the old index, so stop here if some
		// documents could not be copied.
		$result = $resp->getData();
		if ( !empty( $result['failures'] ) ) {
			throw new \Exception( "Reindex to " . $index->getName() .
				" reported failures, " . self::INDEX_NAME . " alias left untouched." );
		}
		$index->refresh();
		$this->switchAliasTo( $index );
	}

	/**
	 * BC strategy to reuse mw_cirrus_versions as the new mw_cirrus_metastore
	 * If mw_cirrus_versions exists with no mw_cirrus_metastore
	 */
	private function fixOldName() {
		if ( !$this->client->getIndex( self::OLD_INDEX_NAME )->exists() ) {
			return;
		}
		// Old mw_cirrus_versions exists, if mw_cirrus_metastore alias does not
		// exist we must create it
		if ( !$this->client->getIndex( self::INDEX_NAME )->exists() ) {
			$this->log( "Adding transition alias to " . self::OLD_INDEX_NAME . "\n" );
			// Old one exists but new one does not
			// we need to create an alias
			$index = $this->client->getIndex( self::OLD_INDEX_NAME );
			$this->switchAliasTo( $index );
			// The version check (will return 0.0 for
			// mw_cirrus_versions) should schedule an minor or
			// major upgrade.
		}
	}

	/**
	 * @return int[] major, minor version stored in the metastore
	 */
	public function metastoreVersion() {
		return self::getMetastoreVersion( $this->connection );
	}

	/**
	 * @return int[] major, minor version declared by this class
	 */
	public function runtimeVersion() {
		return [ self::METASTORE_MAJOR_VERSION, self::METASTORE_MINOR_VERSION ];
	}

	/**
	 * Write the runtime major/minor version into the internal type of
	 * the given index, under self::METASTORE_VERSION_DOCID.
	 *
	 * @param \Elastica\Index $index new index
	 */
	private function storeMetastoreVersion( $index ) {
		$index->getType( self::INTERNAL_TYPE )->addDocument(
			new \Elastica\Document(
				self::METASTORE_VERSION_DOCID,
				[
					'metastore_major_version' => self::METASTORE_MAJOR_VERSION,
					'metastore_minor_version' => self::METASTORE_MINOR_VERSION,
				]
			)
		);
	}

	/**
	 * Forward a message to the maintenance script, if one is set.
	 *
	 * @param string $msg log message
	 */
	private function log( $msg ) {
		if ( $this->out ) {
			$this->out->output( $msg );
		}
	}

	/**
	 * Get the version tracking index type
	 * @return \Elastica\Type
	 */
	public function versionType() {
		return self::getVersionType( $this->connection );
	}

	/**
	 * Get the frozen indices tracking index type
	 * @return \Elastica\Type $type
	 */
	public function frozenType() {
		return self::getFrozenType( $this->connection );
	}

	/**
	 * Get the sanitize tracking index type
	 * @return \Elastica\Type $type
	 */
	public function sanitizeType() {
		return self::getSanitizeType( $this->connection );
	}

	/**
	 * Get the internal index type
	 * @return \Elastica\Type $type
	 */
	private function internalType() {
		return self::getInternalType( $this->connection );
	}

	/**
	 * Get the version tracking index type
	 * @param Connection $connection
	 * @return \Elastica\Type $type
	 */
	public static function getVersionType( Connection $connection ) {
		return $connection->getIndex( self::INDEX_NAME )->getType( self::VERSION_TYPE );
	}

	/**
	 * Get the sanitize tracking index type
	 * @param Connection $connection
	 * @return \Elastica\Type $type
	 */
	public static function getSanitizeType( Connection $connection ) {
		return $connection->getIndex( self::INDEX_NAME )->getType( self::SANITIZE_TYPE );
	}

	/**
	 * Get the frozen indices tracking index type
	 * @param Connection $connection
	 * @return \Elastica\Type $type
	 */
	public static function getFrozenType( Connection $connection ) {
		return $connection->getIndex( self::INDEX_NAME )->getType( self::FROZEN_TYPE );
	}

	/**
	 * Get the internal index type
	 * @param Connection $connection
	 * @return \Elastica\Type $type
	 */
	private static function getInternalType( Connection $connection ) {
		return $connection->getIndex( self::INDEX_NAME )->getType( self::INTERNAL_TYPE );
	}

	/**
	 * Check if cirrus is ready by checking if some indices have been created on this cluster
	 * @param Connection $connection
	 * @return bool
	 */
	public static function cirrusReady( Connection $connection ) {
		return $connection->getIndex( self::INDEX_NAME )->exists() ||
			$connection->getIndex( self::OLD_INDEX_NAME )->exists();
	}

	/**
	 * @param Connection $connection
	 * @return int[] the major and minor version of the meta store
	 * [0, 0] means that the metastore has never been created
	 * @throws \Elastica\Exception\ResponseException for any response error
	 *  other than a missing self::INDEX_NAME index
	 */
	public static function getMetastoreVersion( Connection $connection ) {
		try {
			$doc = self::getInternalType( $connection )
				->getDocument( self::METASTORE_VERSION_DOCID );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			return [ 0, 0 ];
		} catch ( \Elastica\Exception\ResponseException $e ) {
			// BC code in case the metastore alias does not exist yet
			$fullError = $e->getResponse()->getFullError();
			if ( isset( $fullError['type'] )
				&& $fullError['type'] === 'index_not_found_exception'
				&& isset( $fullError['index'] )
				&& $fullError['index'] === self::INDEX_NAME
			) {
				return [ 0, 0 ];
			}
			throw $e;
		}
		return [
			(int)$doc->get( 'metastore_major_version' ),
			(int)$doc->get( 'metastore_minor_version' )
		];
	}
}