%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/DataSender.php |
<?php

namespace CirrusSearch;

use CirrusSearch\SearchConfig;
use Elastica\Exception\Bulk\ResponseException;
use MediaWiki\Logger\LoggerFactory;
use Status;
use Title;
use WikiPage;

/**
 * Handles non-maintenance write operations to the elastic search cluster.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
class DataSender extends ElasticsearchIntermediary {
	/**
	 * Document id used in the frozen-index type to mark every index as frozen.
	 */
	const ALL_INDEXES_FROZEN_NAME = 'freeze_everything';

	/** @var \Psr\Log\LoggerInterface general logger ('CirrusSearch' channel) */
	private $log;

	/** @var \Psr\Log\LoggerInterface logger for failed writes ('CirrusSearchChangeFailed' channel) */
	private $failedLog;

	/**
	 * @var string base name used to build index names for this wiki
	 */
	private $indexBaseName;

	/**
	 * @var SearchConfig
	 */
	private $searchConfig;

	/**
	 * @param Connection $conn connection forwarded to the parent constructor;
	 *  this class reads it back as $this->connection (presumably assigned by
	 *  the parent class - it is not declared here).
	 * @param SearchConfig $config source of the index base name and of the
	 *  write settings read by this class.
	 */
	public function __construct( Connection $conn, SearchConfig $config ) {
		parent::__construct( $conn, null, 0 );
		$this->log = LoggerFactory::getInstance( 'CirrusSearch' );
		$this->failedLog = LoggerFactory::getInstance( 'CirrusSearchChangeFailed' );
		$this->indexBaseName = $config->get( SearchConfig::INDEX_BASE_NAME );
		$this->searchConfig = $config;
	}

	/**
	 * Disallow writes to the specified indexes.
	 *
	 * One marker document per frozen name is upserted into the frozen-index
	 * type, then that index is refreshed.
	 *
	 * @param string[]|null $indexes List of index types to disallow writes to.
	 *  null means to prevent indexing in all indexes across all wikis.
	 *  An empty array is a no-op.
	 */
	public function freezeIndexes( array $indexes = null ) {
		global $wgCirrusSearchUpdateConflictRetryCount;

		if ( $indexes === null ) {
			$names = [ self::ALL_INDEXES_FROZEN_NAME ];
		} else {
			if ( count( $indexes ) === 0 ) {
				return;
			}
			$names = $this->indexesToIndexNames( $indexes );
		}

		$this->log->info( "Freezing writes to: " . implode( ',', $names ) );

		$documents = [];
		foreach ( $names as $indexName ) {
			// The marker document uses the index name as its id.
			$doc = new \Elastica\Document( $indexName, [
				'name' => $indexName,
			] );
			$doc->setDocAsUpsert( true );
			$doc->setRetryOnConflict( $wgCirrusSearchUpdateConflictRetryCount );
			$documents[] = $doc;
		}

		$client = $this->connection->getClient();
		$type = $this->connection->getFrozenIndexNameType();
		// Elasticsearch has a queue capacity of 50 so if $data
		// contains 50 documents it could bump up against the max. So
		// we chunk it and do them sequentially.
		foreach ( array_chunk( $documents, 30 ) as $data ) {
			$bulk = new \Elastica\Bulk( $client );
			$bulk->setType( $type );
			$bulk->addData( $data, 'update' );
			$bulk->send();
		}

		// Ensure our freeze is immediately seen (mostly for testing
		// purposes)
		$type->getIndex()->refresh();
	}

	/**
	 * Allow writes to the specified indexes.
	 *
	 * @param string[]|null $indexes List of index types to allow writes to.
	 *  null means to remove the global freeze on all indexes. Null does not
	 *  thaw indexes that were individually frozen. An empty array is a no-op.
	 */
	public function thawIndexes( array $indexes = null ) {
		if ( $indexes === null ) {
			$names = [ self::ALL_INDEXES_FROZEN_NAME ];
		} else {
			if ( count( $indexes ) === 0 ) {
				return;
			}
			$names = $this->indexesToIndexNames( $indexes );
		}

		$this->log->info( "Thawing writes to " . implode( ',', $names ) );
		$this->connection->getFrozenIndexNameType()->deleteIds( $names );
	}

	/**
	 * Checks if all the specified indexes are available for writes. They might
	 * not currently allow writes during procedures like reindexing or rolling
	 * restarts.
	 *
	 * The frozen-index type is searched for the ids of the given indexes plus
	 * the global freeze marker; any hit means writes are not allowed.
	 *
	 * @param string[] $indexes List of index names to check for availability.
	 * @param bool $areIndexesFullyQualified Set to true if the provided $indexes are
	 *  already fully qualified elasticsearch index names.
	 * @return bool true when no freeze marker matched (or $indexes is empty)
	 */
	public function areIndexesAvailableForWrites( array $indexes, $areIndexesFullyQualified = false ) {
		if ( count( $indexes ) === 0 ) {
			return true;
		}
		if ( !$areIndexesFullyQualified ) {
			$indexes = $this->indexesToIndexNames( $indexes );
		}

		$ids = new \Elastica\Query\Ids( null, $indexes );
		$ids->addId( self::ALL_INDEXES_FROZEN_NAME );
		$resp = $this->connection->getFrozenIndexNameType()->search( $ids );

		return $resp->count() === 0;
	}

	/**
	 * Send a bulk 'update' request for the given documents.
	 *
	 * @param string $indexType type of index to which to send $data
	 * @param (\Elastica\Script|\Elastica\Document)[] $data documents to send
	 * @return Status good when $data is empty, when the bulk request returned
	 *  at least one bulk response, or when its only failures were missing
	 *  documents; fatal 'cirrussearch-indexes-frozen' when the index is
	 *  frozen; fatal 'cirrussearch-failed-send-data' otherwise.
	 */
	public function sendData( $indexType, $data ) {
		$documentCount = count( $data );
		if ( $documentCount === 0 ) {
			return Status::newGood();
		}

		if ( !$this->areIndexesAvailableForWrites( [ $indexType ] ) ) {
			return Status::newFatal( 'cirrussearch-indexes-frozen' );
		}

		$exception = null;
		$responseSet = null;
		$justDocumentMissing = false;
		try {
			$pageType = $this->connection->getPageType( $this->indexBaseName, $indexType );
			$this->start( "sending {numBulk} documents to the {indexType} index", [
				'numBulk' => $documentCount,
				'indexType' => $indexType,
				'queryType' => 'send_data_write',
			] );
			$bulk = new \Elastica\Bulk( $this->connection->getClient() );
			$bulk->setShardTimeout( $this->searchConfig->get( 'CirrusSearchUpdateShardTimeout' ) );
			$bulk->setType( $pageType );
			$bulk->addData( $data, 'update' );
			$responseSet = $bulk->send();
		} catch ( ResponseException $e ) {
			// Missing documents are logged by the callback but do not fail the
			// request; any other bulk failure does.
			$justDocumentMissing = $this->bulkResponseExceptionIsJustDocumentMissing( $e,
				function ( $docId ) use ( $indexType ) {
					$this->log->info(
						"Updating a page that doesn't yet exist in Elasticsearch: {docId}",
						[ 'docId' => $docId, 'indexType' => $indexType ]
					);
				}
			);
			if ( !$justDocumentMissing ) {
				$exception = $e;
			}
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			$exception = $e;
		}

		$validResponse = $responseSet !== null && count( $responseSet->getBulkResponses() ) > 0;
		if ( $exception === null && ( $justDocumentMissing || $validResponse ) ) {
			$this->success();
			return Status::newGood();
		}

		// $exception may be null here: the request did not throw but returned
		// no bulk responses.
		$this->failure( $exception );
		$documentIds = array_map( function ( $d ) {
			return $d->getId();
		}, $data );
		$this->failedLog->warning(
			'Update for doc ids: ' . implode( ',', $documentIds ),
			$exception ? [ 'exception' => $exception ] : []
		);
		return Status::newFatal( 'cirrussearch-failed-send-data' );
	}

	/**
	 * Send delete requests to Elasticsearch.
	 *
	 * @param string[] $docIds elasticsearch document ids to delete
	 * @param string|null $indexType index from which to delete. null means all.
	 * @return Status good when $docIds is empty or every delete succeeded;
	 *  fatal 'cirrussearch-indexes-frozen' when a target index is frozen;
	 *  fatal 'cirrussearch-failed-send-deletes' when a delete request threw.
	 */
	public function sendDeletes( $docIds, $indexType = null ) {
		$idCount = count( $docIds );
		if ( $idCount === 0 ) {
			// Nothing to delete: succeed before the frozen-index lookup, the
			// same way sendData() handles an empty document list.
			return Status::newGood();
		}

		if ( $indexType === null ) {
			$indexes = $this->connection->getAllIndexTypes();
		} else {
			$indexes = [ $indexType ];
		}

		if ( !$this->areIndexesAvailableForWrites( $indexes ) ) {
			return Status::newFatal( 'cirrussearch-indexes-frozen' );
		}

		try {
			foreach ( $indexes as $currentType ) {
				$this->start( "deleting {numIds} from {indexType}", [
					'numIds' => $idCount,
					'indexType' => $currentType,
					'queryType' => 'send_deletes',
				] );
				$this->connection
					->getPageType( $this->indexBaseName, $currentType )
					->deleteIds( $docIds );
				$this->success();
			}
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			// The first failing index aborts the loop; remaining indexes are
			// not attempted.
			$this->failure( $e );
			$this->failedLog->warning(
				'Delete for ids: ' . implode( ',', $docIds ),
				[ 'exception' => $e ]
			);
			return Status::newFatal( 'cirrussearch-failed-send-deletes' );
		}

		return Status::newGood();
	}

	/**
	 * Add or remove $localSite from the local_sites_with_dupe field of
	 * documents living in another index.
	 *
	 * Updates are sent in bulk requests of at most 30 scripts. A failed chunk
	 * adds an error to the returned status but later chunks are still sent.
	 *
	 * @param string $localSite The wikiId to add/remove from local_sites_with_dupe
	 * @param string $indexName The name of the index to perform updates to
	 * @param array $otherActions A list of arrays each containing the id within
	 *  elasticsearch ('docId') and the article namespace ('ns') and DB key
	 *  ('dbKey') within $localSite
	 * @return Status fatal 'cirrussearch-indexes-frozen' when $indexName is
	 *  frozen; otherwise a status carrying one
	 *  'cirrussearch-failed-update-otherindex' error per failed chunk.
	 */
	public function sendOtherIndexUpdates( $localSite, $indexName, array $otherActions ) {
		if ( !$this->areIndexesAvailableForWrites( [ $indexName ], true ) ) {
			return Status::newFatal( 'cirrussearch-indexes-frozen' );
		}

		$client = $this->connection->getClient();
		$status = Status::newGood();
		foreach ( array_chunk( $otherActions, 30 ) as $updates ) {
			$bulk = new \Elastica\Bulk( $client );
			$titles = [];
			foreach ( $updates as $update ) {
				$title = Title::makeTitle( $update['ns'], $update['dbKey'] );
				$action = $this->decideRequiredSetAction( $title );
				$script = new \Elastica\Script\Script(
					'super_detect_noop',
					[
						'source' => [
							'local_sites_with_dupe' => [ $action => $localSite ],
						],
						'handlers' => [ 'local_sites_with_dupe' => 'set' ],
					],
					'native'
				);
				$script->setId( $update['docId'] );
				$script->setParam( '_type', 'page' );
				$script->setParam( '_index', $indexName );
				$bulk->addScript( $script, 'update' );
				$titles[] = $title;
			}

			// Execute the bulk update
			$exception = null;
			try {
				$this->start( "updating {numBulk} documents in other indexes", [
					'numBulk' => count( $updates ),
					'queryType' => 'send_data_other_idx_write',
				] );
				$bulk->send();
			} catch ( ResponseException $e ) {
				// Failures that are only missing documents are not errors.
				if ( !$this->bulkResponseExceptionIsJustDocumentMissing( $e ) ) {
					$exception = $e;
				}
			} catch ( \Elastica\Exception\ExceptionInterface $e ) {
				$exception = $e;
			}

			if ( $exception === null ) {
				$this->success();
			} else {
				$this->failure( $exception );
				// Keep this message on a single line so log consumers see one
				// record per failure.
				$this->failedLog->warning(
					"OtherIndex update for articles: " . implode( ',', $titles ),
					[ 'exception' => $exception ]
				);
				$status->error( 'cirrussearch-failed-update-otherindex' );
			}
		}

		return $status;
	}

	/**
	 * Decide what action is required to the other index to make it up
	 * to date with the current wiki state. This will always check against
	 * the master database.
	 *
	 * @param Title $title The title to decide the action for
	 * @return string The set action to be performed. Either 'add' or 'remove'
	 */
	protected function decideRequiredSetAction( Title $title ) {
		$page = new WikiPage( $title );
		$page->loadPageData( 'fromdbmaster' );

		return $page->exists() ? 'add' : 'remove';
	}

	/**
	 * Check if $exception is a bulk response exception that just contains
	 * document is missing failures.
	 *
	 * @param ResponseException $exception exception to check
	 * @param callable|null $logCallback Callback in which to do some logging.
	 *  Callback will be passed the id of the missing document, or the string
	 *  'missing' when the id cannot be read from the bulk action.
	 * @return bool false as soon as any failed item is something other than a
	 *  missing document; true otherwise
	 */
	protected function bulkResponseExceptionIsJustDocumentMissing(
		ResponseException $exception, $logCallback = null
	) {
		$justDocumentMissing = true;
		foreach ( $exception->getResponseSet()->getBulkResponses() as $bulkResponse ) {
			if ( !$bulkResponse->hasError() ) {
				continue;
			}

			$error = $bulkResponse->getFullError();
			if ( is_string( $error ) ) {
				// es 1.7 cluster
				$message = $bulkResponse->getError();
				if ( false === strpos( $message, 'DocumentMissingException' ) ) {
					$justDocumentMissing = false;
					continue;
				}
			} else {
				// es 2.x cluster
				if ( $error['type'] !== 'document_missing_exception' ) {
					$justDocumentMissing = false;
					continue;
				}
			}

			if ( $logCallback ) {
				// This is generally not an error but we should
				// log it to see how many we get
				$action = $bulkResponse->getAction();
				$docId = 'missing';
				if ( $action instanceof \Elastica\Bulk\Action\AbstractDocument ) {
					$docId = $action->getData()->getId();
				}
				call_user_func( $logCallback, $docId );
			}
		}

		return $justDocumentMissing;
	}

	/**
	 * Convert index types into index names via the connection, using this
	 * wiki's index base name.
	 *
	 * @param string[] $indexes index types
	 * @return string[] index names, in the same order as $indexes
	 */
	public function indexesToIndexNames( array $indexes ) {
		$names = [];
		foreach ( $indexes as $indexType ) {
			$names[] = $this->connection->getIndexName( $this->indexBaseName, $indexType );
		}

		return $names;
	}
}