%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/985914/root/www/varak.net/wiki.varak.net/extensions/Translate/ttmserver/
Upload File :
Create Path :
Current File : //proc/985914/root/www/varak.net/wiki.varak.net/extensions/Translate/ttmserver/SolrTTMServer.php

<?php
/**
 * TTMServer - The Translate extension translation memory interface
 *
 * @file
 * @author Niklas Laxström
 * @copyright Copyright © 2012-2013, Niklas Laxström
 * @license GPL-2.0-or-later
 * @ingroup TTMServer
 */

/**
 * TTMServer backend based on a Solr instance. Depends on Solarium.
 * @since 2012-06-27
 * @ingroup TTMServer
 * @deprecated 1.27. Will be removed in 1.29.
 */
class SolrTTMServer
	extends TTMServer
	implements ReadableTTMServer, SearchableTTMServer, WritableTTMServer
{
	/**
	 * In case auto-commit is not enabled, or even if it is, tell solr to
	 * commit before this time has passed, in milliseconds.
	 */
	const COMMIT_WITHIN = 5000;

	/**
	 * Solarium client used for all select and update requests.
	 */
	protected $client;

	/**
	 * Reference to the maintenance script to relay logging output.
	 */
	protected $logger;

	/**
	 * Creates the Solarium client, passing $config['config'] to it when
	 * that key is set.
	 *
	 * @param array $config TTMServer configuration. The 'cutoff' key is
	 *  read later by doQuery().
	 */
	public function __construct( $config ) {
		wfDeprecated( __METHOD__, '1.24' );

		parent::__construct( $config );

		if ( isset( $config['config'] ) ) {
			$this->client = new Solarium_Client( $config['config'] );
		} else {
			$this->client = new Solarium_Client();
		}
	}

	/**
	 * Tells whether a suggestion was stored by the current wiki.
	 *
	 * @param array $suggestion Suggestion as returned by query().
	 * @return bool
	 */
	public function isLocalSuggestion( array $suggestion ) {
		return $suggestion['wiki'] === wfWikiID();
	}

	/**
	 * Returns the stored URI of a suggestion.
	 *
	 * @param array $suggestion Suggestion as returned by query().
	 * @return string
	 */
	public function expandLocation( array $suggestion ) {
		return $suggestion['uri'];
	}

	/**
	 * Fetches translation memory suggestions for the given text.
	 *
	 * @param string $sourceLanguage
	 * @param string $targetLanguage
	 * @param string $text
	 * @return array[] Suggestions, best quality first. See doQuery().
	 * @throws TranslationHelperException If Solarium throws.
	 */
	public function query( $sourceLanguage, $targetLanguage, $text ) {
		try {
			return $this->doQuery( $sourceLanguage, $targetLanguage, $text );
		} catch ( Solarium_Exception $e ) {
			throw new TranslationHelperException( 'Solarium exception: ' . $e );
		}
	}

	/**
	 * Does the actual work for query().
	 *
	 * @see ReadableTTMServer::query
	 * @param string $sourceLanguage
	 * @param string $targetLanguage
	 * @param string $text
	 * @return array[] Each suggestion has the keys source, target, context,
	 *  quality, wiki, location and uri. Original numeric keys are preserved
	 *  by the sort, so they are not necessarily sequential.
	 */
	protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
		/* Two query system:
		 * 1) Find all strings in source language that match text
		 * 2) Do another query for translations for those strings
		 */
		// For now impose a length limit on query string to avoid doing
		// very slow queries. Magic number.
		if ( strlen( $text ) > 789 ) {
			return [];
		}

		$query = $this->client->createSelect();
		$query->setFields( [ 'globalid', 'content', 'score' ] );

		/* The interface usually displays three best candidates. These might
		 * come from more than three matches, if the translation is the same.
		 * This might not find all suggestions, if the top N best matching
		 * source texts don't have translations, but worse matches do. We
		 * could loop with start parameter to fetch more until we have enough
		 * suggestions or the quality drops below the cutoff point. */
		$query->setRows( 25 );

		/* Our string can contain all kind of nasty characters, so we need
		 * escape them with great pain. */
		$helper = $query->getHelper();
		$dist = $helper->escapePhrase( $text );
		// "edit" could also be ngram of other algorithm
		$dist = "strdist($dist,content,edit)";
		/* Note how we need to escape twice here, first the string for strdist
		 * and then the strdist call itself for the query. And of course every-
		 * thing will be URL encoded once sent over the line. */
		$query->setQuery( '_val_:%P1%', [ $dist ] );

		/* Filter queries are supposed to be efficient as they are separately
		 * cached, but I haven't done any benchmarks. */
		$query->createFilterQuery( 'lang' )
			->setQuery( 'language:%P1%', [ $sourceLanguage ] );

		$resultset = $this->client->select( $query );

		/* This loop is doing two unrelated things:
		 * 1) Collect the message contents and scores so that they can
		 *    be accessed later for the translations we found.
		 * 2) Build the query string for the query that fetches the
		 *    translations.
		 * This code is a bit uglier than I'd like it to be, since
		 * there is no field that globally identifies a message (message
		 * definition and translations). */
		$contents = $scores = [];
		$queryString = '';
		foreach ( $resultset as $doc ) {
			// Strip the trailing "/<language>" part of the global id
			$sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );
			$contents[$sourceId] = $doc->content;
			$scores[$sourceId] = $doc->score;

			$globalid = $helper->escapePhrase( "$sourceId/$targetLanguage" );
			$queryString .= "globalid:$globalid ";
		}

		// No matching source strings: there is nothing to look translations
		// up for, and sending an empty query to the server is pointless.
		if ( $queryString === '' ) {
			return [];
		}

		// Second query to fetch available translations
		$fetchQuery = $this->client->createSelect();
		$fetchQuery->setFields( [ 'wiki', 'uri', 'content', 'messageid', 'globalid' ] );
		// This come in random order, so have to fetch all and sort
		$fetchQuery->setRows( 25 );
		$fetchQuery->setQuery( $queryString );
		// With AND we would not find anything, obviously.
		$fetchQuery->setQueryDefaultOperator( Solarium_Query_Select::QUERY_OPERATOR_OR );

		$translations = $this->client->select( $fetchQuery );

		$suggestions = [];
		foreach ( $translations as $doc ) {
			/* Construct the matching source id */
			$sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );

			/* Unfortunately we cannot do this on the search server,
			 * because score is not a real field and thus cannot be
			 * used in a filter query. */
			$quality = $scores[$sourceId];
			if ( $quality < $this->config['cutoff'] ) {
				continue;
			}

			$suggestions[] = [
				'source' => $contents[$sourceId],
				'target' => $doc->content,
				'context' => $doc->messageid,
				'quality' => $quality,
				'wiki' => $doc->wiki,
				'location' => $doc->messageid . '/' . $targetLanguage,
				'uri' => $doc->uri,
			];
		}

		/* Like mentioned above, we get results in random order. Sort them
		 * now to have best matches first as expected by callers. */
		uasort( $suggestions, function ( $a, $b ) {
			if ( $a['quality'] === $b['quality'] ) {
				return 0;
			}

			return ( $a['quality'] < $b['quality'] ) ? 1 : -1;
		} );

		return $suggestions;
	}

	/* Write functions */

	/**
	 * Updates the index for a single message.
	 *
	 * @param MessageHandle $handle
	 * @param string|null $targetText New text, or null to only delete the
	 *  old entry without inserting a new document.
	 * @return bool False if the handle has an empty language code or the
	 *  update request failed, true otherwise.
	 */
	public function update( MessageHandle $handle, $targetText ) {
		if ( $handle->getCode() === '' ) {
			return false;
		}

		/* There are various different cases here:
		 * [new or updated] [fuzzy|non-fuzzy] [translation|definition]
		 * 1) We don't distinguish between new or updated here.
		 * 2) Delete old translation, but not definition
		 * 3) Insert new translation or definition, if non-fuzzy
		 * The definition should never be fuzzied anyway.
		 *
		 * These only apply to known messages.
		 */

		$update = $this->client->createUpdate();
		$title = $handle->getTitle();

		$doDelete = true;
		$sourceLanguage = '';
		if ( $handle->isValid() ) {
			$sourceLanguage = $handle->getGroup()->getSourceLanguage();
			if ( $handle->getCode() === $sourceLanguage ) {
				$doDelete = false;
			}
		}

		if ( $doDelete ) {
			$base = Title::makeTitle( $title->getNamespace(), $handle->getKey() );
			$conds = [
				'wiki' => wfWikiID(),
				'language' => $handle->getCode(),
				'messageid' => $base->getPrefixedText(),
			];
			foreach ( $conds as $key => &$value ) {
				$value = "$key:" . $update->getHelper()->escapePhrase( $value );
			}
			// Break the reference so that later writes to $value cannot
			// silently modify the last element of $conds.
			unset( $value );
			$update->addDeleteQuery( implode( ' AND ', $conds ) );
		}

		if ( $targetText !== null ) {
			if ( $handle->isValid() ) {
				// Of the message definition page
				$targetTitle = $handle->getTitle();
				$sourceTitle = Title::makeTitle(
					$targetTitle->getNamespace(),
					$handle->getKey() . '/' . $sourceLanguage
				);
				$revId = (int)$sourceTitle->getLatestRevID();
				/* Note: in some cases the source page might not exist, in this case
				 * we use 0 as message version identifier, to differentiate them from
				 * orphan messages */
			} else {
				$revId = 'orphan';
			}

			$doc = $this->createDocument( $handle, $targetText, $revId );
			// Add document and commit within X seconds.
			$update->addDocument( $doc, null, self::COMMIT_WITHIN );
		}

		try {
			$this->client->update( $update );
		} catch ( Solarium_Exception $e ) {
			error_log( 'SolrTTMServer update-write failed' );

			return false;
		}

		return true;
	}

	/**
	 * Builds the Solr document for one message text.
	 *
	 * @see schema.xml
	 * @param MessageHandle $handle
	 * @param string $text
	 * @param int|string $revId Latest revision id of the source page, or the
	 *  string 'orphan' as passed by update() for invalid handles.
	 * @return Solarium_Document_ReadWrite
	 */
	protected function createDocument( MessageHandle $handle, $text, $revId ) {
		$language = $handle->getCode();
		$translationTitle = $handle->getTitle();

		$title = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
		$wiki = wfWikiID();
		$messageid = $title->getPrefixedText();
		// doQuery() relies on the "/<language>" suffix of this format
		$globalid = "$wiki-$messageid-$revId/$language";

		$doc = new Solarium_Document_ReadWrite();
		$doc->wiki = $wiki;
		$doc->uri = $translationTitle->getCanonicalURL();
		$doc->messageid = $messageid;
		$doc->globalid = $globalid;

		$doc->language = $language;
		$doc->content = $text;
		$doc->setField( 'group', $handle->getGroupIds() );

		return $doc;
	}

	/**
	 * Starts a bootstrap by deleting every document stored for this wiki
	 * and committing the deletion.
	 */
	public function beginBootstrap() {
		$update = $this->client->createUpdate();
		$query = 'wiki:' . $update->getHelper()->escapePhrase( wfWikiID() );
		$update->addDeleteQuery( $query );
		$update->addCommit();
		$this->client->update( $update );
	}

	/**
	 * Nothing needs to be prepared before a batch.
	 */
	public function beginBatch() {
		// Intentionally empty
	}

	/**
	 * Inserts a batch of message definitions.
	 *
	 * Runs a LinkBatch over the handles' titles first and then delegates to
	 * batchInsertTranslations().
	 *
	 * @param array $batch List of [ MessageHandle, source language, text ].
	 */
	public function batchInsertDefinitions( array $batch ) {
		$lb = new LinkBatch();
		foreach ( $batch as $data ) {
			$lb->addObj( $data[0]->getTitle() );
		}
		$lb->execute();

		$this->batchInsertTranslations( $batch );
	}

	/**
	 * Inserts a batch of texts, retrying the update request on HTTP errors.
	 *
	 * @param array $batch List of [ MessageHandle, source language, text ].
	 * @throws Solarium_Client_HttpException If the last attempt fails too.
	 */
	public function batchInsertTranslations( array $batch ) {
		$update = $this->client->createUpdate();
		foreach ( $batch as $data ) {
			list( $handle, $sourceLanguage, $text ) = $data;
			$revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
			$doc = $this->createDocument( $handle, $text, $revId );
			// Add document and commit within X seconds.
			$update->addDocument( $doc, null, self::COMMIT_WITHIN );
		}

		// Up to five attempts in total; the exception of the last failed
		// attempt is propagated to the caller.
		$retries = 5;

		while ( $retries-- > 0 ) {
			try {
				$this->client->update( $update );
				break;
			} catch ( Solarium_Client_HttpException $e ) {
				if ( $retries === 0 ) {
					throw $e;
				} else {
					$c = get_class( $e );
					$msg = $e->getMessage();
					$this->logOutput( "Batch failed ($c: $msg), trying again in 10 seconds" );
					sleep( 10 );
				}
			}
		}
	}

	/**
	 * Ends a batch by sending an update request with no commands added.
	 */
	public function endBatch() {
		$update = $this->client->createUpdate();
		$this->client->update( $update );
	}

	/**
	 * Ends a bootstrap by committing and optimizing the index.
	 */
	public function endBootstrap() {
		$update = $this->client->createUpdate();
		$update->addCommit();
		$update->addOptimize();
		$this->client->update( $update );
	}

	/**
	 * @return Solarium_Client The client created in the constructor.
	 */
	public function getSolarium() {
		return $this->client;
	}

	/**
	 * Sets the object used to relay log output.
	 *
	 * @param object $logger Must provide a statusLine() method.
	 */
	public function setLogger( $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Relays a line of output to the logger, if one has been set.
	 *
	 * @param string $text Message without trailing newline.
	 */
	protected function logOutput( $text ) {
		if ( $this->logger ) {
			$this->logger->statusLine( "$text\n" );
		}
	}

	/**
	 * Search interface
	 * @param string $queryString
	 * @param array $opts Must contain the keys limit, offset, language and
	 *  group. An empty string for language or group disables that filter.
	 * @param array $highlight Pair of [ prefix, postfix ] strings placed
	 *  around highlighted matches.
	 * @return mixed Whatever the Solarium client's select() returns. It is
	 *  meant to be passed to getFacets(), getTotalHits() and getDocuments().
	 * @throws TTMServerException If the HTTP request to Solr fails.
	 */
	public function search( $queryString, $opts, $highlight ) {
		$client = $this->getSolarium();

		$query = $client->createSelect();
		$dismax = $query->getDisMax();
		$dismax->setQueryParser( 'edismax' );
		$query->setQuery( $queryString );
		$query->setRows( $opts['limit'] );
		$query->setStart( $opts['offset'] );

		list( $pre, $post ) = $highlight;
		$hl = $query->getHighlighting();
		$hl->setFields( 'text' );
		$hl->setSimplePrefix( $pre );
		$hl->setSimplePostfix( $post );
		$hl->setMaxAnalyzedChars( '5000' );
		$hl->setFragSize( '5000' );
		$hl->setSnippets( 1 );

		$languageFilter = $opts['language'];
		if ( $languageFilter !== '' ) {
			$query->createFilterQuery( 'languageFilter' )
				->setQuery( 'language:%P1%', [ $languageFilter ] )
				->addTag( 'filter' );
		}

		$groupFilter = $opts['group'];
		if ( $groupFilter !== '' ) {
			$query->createFilterQuery( 'groupFilter' )
				->setQuery( 'group:%P1%', [ $groupFilter ] )
				->addTag( 'filter' );
		}

		$facetSet = $query->getFacetSet();

		// Both facets exclude the filter queries tagged 'filter' above
		$language = $facetSet->createFacetField( 'language' );
		$language->setField( 'language' );
		$language->setMinCount( 1 );
		$language->addExclude( 'filter' );

		$group = $facetSet->createFacetField( 'group' );
		$group->setField( 'group' );
		$group->setMinCount( 1 );
		$group->setMissing( true );
		$group->addExclude( 'filter' );

		try {
			return $client->select( $query );
		} catch ( Solarium_Client_HttpException $e ) {
			// TTMServer itself is the parent class and not throwable, so
			// the dedicated exception class has to be used here.
			throw new TTMServerException( $e->getMessage() );
		}
	}

	/**
	 * Extracts the language and group facets from a search result.
	 *
	 * @param mixed $resultset Value returned by search().
	 * @return array[] Arrays for the keys 'language' and 'group'.
	 */
	public function getFacets( $resultset ) {
		return [
			'language' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'language' ) ),
			'group' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'group' ) ),
		];
	}

	/**
	 * @param mixed $resultset Value returned by search().
	 * @return int Number of hits as reported by the result set.
	 */
	public function getTotalHits( $resultset ) {
		return $resultset->getNumFound();
	}

	/**
	 * Converts the documents of a search result into plain arrays.
	 *
	 * @param mixed $resultset Value returned by search().
	 * @return array[] One array of fields per document. 'localid' mirrors
	 *  'messageid' and 'content' holds the highlighted text when available,
	 *  the document's plain text otherwise.
	 */
	public function getDocuments( $resultset ) {
		$highlighting = $resultset->getHighlighting();
		$ret = [];
		foreach ( $resultset as $document ) {
			$fields = iterator_to_array( $document );
			// Compatibility mapping
			$fields['localid'] = $fields['messageid'];

			$hdoc = $highlighting->getResult( $document->globalid );
			$text = $hdoc->getField( 'text' );
			if ( $text === [] ) {
				// No highlighted snippet, fall back to the plain text
				$text = $document->text;
			} else {
				$text = $text[0];
			}

			$fields['content'] = $text;
			$ret[] = $fields;
		}

		return $ret;
	}
}

Zerion Mini Shell 1.0