%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/self/root/www/varak.net/wiki.varak.net/extensions/Translate/ttmserver/
Upload File :
Create Path :
Current File : //proc/self/root/www/varak.net/wiki.varak.net/extensions/Translate/ttmserver/DatabaseTTMServer.php

<?php
/**
 * TTMServer - The Translate extension translation memory interface
 *
 * @file
 * @author Niklas Laxström
 * @copyright Copyright © 2012-2013, Niklas Laxström
 * @license GPL-2.0-or-later
 * @ingroup TTMServer
 */

use Wikimedia\Rdbms\DBQueryError;

/**
 * Mysql based backend.
 * @ingroup TTMServer
 * @since 2012-06-27
 */
class DatabaseTTMServer extends TTMServer implements WritableTTMServer, ReadableTTMServer {
	protected $sids;

	/**
	 * @param int $mode DB_REPLICA|DB_MASTER
	 * @return \Wikimedia\Rdbms\IDatabase
	 */
	protected function getDB( $mode = DB_REPLICA ) {
		return wfGetDB( $mode, 'ttmserver', $this->config['database'] );
	}

	public function update( MessageHandle $handle, $targetText ) {
		if ( !$handle->isValid() || $handle->getCode() === '' ) {
			return false;
		}

		$mkey = $handle->getKey();
		$group = $handle->getGroup();
		$targetLanguage = $handle->getCode();
		$sourceLanguage = $group->getSourceLanguage();

		// Skip definitions to not slow down mass imports etc.
		// These will be added when the first translation is made
		if ( $targetLanguage === $sourceLanguage ) {
			return false;
		}

		$definition = $group->getMessage( $mkey, $sourceLanguage );
		if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
			return false;
		}

		$context = Title::makeTitle( $handle->getTitle()->getNamespace(), $mkey );
		$dbw = $this->getDB( DB_MASTER );
		/* Check that the definition exists and fetch the sid. If not, add
		 * the definition and retrieve the sid. If the definition changes,
		 * we will create a new entry - otherwise we could at some point
		 * get suggestions which do not match the original definition any
		 * longer. The old translations are still kept until purged by
		 * rerunning the bootstrap script. */
		$conds = [
			'tms_context' => $context->getPrefixedText(),
			'tms_text' => $definition,
		];

		$sid = $dbw->selectField( 'translate_tms', 'tms_sid', $conds, __METHOD__ );
		if ( $sid === false ) {
			$sid = $this->insertSource( $context, $sourceLanguage, $definition );
		}

		// Delete old translations for this message if any. Could also use replace
		$deleteConds = [
			'tmt_sid' => $sid,
			'tmt_lang' => $targetLanguage,
		];
		$dbw->delete( 'translate_tmt', $deleteConds, __METHOD__ );

		// Insert the new translation
		if ( $targetText !== null ) {
			$row = $deleteConds + [
				'tmt_text' => $targetText,
			];

			$dbw->insert( 'translate_tmt', $row, __METHOD__ );
		}

		return true;
	}

	protected function insertSource( Title $context, $sourceLanguage, $text ) {
		$row = [
			'tms_lang' => $sourceLanguage,
			'tms_len' => mb_strlen( $text ),
			'tms_text' => $text,
			'tms_context' => $context->getPrefixedText(),
		];

		$dbw = $this->getDB( DB_MASTER );
		$dbw->insert( 'translate_tms', $row, __METHOD__ );
		$sid = $dbw->insertId();

		$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
		if ( count( $fulltext ) ) {
			$row = [
				'tmf_sid' => $sid,
				'tmf_text' => implode( ' ', $fulltext ),
			];
			$dbw->insert( 'translate_tmf', $row, __METHOD__ );
		}

		return $sid;
	}

	/**
	 * Tokenizes the text for fulltext search.
	 * Tries to find the most useful tokens.
	 *
	 * @param string $language Language code
	 * @param string $input
	 * @return array
	 */
	protected function filterForFulltext( $language, $input ) {
		$lang = Language::factory( $language );

		$text = preg_replace( '/[^[:alnum:]]/u', ' ', $input );
		$text = $lang->segmentByWord( $text );
		$text = $lang->lc( $text );
		$segments = preg_split( '/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY );
		if ( count( $segments ) < 4 ) {
			return [];
		}

		foreach ( $segments as $i => $segment ) {
			// Yes strlen
			$len = strlen( $segment );
			if ( $len < 4 || $len > 15 ) {
				unset( $segments[$i] );
			}
		}

		$segments = array_unique( $segments );
		$segments = array_slice( $segments, 0, 10 );

		return $segments;
	}

	public function beginBootstrap() {
		$dbw = $this->getDB( DB_MASTER );
		$dbw->delete( 'translate_tms', '*', __METHOD__ );
		$dbw->delete( 'translate_tmt', '*', __METHOD__ );
		$dbw->delete( 'translate_tmf', '*', __METHOD__ );
		$table = $dbw->tableName( 'translate_tmf' );
		try {
			$dbw->query( "DROP INDEX tmf_text ON $table" );
		} catch ( DBQueryError $e ) {
			// Perhaps the script was aborted before it got
			// chance to add the index back.
		}
	}

	public function beginBatch() {
		$this->sids = [];
	}

	public function batchInsertDefinitions( array $batch ) {
		foreach ( $batch as $key => $item ) {
			list( $title, $language, $text ) = $item;
			$handle = new MessageHandle( $title );
			$context = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
			$this->sids[$key] = $this->insertSource( $context, $language, $text );
		}
		wfWaitForSlaves( 10 );
	}

	public function batchInsertTranslations( array $batch ) {
		$rows = [];
		foreach ( $batch as $key => $data ) {
			list( , $language, $text ) = $data;
			$rows[] = [
				'tmt_sid' => $this->sids[$key],
				'tmt_lang' => $language,
				'tmt_text' => $text,
			];
		}

		$dbw = $this->getDB( DB_MASTER );
		$dbw->insert( 'translate_tmt', $rows, __METHOD__ );
		wfWaitForSlaves( 10 );
	}

	public function endBatch() {
	}

	public function endBootstrap() {
		$dbw = $this->getDB( DB_MASTER );
		$table = $dbw->tableName( 'translate_tmf' );
		$dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)" );
	}

	/* Reading interface */

	public function isLocalSuggestion( array $suggestion ) {
		return true;
	}

	public function expandLocation( array $suggestion ) {
		$title = Title::newFromText( $suggestion['location'] );

		return $title->getCanonicalURL();
	}

	public function query( $sourceLanguage, $targetLanguage, $text ) {
		// Calculate the bounds of the string length which are able
		// to satisfy the cutoff percentage in edit distance.
		$len = mb_strlen( $text );
		$min = ceil( max( $len * $this->config['cutoff'], 2 ) );
		$max = floor( $len / $this->config['cutoff'] );

		// We could use fulltext index to narrow the results further
		$dbr = $this->getDB( DB_REPLICA );
		$tables = [ 'translate_tmt', 'translate_tms' ];
		$fields = [ 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' ];

		$conds = [
			'tms_lang' => $sourceLanguage,
			'tmt_lang' => $targetLanguage,
			"tms_len BETWEEN $min AND $max",
			'tms_sid = tmt_sid',
		];

		$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
		if ( $fulltext ) {
			$tables[] = 'translate_tmf';
			$list = implode( ' ', $fulltext );
			$conds[] = 'tmf_sid = tmt_sid';
			$conds[] = "MATCH(tmf_text) AGAINST( '$list' )";
		}

		$res = $dbr->select( $tables, $fields, $conds, __METHOD__ );

		return $this->processQueryResults( $res, $text, $targetLanguage );
	}

	protected function processQueryResults( $res, $text, $targetLanguage ) {
		$timeLimit = microtime( true ) + 5;

		$lenA = mb_strlen( $text );
		$results = [];
		foreach ( $res as $row ) {
			if ( microtime( true ) > $timeLimit ) {
				// Having no suggestions is better than preventing translation
				// altogether by timing out the request :(
				break;
			}

			$a = $text;
			$b = $row->tms_text;
			$lenB = mb_strlen( $b );
			$len = min( $lenA, $lenB );
			if ( $len > 600 ) {
				// two strings of length 1500 ~ 10s
				// two strings of length 2250 ~ 30s
				$dist = $len;
			} else {
				$dist = self::levenshtein( $a, $b, $lenA, $lenB );
			}
			$quality = 1 - ( $dist * 0.9 / $len );

			if ( $quality >= $this->config['cutoff'] ) {
				$results[] = [
					'source' => $row->tms_text,
					'target' => $row->tmt_text,
					'context' => $row->tms_context,
					'location' => $row->tms_context . '/' . $targetLanguage,
					'quality' => $quality,
					'wiki' => isset( $row->tms_wiki ) ? $row->tms_wiki : wfWikiID(),
				];
			}
		}
		$results = TTMServer::sortSuggestions( $results );

		return $results;
	}
}

Zerion Mini Shell 1.0