%PDF- %PDF-
| Direktori : /www/varak.net/wiki.varak.net/extensions/Translate/utils/ |
| Current File : /www/varak.net/wiki.varak.net/extensions/Translate/utils/MessageGroupStats.php |
<?php
/**
* This file aims to provide efficient mechanism for fetching translation completion stats.
*
* @file
* @author Wikia (trac.wikia-code.com/browser/wikia/trunk/extensions/wikia/TranslationStatistics)
* @author Niklas Laxström
* @copyright Copyright © 2012-2013 Niklas Laxström
* @license GPL-2.0-or-later
*/
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\IDatabase;
/**
* This class abstracts MessageGroup statistics calculation and storing.
* You can access stats easily per language or per group.
* Stat array for each item is of format array( total, translated, fuzzy, proofread ).
*
* @ingroup Stats MessageGroups
*/
class MessageGroupStats {
	/// Name of the database table
	const TABLE = 'translate_groupstats';

	const TOTAL = 0; ///< Array index
	const TRANSLATED = 1; ///< Array index
	const FUZZY = 2; ///< Array index
	const PROOFREAD = 3; ///< Array index

	/**
	 * Value of microtime( true ) when setTimeLimit() was called, or null
	 * when no time limit is active.
	 * @var float|null
	 */
	protected static $timeStart = null;

	/**
	 * Maximum number of seconds to spend calculating, counted from $timeStart.
	 * @var float|null
	 */
	protected static $limit = null;

	/**
	 * Rows calculated during this request that are waiting to be written
	 * to the database by queueUpdates().
	 * @var array[]
	 */
	protected static $updates = [];

	/**
	 * Set the maximum time statistics are calculated.
	 * If the time limit is exceeded, the missing
	 * entries will be null.
	 * @param float $limit time in seconds
	 */
	public static function setTimeLimit( $limit ) {
		self::$timeStart = microtime( true );
		self::$limit = $limit;
	}

	/**
	 * Returns empty stats array. Useful because the number of elements
	 * may change.
	 * @return int[]
	 * @since 2012-09-21
	 */
	public static function getEmptyStats() {
		return [ 0, 0, 0, 0 ];
	}

	/**
	 * Returns empty stats array that indicates stats are incomplete or
	 * unknown.
	 * @return null[]
	 * @since 2013-01-02
	 */
	protected static function getUnknownStats() {
		return [ null, null, null, null ];
	}

	/**
	 * Returns stats for given group in given language.
	 * @param string $id Group id
	 * @param string $code Language code
	 * @return null[]|int[] Unknown stats (all null) are returned for unknown
	 *  and dynamic groups, and when the time limit has been exceeded.
	 */
	public static function forItem( $id, $code ) {
		$group = MessageGroups::getGroup( $id );
		// An unknown group id yields null (forGroup() relies on the same
		// contract). Everything below needs a real group object, so there is
		// nothing to calculate or store in that case.
		if ( $group === null ) {
			return self::getUnknownStats();
		}

		$res = self::selectRowsIdLang( [ $id ], $code );
		$stats = self::extractResults( $res, [ $id ] );

		/* In case some code calls this for dynamic groups, return the default
		 * values for unknown/incomplete stats. Calculating these numbers doesn't
		 * make sense for dynamic groups, and would just throw an exception. */
		if ( MessageGroups::isDynamic( $group ) ) {
			$stats[$id][$code] = self::getUnknownStats();
		}

		if ( !isset( $stats[$id][$code] ) ) {
			$stats[$id][$code] = self::forItemInternal( $stats, $group, $code );
		}

		self::queueUpdates();

		return $stats[$id][$code];
	}

	/**
	 * Returns stats for all groups in given language.
	 * @param string $code Language code
	 * @return array Stat arrays indexed by group id
	 */
	public static function forLanguage( $code ) {
		$stats = self::forLanguageInternal( $code );

		// Drop the language dimension: only one language was requested
		$flattened = [];
		foreach ( $stats as $group => $languages ) {
			$flattened[$group] = $languages[$code];
		}

		self::queueUpdates();

		return $flattened;
	}

	/**
	 * Returns stats for all languages in given group.
	 * @param string $id Group id
	 * @return array Stat arrays indexed by language code; empty if the group
	 *  does not exist
	 */
	public static function forGroup( $id ) {
		$group = MessageGroups::getGroup( $id );
		if ( $group === null ) {
			return [];
		}

		$stats = self::forGroupInternal( $group );

		self::queueUpdates();

		return $stats[$id];
	}

	/**
	 * Returns stats for all groups in all languages.
	 * Might be slow, might use lots of memory.
	 * Returns two dimensional array indexed by group and language.
	 * @return array
	 */
	public static function forEverything() {
		$groups = MessageGroups::singleton()->getGroups();
		$stats = [];
		foreach ( $groups as $g ) {
			$stats = self::forGroupInternal( $g, $stats );
		}

		self::queueUpdates();

		return $stats;
	}

	/**
	 * Clears the cache for all groups associated with the message.
	 *
	 * Hook: TranslateEventTranslationReview
	 * @param MessageHandle $handle
	 */
	public static function clear( MessageHandle $handle ) {
		$code = $handle->getCode();
		$dbids = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $handle->getGroupIds() );
		// Nothing to clear, and an empty list is not a usable query condition
		if ( $dbids === [] ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$conds = [ 'tgs_group' => $dbids, 'tgs_lang' => $code ];
		$dbw->delete( self::TABLE, $conds, __METHOD__ );
		wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
	}

	/**
	 * Clears the cached stats of the given group(s) in all languages.
	 * @param string|string[] $id Group id or list of group ids
	 */
	public static function clearGroup( $id ) {
		// T206904: Fix countable error for group id
		// NOTE: An empty string ('') is not a valid group id
		if ( $id === [] ) {
			return;
		}

		$dbids = array_map( [ self::class, 'getDatabaseIdForGroupId' ], (array)$id );
		$dbw = wfGetDB( DB_MASTER );
		$conds = [ 'tgs_group' => $dbids ];
		$dbw->delete( self::TABLE, $conds, __METHOD__ );
		wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
	}

	/**
	 * Clears the cached stats of all groups in the given language(s).
	 * @param string|string[] $code Language code or list of language codes
	 */
	public static function clearLanguage( $code ) {
		// Same guard as in clearGroup(): count() must not be used here because
		// $code is usually a plain string, which is not countable.
		if ( $code === [] ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$conds = [ 'tgs_lang' => $code ];
		$dbw->delete( self::TABLE, $conds, __METHOD__ );
		wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
	}

	/**
	 * Purges all cached stats.
	 */
	public static function clearAll() {
		$dbw = wfGetDB( DB_MASTER );
		$dbw->delete( self::TABLE, '*', __METHOD__ );
		wfDebugLog( 'messagegroupstats', 'Cleared everything :(' );
	}

	/**
	 * Use this to extract results returned from selectRowsIdLang. You must pass the
	 * message group ids you want to retrieve. Entries that do not match are not returned.
	 *
	 * @param Traversable $res Database result object
	 * @param string[] $ids List of message group ids
	 * @param array[] $stats Optional array to append results to.
	 * @return array[]
	 */
	protected static function extractResults( $res, array $ids, array $stats = [] ) {
		// Map the internal ids back to real ids
		$dbids = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
		$idmap = array_combine( $dbids, $ids );

		foreach ( $res as $row ) {
			if ( !isset( $idmap[$row->tgs_group] ) ) {
				// Stale entry, ignore for now
				// TODO: Schedule for purge
				continue;
			}

			$realId = $idmap[$row->tgs_group];
			$stats[$realId][$row->tgs_lang] = self::extractNumbers( $row );
		}

		return $stats;
	}

	/**
	 * Adjusts the stored numbers of all groups of the message in place,
	 * without recalculating them.
	 *
	 * @param MessageHandle $handle
	 * @param int[] $changes Relative changes keyed by 'total', 'translated',
	 *  'fuzzy' and/or 'proofread'. Other keys are ignored.
	 */
	public static function update( MessageHandle $handle, array $changes = [] ) {
		$dbids = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $handle->getGroupIds() );
		// No rows could match, and an empty list is not a usable query condition
		if ( $dbids === [] ) {
			return;
		}

		$values = [];
		foreach ( [ 'total', 'translated', 'fuzzy', 'proofread' ] as $type ) {
			if ( isset( $changes[$type] ) ) {
				// Produces e.g. "tgs_fuzzy=tgs_fuzzy+1"; the number is forced
				// to an integer by stringifyNumber()
				$values[] = "tgs_$type=tgs_$type" .
					self::stringifyNumber( $changes[$type] );
			}
		}

		// An UPDATE without anything to set would be invalid SQL
		if ( $values === [] ) {
			return;
		}

		$conds = [
			'tgs_group' => $dbids,
			'tgs_lang' => $handle->getCode(),
		];

		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( self::TABLE, $values, $conds, __METHOD__ );
	}

	/**
	 * Converts a database row into a stat array indexed by the class constants.
	 * @param stdClass $row
	 * @return int[]
	 */
	protected static function extractNumbers( $row ) {
		return [
			self::TOTAL => (int)$row->tgs_total,
			self::TRANSLATED => (int)$row->tgs_translated,
			self::FUZZY => (int)$row->tgs_fuzzy,
			self::PROOFREAD => (int)$row->tgs_proofread,
		];
	}

	/**
	 * Loads stored stats of all groups for the language and calculates the
	 * missing ones.
	 *
	 * @param string $code Language code
	 * @param array[] $stats
	 * @return array[]
	 */
	protected static function forLanguageInternal( $code, array $stats = [] ) {
		$groups = MessageGroups::singleton()->getGroups();

		$ids = array_keys( $groups );
		$res = self::selectRowsIdLang( null, $code );
		$stats = self::extractResults( $res, $ids, $stats );

		foreach ( $groups as $id => $group ) {
			if ( isset( $stats[$id][$code] ) ) {
				continue;
			}
			$stats[$id][$code] = self::forItemInternal( $stats, $group, $code );
		}

		return $stats;
	}

	/**
	 * Recursively collects the non-aggregate groups contained in an
	 * aggregate group.
	 *
	 * @param AggregateMessageGroup $agg
	 * @return MessageGroup[] Groups indexed by group id
	 */
	protected static function expandAggregates( AggregateMessageGroup $agg ) {
		$flattened = [];

		/** @var MessageGroup|AggregateMessageGroup $group */
		foreach ( $agg->getGroups() as $group ) {
			if ( $group instanceof AggregateMessageGroup ) {
				// Array union: a group already seen keeps its first entry
				$flattened += self::expandAggregates( $group );
			} else {
				$flattened[$group->getId()] = $group;
			}
		}

		return $flattened;
	}

	/**
	 * Loads stored stats of the group for all languages and calculates the
	 * missing ones.
	 *
	 * @param MessageGroup $group
	 * @param array[] $stats
	 * @return array[]
	 */
	protected static function forGroupInternal( $group, array $stats = [] ) {
		$id = $group->getId();
		$res = self::selectRowsIdLang( [ $id ], null );
		$stats = self::extractResults( $res, [ $id ], $stats );

		# Go over each language filling missing entries
		$languages = array_keys( TranslateUtils::getLanguageNames( 'en' ) );
		// This is for calculating things in correct order
		sort( $languages );
		foreach ( $languages as $code ) {
			if ( isset( $stats[$id][$code] ) ) {
				continue;
			}
			$stats[$id][$code] = self::forItemInternal( $stats, $group, $code );
		}

		// This is for sorting the values added later in correct order
		foreach ( array_keys( $stats ) as $key ) {
			ksort( $stats[$key] );
		}

		return $stats;
	}

	/**
	 * Fetch rows from the database. Use extractResults to process this value.
	 *
	 * @param null|string[] $ids List of message group ids
	 * @param null|string|string[] $codes Language code or list of language codes
	 * @return Traversable Database result object
	 */
	protected static function selectRowsIdLang( $ids = null, $codes = null ) {
		$conds = [];
		if ( $ids !== null ) {
			$conds['tgs_group'] = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
		}

		if ( $codes !== null ) {
			$conds['tgs_lang'] = $codes;
		}

		$dbr = TranslateUtils::getSafeReadDB();

		return $dbr->select( self::TABLE, '*', $conds, __METHOD__ );
	}

	/**
	 * Calculates the stats of one group in one language and queues the
	 * result to be stored. For aggregate groups the stats of the subgroups
	 * are loaded or calculated as needed and added to $stats.
	 *
	 * @param array[] &$stats
	 * @param MessageGroup $group
	 * @param string $code Language code
	 *
	 * @return null[]|int[]
	 */
	protected static function forItemInternal( &$stats, $group, $code ) {
		$id = $group->getId();

		// Out of time: report unknown stats instead of calculating more
		if ( self::$timeStart !== null && ( microtime( true ) - self::$timeStart ) > self::$limit ) {
			$stats[$id][$code] = self::getUnknownStats();

			return $stats[$id][$code];
		}

		if ( $group instanceof AggregateMessageGroup ) {
			$aggregates = self::getEmptyStats();

			$expanded = self::expandAggregates( $group );
			if ( $expanded === [] ) {
				return $aggregates;
			}

			$subGroupIds = array_keys( $expanded );
			$res = self::selectRowsIdLang( $subGroupIds, $code );
			$stats = self::extractResults( $res, $subGroupIds, $stats );

			foreach ( $expanded as $sid => $subgroup ) {
				if ( !isset( $stats[$sid][$code] ) ) {
					$stats[$sid][$code] = self::forItemInternal( $stats, $subgroup, $code );
				}

				# Discouraged groups may belong to another group, usually if there
				# is an aggregate group for all translatable pages. In that case
				# calculate and store the statistics, but don't count them as part of
				# the aggregate group, so that the numbers in Special:LanguageStats
				# add up. The statistics for discouraged groups can still be viewed
				# through Special:MessageGroupStats.
				$include = Hooks::run( 'Translate:MessageGroupStats:isIncluded', [ $sid, $code ] );
				if ( $include ) {
					$aggregates = self::multiAdd( $aggregates, $stats[$sid][$code] );
				}
			}
			$stats[$id][$code] = $aggregates;
		} else {
			$aggregates = self::calculateGroup( $group, $code );
		}

		// Don't add nulls to the database, causes annoying warnings
		if ( $aggregates[self::TOTAL] === null ) {
			return $aggregates;
		}

		self::$updates[] = [
			'tgs_group' => self::getDatabaseIdForGroupId( $id ),
			'tgs_lang' => $code,
			'tgs_total' => $aggregates[self::TOTAL],
			'tgs_translated' => $aggregates[self::TRANSLATED],
			'tgs_fuzzy' => $aggregates[self::FUZZY],
			'tgs_proofread' => $aggregates[self::PROOFREAD],
		];

		return $aggregates;
	}

	/**
	 * Adds stat array $b to stat array $a element by element.
	 *
	 * If the first element of either array is null (unknown stats), an
	 * all-null array is returned and $a is left untouched. Otherwise $a is
	 * modified in place and also returned.
	 *
	 * @param array &$a
	 * @param array $b
	 * @return array
	 */
	public static function multiAdd( &$a, $b ) {
		if ( $a[0] === null || $b[0] === null ) {
			return array_fill( 0, count( $a ), null );
		}

		foreach ( $a as $i => &$v ) {
			$v += $b[$i];
		}
		// Break the reference so that $v cannot alias the last element
		unset( $v );

		return $a;
	}

	/**
	 * Calculates the stats of a non-aggregate group by loading and
	 * filtering its message collection.
	 *
	 * @param MessageGroup $group
	 * @param string $code Language code
	 * @return int[] ( total, translated, fuzzy, proofread )
	 */
	protected static function calculateGroup( $group, $code ) {
		global $wgTranslateDocumentationLanguageCode;

		// Calculate if missing and store in the db
		$collection = $group->initCollection( $code );

		if ( $code === $wgTranslateDocumentationLanguageCode ) {
			$ffs = $group->getFFS();
			if ( $ffs instanceof GettextFFS ) {
				// Mark the messages that have a '.' comment in the English
				// file as being in the file
				$template = $ffs->read( 'en' );
				$infile = [];
				foreach ( $template['TEMPLATE'] as $key => $data ) {
					if ( isset( $data['comments']['.'] ) ) {
						$infile[$key] = '1';
					}
				}
				$collection->setInFile( $infile );
			}
		}

		$collection->filter( 'ignored' );
		$collection->filter( 'optional' );
		// Store the count of real messages for later calculation.
		$total = count( $collection );

		// Count fuzzy first.
		$collection->filter( 'fuzzy' );
		$fuzzy = $total - count( $collection );

		// Count the completed translations.
		$collection->filter( 'hastranslation', false );
		$translated = count( $collection );

		// Count how many of the completed translations
		// have been proofread
		$collection->filter( 'reviewer', false );
		$proofread = count( $collection );

		return [
			self::TOTAL => $total,
			self::TRANSLATED => $translated,
			self::FUZZY => $fuzzy,
			self::PROOFREAD => $proofread,
		];
	}

	/**
	 * Converts input to "+2" "-4" type of string.
	 * @param int $number
	 * @return string
	 */
	protected static function stringifyNumber( $number ) {
		$number = (int)$number;

		return $number < 0 ? "$number" : "+$number";
	}

	/**
	 * Schedules the pending calculated rows to be written to the database
	 * in a deferred update. Does nothing if the wiki is read-only or if
	 * there is nothing to write.
	 */
	protected static function queueUpdates() {
		if ( wfReadOnly() ) {
			return;
		}

		if ( !count( self::$updates ) ) {
			return;
		}

		$lb = MediaWikiServices::getInstance()->getDBLoadBalancer();
		$dbw = $lb->getLazyConnectionRef( DB_MASTER ); // avoid connecting yet
		$table = self::TABLE;
		// Captured by reference so that the callback sees rows added after
		// this point and can empty the queue once they are written
		$updates = &self::$updates;

		self::queueWithLock(
			$dbw,
			'updates',
			__METHOD__,
			function ( IDatabase $dbw, $method ) use( $table, &$updates ) {
				// An earlier deferred update may already have written everything
				if ( $updates === [] ) {
					return;
				}

				$dbw->insert(
					$table,
					$updates,
					$method,
					[ 'IGNORE' ]
				);

				$updates = [];
			}
		);
	}

	/**
	 * Runs the callback in a deferred update while holding a named database
	 * lock. The callback is skipped if the lock cannot be acquired.
	 *
	 * @param IDatabase $dbw
	 * @param string $key Suffix for the lock name
	 * @param string $method Caller name that is passed on to the callback
	 * @param callable $callback Called with ( IDatabase $dbw, string $method )
	 */
	protected static function queueWithLock( IDatabase $dbw, $key, $method, $callback ) {
		$fname = __METHOD__;
		DeferredUpdates::addCallableUpdate( function () use ( $dbw, $key, $method, $callback, $fname ) {
			$lockName = 'MessageGroupStats:' . $key;
			if ( !$dbw->lock( $lockName, $fname, 1 ) ) {
				return; // raced out
			}

			$dbw->commit( $fname, 'flush' );
			call_user_func( $callback, $dbw, $method );
			$dbw->commit( $fname, 'flush' );

			$dbw->unlock( $lockName, $fname );
		} );
	}

	/**
	 * Returns the value used for the group in the tgs_group column. Ids of
	 * at most 72 bytes are used as is; longer ids are shortened to 72 bytes:
	 * the first 50 bytes of the id, '||', and the first 20 characters of the
	 * hexadecimal SHA-256 hash of the id.
	 *
	 * @param string $id Group id
	 * @return string
	 */
	public static function getDatabaseIdForGroupId( $id ) {
		// The column is 100 bytes long, but we don't need to use it all
		if ( strlen( $id ) <= 72 ) {
			return $id;
		}

		$hash = hash( 'sha256', $id, /*binary*/false );
		$dbid = substr( $id, 0, 50 ) . '||' . substr( $hash, 0, 20 );

		return $dbid;
	}
}