%PDF- %PDF-
| Direktori : /www/varak.net/wiki.varak.net/extensions/cldr/ |
| Current File : /www/varak.net/wiki.varak.net/extensions/cldr/rebuild.php |
<?php
/**
* Extract data from cldr XML.
*
* @author Niklas Laxström
* @author Ryan Kaldari
* @author Santhosh Thottingal
* @author Sam Reed
* @copyright Copyright © 2007-2015
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
*/
// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable, otherwise fall back to two directory levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../..";
}
require_once "$IP/maintenance/Maintenance.php";
class CLDRRebuild extends Maintenance {
	/**
	 * Set the script description and register its two command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Extract data from CLDR XML' );

		// Both options are optional (third argument) and take a value (fourth argument).
		$this->addOption(
			'datadir',
			'Directory containing CLDR data. Default is core/common/main',
			false,
			true
		);
		$this->addOption(
			'outputdir',
			'Output directory. Default is current directory',
			false,
			true
		);
	}

	/**
	 * Run the three extraction passes: per-language names, supplemental
	 * data and currency symbols.
	 */
	public function execute() {
		// Kept under the name $dir: it is in scope for the file required below.
		$dir = __DIR__;
		require_once "$dir/cldr.php";

		$dataDir = $this->getOption( 'datadir', "$dir/core/common/main" );
		$outDir = $this->getOption( 'outputdir', $dir );

		if ( !file_exists( $dataDir ) ) {
			$this->error( "CLDR data not found at $dataDir\n", 1 );
		}

		// All language codes known to MediaWiki, plus pt-pt which is added by hand
		$languages = Language::fetchLanguageNames();
		$languages['pt-pt'] = 'Foo';
		ksort( $languages );

		foreach ( array_keys( $languages ) as $code ) {
			// Turn the MediaWiki code into the name CLDR uses for its file
			$parts = explode( '-', $code );
			$cldrCode = $code;
			if ( count( $parts ) > 1 ) {
				switch ( strlen( $parts[1] ) ) {
					case 4:
						// ISO 15924 alpha-4 script code: capitalise the first letter
						$parts[1] = ucfirst( $parts[1] );
						break;
					case 2:
						// ISO 3166-1 alpha-2 country code: move it to the third slot
						$parts[2] = $parts[1];
						unset( $parts[1] );
						break;
				}
				// A two-letter third part is upper-cased
				if ( isset( $parts[2] ) && strlen( $parts[2] ) == 2 ) {
					$parts[2] = strtoupper( $parts[2] );
				}
				$cldrCode = implode( '_', $parts );
			}

			$xmlPath = "$dataDir/$cldrCode.xml";
			if ( !file_exists( $xmlPath ) ) {
				// Nothing to parse for this language; report it and move on
				$this->output( "File $xmlPath not found\n" );
				continue;
			}

			$target = Language::getFileName( "CldrNames", getRealCode( $code ), '.php' );
			$namesParser = new CLDRParser();
			$namesParser->parse( $xmlPath, "$outDir/CldrNames/$target" );
		}

		// Now parse out what we want from the supplemental file
		$this->output( "Parsing Supplemental Data...\n" );
		// The supplemental directory is reached relative to the data directory,
		// because the data directory defaults to core/common/main.
		$supplementalPath = "$dataDir/../supplemental/supplementalData.xml";
		if ( file_exists( $supplementalPath ) ) {
			$supplementalParser = new CLDRParser();
			$supplementalParser->parse_supplemental(
				$supplementalPath,
				"$outDir/CldrSupplemental/Supplemental.php"
			);
		} else {
			$this->output( "File $supplementalPath not found\n" );
		}
		$this->output( "Done parsing supplemental data.\n" );

		$this->output( "Parsing Currency Symbol Data...\n" );
		$currencyParser = new CLDRParser();
		$currencyParser->parse_currency_symbols( $dataDir, "$outDir/CldrCurrency/Symbols.php" );
		$this->output( "Done parsing currency symbols.\n" );
	}
}
class CLDRParser {
	/**
	 * Extract the wanted names from one CLDR locale file and save them as PHP.
	 *
	 * Collected: language names, currency names and symbols, country names
	 * and time units (long duration unit patterns plus relative time patterns).
	 *
	 * @param string $inputFile filename
	 * @param string $outputFile filename
	 */
	public function parse( $inputFile, $outputFile ) {
		// Open the input file for reading
		$contents = file_get_contents( $inputFile );
		$doc = new SimpleXMLElement( $contents );

		$data = array(
			'languageNames' => array(),
			'currencyNames' => array(),
			'currencySymbols' => array(),
			'countryNames' => array(),
			'timeUnits' => array(),
		);

		foreach ( $doc->xpath( '//languages/language' ) as $elem ) {
			// Skip alternative names and the 'root' entry
			if ( (string)$elem['alt'] !== '' || (string)$elem['type'] === 'root' ) {
				continue;
			}

			$key = str_replace( '_', '-', strtolower( (string)$elem['type'] ) );
			$data['languageNames'][$key] = (string)$elem;
		}

		foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
			$name = (string)$elem->displayName[0];
			if ( $name === '' ) {
				continue;
			}

			$code = (string)$elem['type'];
			$data['currencyNames'][$code] = $name;

			$symbol = (string)$elem->symbol[0];
			if ( $symbol !== '' ) {
				$data['currencySymbols'][$code] = $symbol;
			}
		}

		foreach ( $doc->xpath( '//territories/territory' ) as $elem ) {
			// Only the plain name and the 'short' alternative are accepted
			$alt = (string)$elem['alt'];
			if ( $alt !== '' && $alt !== 'short' ) {
				continue;
			}

			// Keep two-letter upper-case codes only, and never 'ZZ'
			$type = (string)$elem['type'];
			if ( $type === 'ZZ' || !preg_match( '/^[A-Z][A-Z]$/', $type ) ) {
				continue;
			}

			$data['countryNames'][$type] = (string)$elem;
		}

		$durationPrefix = 'duration-';
		foreach ( $doc->xpath( '//units/unitLength' ) as $unitLength ) {
			if ( (string)$unitLength['type'] !== 'long' ) {
				continue;
			}

			foreach ( $unitLength->unit as $elem ) {
				$type = (string)$elem['type'];
				// The prefix is cut off below, so it has to be a real prefix:
				// a type that merely contains 'duration' would be mangled.
				if ( strpos( $type, $durationPrefix ) !== 0 ) {
					continue;
				}
				$type = substr( $type, strlen( $durationPrefix ) );

				foreach ( $elem->unitPattern as $pattern ) {
					$data['timeUnits'][$type . '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		foreach ( $doc->xpath( '//fields/field' ) as $field ) {
			$fieldType = (string)$field['type'];

			foreach ( $field->relativeTime as $relative ) {
				$type = (string)$relative['type'];

				foreach ( $relative->relativeTimePattern as $pattern ) {
					$data['timeUnits'][$fieldType . '-' . $type
						. '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		ksort( $data['timeUnits'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the file structure found in common/supplemental/supplementalData.xml
	 *
	 * Collects the per-currency fraction attributes and, for every region,
	 * the list of currencies that have no 'to' attribute and are not
	 * marked tender="false".
	 *
	 * @param string $inputFile
	 * @param string $outputFile
	 */
	public function parse_supplemental( $inputFile, $outputFile ) {
		// Open the input file for reading
		$contents = file_get_contents( $inputFile );
		$doc = new SimpleXMLElement( $contents );

		$data = array(
			'currencyFractions' => array(),
			'localeCurrencies' => array(),
		);

		// Pull currency attributes - digits, rounding, and cashRounding.
		// This will tell us how many decimal places make sense to use with any currency,
		// or if the currency is totally non-fractional
		$attributes = array( 'digits', 'rounding', 'cashDigits', 'cashRounding' );
		foreach ( $doc->xpath( '//currencyData/fractions/info' ) as $elem ) {
			$currency = (string)$elem['iso4217'];
			if ( $currency === '' ) {
				continue;
			}

			foreach ( $attributes as $att ) {
				$attValue = (string)$elem[$att];
				if ( $attValue !== '' ) {
					$data['currencyFractions'][$currency][$att] = $attValue;
				}
			}
		}

		// Pull a map of regions to currencies in order of preference.
		foreach ( $doc->xpath( '//currencyData/region' ) as $elem ) {
			$region = (string)$elem['iso3166'];
			if ( $region === '' ) {
				continue;
			}

			foreach ( $elem->currency as $currencynode ) {
				if ( (string)$currencynode['to'] === ''
					&& (string)$currencynode['tender'] !== 'false'
				) {
					$data['localeCurrencies'][$region][] = (string)$currencynode['iso4217'];
				}
			}
		}

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the currency section in the names files.
	 * This is separate from the regular parse function, because we need all of
	 * the currency locale information, even if mediawiki doesn't support the language.
	 * (For instance: en_AU uses '$' for AUD, not USD, but it's not a supported mediawiki locality)
	 *
	 * Files whose <identity> names no language are skipped entirely.
	 *
	 * @param string $inputDir - the directory, in which we will parse everything.
	 * @param string $outputFile
	 */
	public function parse_currency_symbols( $inputDir, $outputFile ) {
		if ( !file_exists( $inputDir ) ) {
			return;
		}
		$files = scandir( $inputDir );

		$data = array(
			'currencySymbols' => array(),
		);

		foreach ( $files as $inputFile ) {
			// Only names that really end in '.xml' (and have something in
			// front of it) are locale files; 'foo.xml.bak' is not.
			if ( strlen( $inputFile ) <= 4 || substr( $inputFile, -4 ) !== '.xml' ) {
				continue;
			}

			$contents = file_get_contents( $inputDir . '/' . $inputFile );
			$doc = new SimpleXMLElement( $contents );

			// Reset per file, so that a file without a usable identity can
			// never inherit the locale of the file parsed before it.
			$language = '';
			$territory = 'DEFAULT';
			foreach ( $doc->xpath( '//identity' ) as $elem ) {
				$identityLanguage = (string)$elem->language['type'];
				if ( $identityLanguage === '' ) {
					continue;
				}
				$language = $identityLanguage;

				// NOTE(review): only language and territory are read here, so a
				// script-only variant also ends up under 'DEFAULT' - confirm
				// that this is intended.
				$territory = (string)$elem->territory['type'];
				if ( $territory === '' ) {
					$territory = 'DEFAULT';
				}
			}

			if ( $language === '' ) {
				// No locale to file the symbols under
				continue;
			}

			foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
				$symbol = (string)$elem->symbol[0];
				if ( $symbol !== '' ) {
					$data['currencySymbols'][(string)$elem['type']][$language][$territory] = $symbol;
				}
			}
		}

		// now massage the data somewhat. It's pretty blown up at this point.

		/**
		 * Part 1: Stop blowing up on defaults.
		 * Defaults apparently come in many forms. Listed below in order of scope
		 * (widest to narrowest)
		 * 1) The ISO code itself, in the absence of any other defaults
		 * 2) The 'root' language file definition
		 * 3) Language with no locality - locality will come in as 'DEFAULT'
		 *
		 * Intended behavior:
		 * From narrowest scope to widest, collapse the defaults
		 */
		foreach ( $data['currencySymbols'] as $currency => $byLanguage ) {
			// get the currency default symbol. This will either be defined in the
			// 'root' language file, or taken from the ISO code.
			$default = (string)$currency;
			if ( isset( $byLanguage['root']['DEFAULT'] ) ) {
				$default = $byLanguage['root']['DEFAULT'];
			}

			foreach ( $byLanguage as $lang => $territories ) {
				if ( !is_array( $territories ) ) {
					continue;
				}

				if ( count( $territories ) === 1 && array_key_exists( 'DEFAULT', $territories ) ) {
					// Collapse a language (no locality) array if it's just the
					// default. One value will do fine.
					$data['currencySymbols'][$currency][$lang] = $territories['DEFAULT'];
					// ...and drop it altogether when it only repeats the default.
					if ( $territories['DEFAULT'] === $default && (string)$lang !== 'root' ) {
						unset( $data['currencySymbols'][$currency][$lang] );
					}
				} else {
					ksort( $data['currencySymbols'][$currency][$lang] );
				}
			}

			ksort( $data['currencySymbols'][$currency] );
		}

		ksort( $data['currencySymbols'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * savephp builds properly formatted php source defining all the vars
	 * we've just parsed out of the xml, and writes it to $location.
	 *
	 * Nothing is written when every array in $data is empty. The directory
	 * of $location is created if it does not exist yet.
	 *
	 * @param array $data The variable names and values we want defined in the php output
	 * @param string $location File location to write
	 */
	public function savephp( $data, $location ) {
		$hasData = false;
		foreach ( $data as $v ) {
			if ( count( $v ) ) {
				$hasData = true;
				break;
			}
		}

		if ( !$hasData ) {
			return;
		}

		// Yes, I am aware I could have simply used var_export.
		// ...the spacing was ugly.
		$output = "<?php\n";
		foreach ( $data as $varname => $values ) {
			if ( !count( $values ) ) {
				// Don't output empty arrays
				continue;
			}

			$output .= "\n\$$varname = array(\n";
			// Keys are only written out for associative arrays
			$isAssoc = $this->isAssoc( $values );
			foreach ( $values as $key => $value ) {
				if ( is_array( $value ) ) {
					$output .= $this->makePrettyArrayOuts( $isAssoc ? $key : null, $value, 1 );
				} else {
					$prefix = $isAssoc ? $this->formatKey( $key ) : '';
					$output .= "\t$prefix'" . $this->escapeString( $value ) . "',\n";
				}
			}
			$output .= ");\n";
		}

		// The target sub-directory may be missing, e.g. with a custom output directory
		$directory = dirname( $location );
		if ( !is_dir( $directory ) ) {
			mkdir( $directory, 0777, true );
		}

		file_put_contents( $location, $output );
	}

	/**
	 * It makes pretty array vals. Dur.
	 *
	 * Returns an empty string when $value yields no output at all, so that
	 * empty arrays are left out of the generated source.
	 *
	 * @param string|int|null $key Use null to omit outputting the key
	 * @param array $value
	 * @param int $level Nesting depth, i.e. the number of leading tabs
	 * @return string
	 */
	public function makePrettyArrayOuts( $key, $value, $level = 1 ) {
		$subKeys = '';
		$isAssoc = $this->isAssoc( $value );
		$tabs = str_repeat( "\t", $level );

		foreach ( $value as $subkey => $subvalue ) {
			if ( is_array( $subvalue ) ) {
				$subKeys .= $this->makePrettyArrayOuts(
					$isAssoc ? $subkey : null,
					$subvalue,
					$level + 1
				);
			} else {
				$prefix = $isAssoc ? $this->formatKey( $subkey ) : '';
				$subKeys .= "$tabs\t$prefix'" . $this->escapeString( $subvalue ) . "',\n";
			}
		}

		if ( $subKeys === '' ) {
			return '';
		}

		$key = $key !== null ? $this->formatKey( $key ) : '';

		return "$tabs$key" . "array(\n$subKeys$tabs),\n";
	}

	/**
	 * It makes pretty array keys. Dur.
	 *
	 * Only real integer keys are written bare. PHP itself turns canonical
	 * integer strings used as array keys into integers, so every key that is
	 * still a string here (such as '001' or '1.5') has to be quoted to
	 * survive the round trip unchanged.
	 *
	 * @param string|int $key
	 * @return string
	 */
	protected function formatKey( $key ) {
		if ( is_int( $key ) ) {
			return "$key => ";
		}

		return "'" . $this->escapeString( $key ) . "' => ";
	}

	/**
	 * Escape a value for use inside a single-quoted PHP string literal.
	 *
	 * Both the quote and the backslash need escaping: an unescaped trailing
	 * backslash would swallow the closing quote of the literal.
	 *
	 * @param string $value
	 * @return string
	 */
	protected function escapeString( $value ) {
		return addcslashes( (string)$value, "'\\" );
	}

	/**
	 * Checks if array is associative or sequential.
	 *
	 * An array counts as sequential only when its keys are exactly
	 * 0 .. count-1 in that order.
	 *
	 * @param array $arr
	 * @return bool
	 */
	protected function isAssoc( array $arr ) {
		return array_keys( $arr ) !== range( 0, count( $arr ) - 1 );
	}
}
/**
 * Get the code for the MediaWiki localisation,
 * these are same as the fallback.
 *
 * Codes without an entry in the table below are returned unchanged.
 *
 * @param string $code
 * @return string
 */
function getRealCode( $code ) {
	$realCodes = array(
		'kk' => 'kk-cyrl',
		'ku' => 'ku-arab',
		'sr' => 'sr-ec',
		'tg' => 'tg-cyrl',
		'zh' => 'zh-hans',
		'pt' => 'pt-br',
		'pt-pt' => 'pt',
		'az-arab' => 'azb',
	);

	return isset( $realCodes[$code] ) ? $realCodes[$code] : $code;
}
// Name of the Maintenance subclass defined in this file.
// NOTE(review): presumably read by the file required below to decide which
// class to run - confirm against the MediaWiki maintenance framework.
$maintClass = 'CLDRRebuild';
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it is
// provided by the Maintenance.php required at the top - confirm.
require_once RUN_MAINTENANCE_IF_MAIN;