%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/varak.net/wiki.varak.net/extensions/cldr/
Upload File :
Create Path :
Current File : /www/varak.net/wiki.varak.net/extensions/cldr/rebuild.php

<?php

/**
 * Extract data from cldr XML.
 *
 * @author Niklas Laxström
 * @author Ryan Kaldari
 * @author Santhosh Thottingal
 * @author Sam Reed
 * @copyright Copyright © 2007-2015
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 */

// Resolve the MediaWiki install path ($IP): the MW_INSTALL_PATH environment
// variable wins; otherwise fall back to two directories above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = $dir . '/../..';
}
require_once $IP . '/maintenance/Maintenance.php';

class CLDRRebuild extends Maintenance {

	/**
	 * Register the script description and its two optional, value-taking
	 * command line options (datadir and outputdir).
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Extract data from CLDR XML' );

		$isRequired = false;
		$takesValue = true;
		$this->addOption(
			'datadir',
			'Directory containing CLDR data. Default is core/common/main',
			$isRequired,
			$takesValue
		);
		$this->addOption(
			'outputdir',
			'Output directory. Default is current directory',
			$isRequired,
			$takesValue
		);
	}

	/**
	 * Run the rebuild: one names file per known language, then the
	 * supplemental data file, then the currency symbol file.
	 */
	public function execute() {
		// NOTE: cldr.php is required inside this method's scope, so it can see
		// (and possibly reassign) $dir; the variable name is kept for that reason.
		$dir = __DIR__;
		require_once "$dir/cldr.php";

		$dataDir = $this->getOption( 'datadir', "$dir/core/common/main" );
		$outputDir = $this->getOption( 'outputdir', $dir );

		if ( !file_exists( $dataDir ) ) {
			$this->error( "CLDR data not found at $dataDir\n", 1 );
		}

		// All language codes known to MediaWiki, plus pt-pt which is added by
		// hand (only the key matters, the name is a placeholder).
		$languages = Language::fetchLanguageNames();
		$languages['pt-pt'] = 'Foo';
		ksort( $languages );

		foreach ( array_keys( $languages ) as $code ) {
			$input = "$dataDir/" . $this->toCldrLocaleName( $code ) . '.xml';

			// Report languages that have no CLDR file and move on
			if ( !file_exists( $input ) ) {
				$this->output( "File $input not found\n" );
				continue;
			}

			$outputFileName = Language::getFileName( "CldrNames", getRealCode( $code ), '.php' );
			$parser = new CLDRParser();
			$parser->parse( $input, "$outputDir/CldrNames/$outputFileName" );
		}

		// Now parse out what we want from the supplemental file
		$this->output( "Parsing Supplemental Data...\n" );
		// The supplemental directory is a sibling of the data directory, hence
		// the relative hop; a more general datadir default would avoid this.
		$input = "$dataDir/../supplemental/supplementalData.xml";
		if ( !file_exists( $input ) ) {
			$this->output( "File $input not found\n" );
		} else {
			$parser = new CLDRParser();
			$parser->parse_supplemental( $input, "$outputDir/CldrSupplemental/Supplemental.php" );
		}
		$this->output( "Done parsing supplemental data.\n" );

		$this->output( "Parsing Currency Symbol Data...\n" );
		$parser = new CLDRParser();
		$parser->parse_currency_symbols( $dataDir, "$outputDir/CldrCurrency/Symbols.php" );
		$this->output( "Done parsing currency symbols.\n" );
	}

	/**
	 * Turn a dash-separated language code into the underscore-separated
	 * base name used for the CLDR input file, e.g. pt-br => pt_BR.
	 *
	 * @param string $code Language code
	 * @return string Base name (without extension) of the CLDR file
	 */
	private function toCldrLocaleName( $code ) {
		$parts = explode( '-', $code );
		if ( count( $parts ) <= 1 ) {
			return $code;
		}

		// ISO 15924 alpha-4 script code: first letter upper case
		if ( strlen( $parts[1] ) == 4 ) {
			$parts[1] = ucfirst( $parts[1] );
		}

		// ISO 3166-1 alpha-2 country code: belongs in the third slot
		if ( strlen( $parts[1] ) == 2 ) {
			$parts[2] = $parts[1];
			unset( $parts[1] );
		}

		// Country codes are upper case in CLDR file names
		if ( isset( $parts[2] ) && strlen( $parts[2] ) == 2 ) {
			$parts[2] = strtoupper( $parts[2] );
		}

		return implode( '_', $parts );
	}
}

class CLDRParser {

	/**
	 * Extract language names, currency names and symbols, country names and
	 * time unit patterns from one CLDR locale file and write them to a PHP file.
	 *
	 * @param string $inputFile filename of the CLDR locale XML
	 * @param string $outputFile filename of the PHP file to write
	 */
	public function parse( $inputFile, $outputFile ) {
		$contents = file_get_contents( $inputFile );
		$doc = new SimpleXMLElement( $contents );

		$data = array(
			'languageNames' => array(),
			'currencyNames' => array(),
			'currencySymbols' => array(),
			'countryNames' => array(),
			'timeUnits' => array(),
		);

		foreach ( $doc->xpath( '//languages/language' ) as $elem ) {
			// Skip alternative forms and the 'root' pseudo language
			if ( (string)$elem['alt'] !== '' ) {
				continue;
			}

			if ( (string)$elem['type'] === 'root' ) {
				continue;
			}

			// CLDR uses e.g. pt_BR; keys are stored lower case with dashes
			$key = str_replace( '_', '-', strtolower( (string)$elem['type'] ) );

			$data['languageNames'][$key] = (string)$elem;
		}

		foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
			if ( (string)$elem->displayName[0] === '' ) {
				continue;
			}

			$data['currencyNames'][(string)$elem['type']] = (string)$elem->displayName[0];
			if ( (string)$elem->symbol[0] !== '' ) {
				$data['currencySymbols'][(string)$elem['type']] = (string)$elem->symbol[0];
			}
		}

		foreach ( $doc->xpath( '//territories/territory' ) as $elem ) {
			// Only the plain form and the 'short' alternative are accepted
			if ( (string)$elem['alt'] !== '' && (string)$elem['alt'] !== 'short' ) {
				continue;
			}

			// Keep two-letter upper case codes only, and never the ZZ code
			if ( (string)$elem['type'] === 'ZZ' ||
				!preg_match( '/^[A-Z][A-Z]$/', (string)$elem['type'] )
			) {
				continue;
			}

			$data['countryNames'][(string)$elem['type']] = (string)$elem;
		}

		$durationPrefix = 'duration-';
		foreach ( $doc->xpath( '//units/unitLength' ) as $unitLength ) {
			if ( (string)$unitLength['type'] !== 'long' ) {
				continue;
			}
			foreach ( $unitLength->unit as $elem ) {
				$type = (string)$elem['type'];
				// The prefix is stripped by length below, so require that the
				// type really starts with it instead of merely containing it.
				if ( strncmp( $type, $durationPrefix, strlen( $durationPrefix ) ) !== 0 ) {
					continue;
				}
				$type = substr( $type, strlen( $durationPrefix ) );
				foreach ( $elem->unitPattern as $pattern ) {
					$data['timeUnits'][$type . '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		foreach ( $doc->xpath( '//fields/field' ) as $field ) {
			$fieldType = (string)$field['type'];

			foreach ( $field->relativeTime as $relative ) {
				$type = (string)$relative['type'];
				foreach ( $relative->relativeTimePattern as $pattern ) {
					$data['timeUnits'][$fieldType . '-' . $type
					. '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}
		ksort( $data['timeUnits'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the file structure found in common/supplemental/supplementalData.xml
	 *
	 * Writes the per-currency fraction attributes and the region => currencies map.
	 *
	 * @param string $inputFile
	 * @param string $outputFile
	 */
	public function parse_supplemental( $inputFile, $outputFile ) {
		$contents = file_get_contents( $inputFile );
		$doc = new SimpleXMLElement( $contents );

		$data = array(
			'currencyFractions' => array(),
			'localeCurrencies' => array(),
		);

		// Pull currency attributes - digits, rounding, cashDigits and cashRounding.
		// This will tell us how many decimal places make sense to use with any currency,
		// or if the currency is totally non-fractional
		$attributes = array( 'digits', 'rounding', 'cashDigits', 'cashRounding' );
		foreach ( $doc->xpath( '//currencyData/fractions/info' ) as $elem ) {
			$currency = (string)$elem['iso4217'];
			if ( $currency === '' ) {
				continue;
			}

			foreach ( $attributes as $att ) {
				if ( (string)$elem[$att] !== '' ) {
					$data['currencyFractions'][$currency][$att] = (string)$elem[$att];
				}
			}
		}

		// Pull a map of regions to currencies in order of preference.
		foreach ( $doc->xpath( '//currencyData/region' ) as $elem ) {
			$region = (string)$elem['iso3166'];
			if ( $region === '' ) {
				continue;
			}

			foreach ( $elem->currency as $currencynode ) {
				// Keep entries without a 'to' attribute that are not marked tender="false"
				if ( (string)$currencynode['to'] === '' && (string)$currencynode['tender'] !== 'false' ) {
					$data['localeCurrencies'][$region][] = (string)$currencynode['iso4217'];
				}
			}
		}

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the currency section in the names files.
	 * This is separate from the regular parse function, because we need all of
	 * the currency locale information, even if mediawiki doesn't support the language.
	 * (For instance: en_AU uses '$' for AUD, not USD, but it's not a supported mediawiki locality)
	 *
	 * Files whose <identity> section names no language are skipped, since their
	 * symbols cannot be attributed to any locale.
	 *
	 * @param string $inputDir - the directory, in which we will parse everything.
	 * @param string $outputFile
	 */
	public function parse_currency_symbols( $inputDir, $outputFile ) {
		if ( !file_exists( $inputDir ) ) {
			return;
		}
		$files = scandir( $inputDir );

		$data = array(
			'currencySymbols' => array(),
		);

		foreach ( $files as $inputFile ) {
			// Only "<something>.xml" entries are locale files
			if ( strlen( $inputFile ) <= 4 || substr( $inputFile, -4 ) !== '.xml' ) {
				continue;
			}

			$contents = file_get_contents( $inputDir . '/' . $inputFile );
			$doc = new SimpleXMLElement( $contents );

			// Reset per file so that nothing leaks over from the previous one
			$language = '';
			$territory = 'DEFAULT';
			foreach ( $doc->xpath( '//identity' ) as $elem ) {
				$identityLanguage = (string)$elem->language['type'];
				if ( $identityLanguage === '' ) {
					continue;
				}
				$language = $identityLanguage;

				$identityTerritory = (string)$elem->territory['type'];
				$territory = $identityTerritory !== '' ? $identityTerritory : 'DEFAULT';
			}

			if ( $language === '' ) {
				continue;
			}

			foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
				if ( (string)$elem->symbol[0] !== '' ) {
					$data['currencySymbols'][(string)$elem['type']][$language][$territory] =
						(string)$elem->symbol[0];
				}
			}
		}

		// now massage the data somewhat. It's pretty blown up at this point.

		/**
		 * Part 1: Stop blowing up on defaults.
		 * Defaults apparently come in many forms. Listed below in order of scope
		 * (widest to narrowest)
		 * 1) The ISO code itself, in the absence of any other defaults
		 * 2) The 'root' language file definition
		 * 3) Language with no locality - locality will come in as 'DEFAULT'
		 *
		 * Intended behavior:
		 * From narrowest scope to widest, collapse the defaults
		 */
		foreach ( $data['currencySymbols'] as $currency => $languages ) {
			// get the currency default symbol. This will either be defined in the
			// 'root' language file, or taken from the ISO code.
			$default = (string)$currency;
			if ( isset( $languages['root']['DEFAULT'] ) ) {
				$default = (string)$languages['root']['DEFAULT'];
			}

			foreach ( $languages as $lang => $territories ) {
				if ( !is_array( $territories ) ) {
					continue;
				}

				// Collapse a language (no locality) array if it's just the default.
				// One value will do fine.
				if ( count( $territories ) === 1 && array_key_exists( 'DEFAULT', $territories ) ) {
					if ( $territories['DEFAULT'] === $default && (string)$lang !== 'root' ) {
						// Same as the currency-wide default: no need to list it
						unset( $data['currencySymbols'][$currency][$lang] );
					} else {
						$data['currencySymbols'][$currency][$lang] = $territories['DEFAULT'];
					}
				} else {
					ksort( $data['currencySymbols'][$currency][$lang] );
				}
			}

			ksort( $data['currencySymbols'][$currency] );
		}

		ksort( $data['currencySymbols'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * savephp builds properly formatted php source defining one variable per
	 * non-empty entry of $data and writes it to $location. Nothing is written
	 * when every entry is empty.
	 *
	 * var_export is deliberately not used; its spacing was considered ugly.
	 *
	 * @param array $data The variable names and values we want defined in the php output
	 * @param string $location File location to write
	 */
	public function savephp( $data, $location ) {
		$hasData = false;
		$output = "<?php\n";

		foreach ( $data as $varname => $values ) {
			if ( !count( $values ) ) {
				// Don't output empty arrays
				continue;
			}
			$hasData = true;

			$output .= "\n\$$varname = array(\n";
			$isAssoc = $this->isAssoc( $values );
			foreach ( $values as $key => $value ) {
				if ( is_array( $value ) ) {
					$output .= $this->makePrettyArrayOuts( $isAssoc ? $key : null, $value, 1 );
				} else {
					$prefix = $isAssoc ? $this->formatKey( $key ) : '';
					$output .= "\t" . $prefix . $this->quoteString( $value ) . ",\n";
				}
			}
			$output .= ");\n";
		}

		if ( !$hasData ) {
			return;
		}

		file_put_contents( $location, $output );
	}

	/**
	 * Render one nested array as indented php source, recursing into sub-arrays.
	 *
	 * @param string|int|null $key Use null to omit outputting the key
	 * @param array $value
	 * @param int $level Indentation depth, in tabs, of the array itself
	 * @return string The rendered array, or an empty string if it has no content
	 */
	public function makePrettyArrayOuts( $key, $value, $level = 1 ) {
		$subKeys = '';
		$isAssoc = $this->isAssoc( $value );
		$tabs = str_repeat( "\t", $level );

		foreach ( $value as $subkey => $subvalue ) {
			if ( is_array( $subvalue ) ) {
				$subKeys .= $this->makePrettyArrayOuts(
					$isAssoc ? $subkey : null, $subvalue, $level + 1
				);
			} else {
				$prefix = $isAssoc ? $this->formatKey( $subkey ) : '';
				$subKeys .= "$tabs\t$prefix" . $this->quoteString( $subvalue ) . ",\n";
			}
		}

		if ( $subKeys === '' ) {
			return '';
		}

		$prefix = $key !== null ? $this->formatKey( $key ) : '';

		return "$tabs$prefix" . "array(\n$subKeys$tabs),\n";
	}

	/**
	 * Render an array key followed by ' => '.
	 *
	 * Only keys that are canonical decimal integers are written bare; every
	 * other key is written as a quoted string. Writing e.g. 012 or 1.5 bare
	 * would make PHP read them back as an octal number or a float.
	 *
	 * @param string|int $key
	 * @return string
	 */
	protected function formatKey( $key ) {
		if ( is_int( $key ) || (string)(int)$key === (string)$key ) {
			return "$key => ";
		}

		return $this->quoteString( $key ) . ' => ';
	}

	/**
	 * Render a scalar as a single-quoted php string literal.
	 *
	 * Both the backslash and the single quote are escaped, as those are the
	 * two characters with a special meaning inside single quotes.
	 *
	 * @param mixed $value Scalar value; it is cast to string
	 * @return string
	 */
	protected function quoteString( $value ) {
		$escaped = strtr( (string)$value, array( '\\' => '\\\\', "'" => "\\'" ) );

		return "'$escaped'";
	}

	/**
	 * Checks if array is associative or sequential.
	 *
	 * An array counts as sequential only when its keys are exactly 0..n-1 in
	 * order; note that an empty array is therefore reported as associative.
	 *
	 * @param array $arr
	 * @return bool
	 */
	protected function isAssoc( array $arr ) {
		return array_keys( $arr ) !== range( 0, count( $arr ) - 1 );
	}
}

/**
 * Get the code for the MediaWiki localisation,
 * these are same as the fallback.
 *
 * @param $code string
 * @return string
 */
function getRealCode( $code ) {
	// Codes whose MediaWiki localisation lives under a different code.
	// Checked in this order; anything not listed maps to itself.
	$overrides = array(
		'kk' => 'kk-cyrl',
		'ku' => 'ku-arab',
		'sr' => 'sr-ec',
		'tg' => 'tg-cyrl',
		'zh' => 'zh-hans',
		'pt' => 'pt-br',
		'pt-pt' => 'pt',
		'az-arab' => 'azb',
	);

	foreach ( $overrides as $from => $to ) {
		if ( !strcmp( $code, $from ) ) {
			return $to;
		}
	}

	return $code;
}

// Name the maintenance class defined in this file, then include the file named
// by the RUN_MAINTENANCE_IF_MAIN constant. That constant is not defined in this
// file; presumably it comes from Maintenance.php and runs $maintClass - confirm.
$maintClass = 'CLDRRebuild';
require_once RUN_MAINTENANCE_IF_MAIN;

Zerion Mini Shell 1.0