%PDF- %PDF-
| Direktori : /www/varak.net/wiki.varak.net/languages/utils/ |
| Current File : //www/varak.net/wiki.varak.net/languages/utils/CLDRPluralRuleConverter.php |
<?php
/**
* @author Niklas Laxström, Tim Starling
*
* @copyright Copyright © 2010-2012, Niklas Laxström
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
*
* @file
* @since 1.20
*/
/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The rule string is split into tokens by nextToken(); doConvert() then
 * combines them using an operator stack and an operand stack, following
 * Dijkstra's shunting yard algorithm. Use the static convert() method;
 * the constructor is not public.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position (byte offset into $rule)
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position, i.e. strlen( $rule )
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels, keyed by operator name; a higher number binds more
	 * tightly. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 */
	private static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands. Each operand is exactly one
	 * character from this list.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Non-public constructor; instances are created by convert().
	 *
	 * @param string $rule
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 * @throws CLDRPluralRuleError If the token sequence is not a valid rule
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverterExpression|CLDRPluralRuleConverterOperator $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			// The flag starts out true and is flipped before it is checked, so
			// the very first token is required to be an operand.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverterExpression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Resolve higher precedence levels: anything on the operator stack
			// that binds at least as tightly as the new operator is applied now.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string and advance the position
	 * past it.
	 *
	 * @return CLDRPluralRuleConverterFragment|bool The next token, or false
	 *   when the end of the input or the start of a sample section
	 *   ("@integer" / "@decimal") has been reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check matters:
		// strpos() alone is a substring test, so multi-letter words such as
		// "ni" or "ft" would otherwise be accepted as a single operand while
		// the position only advances by one character.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverterOperator $op
	 * @throws CLDRPluralRuleError If fewer than two operands are available
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The right-hand operand was pushed last, so it comes off first.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverterExpression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverterOperator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}