%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/varak.net/wiki.varak.net/languages/utils/
Upload File :
Create Path :
Current File : /www/varak.net/wiki.varak.net/languages/utils/CLDRPluralRuleConverter.php

<?php
/**
 * @author Niklas Laxström, Tim Starling
 *
 * @copyright Copyright © 2010-2012, Niklas Laxström
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 *
 * @file
 * @since 1.20
 */

/**
 * Helper class for converting rules to reverse polish notation (RPN).
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 */
	private static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Private constructor.
	 * @param string $rule
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverterOperator $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverterExpression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			} else {
				// Operator
				if ( !$expectOperator ) {
					$token->error( 'unexpected operator' );
				}
				// Resolve higher precedence levels
				$lastOp = end( $this->operators );
				while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
					$this->doOperation( $lastOp, $this->operands );
					array_pop( $this->operators );
					$lastOp = end( $this->operators );
				}
				$this->operators[] = $token;
			}
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op, $this->operands );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $this->operands[0]->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * @return CLDRPluralRuleConverterFragment The next token
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols
		if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverterOperator $op
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverterExpression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverterOperator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 * @param string $message
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}

Zerion Mini Shell 1.0