# Directory    : /usr/lib/calibre/calibre/utils/
# Current file : /usr/lib/calibre/calibre/utils/formatter.py
'''
Created on 23 Sep 2010
@author: charles
'''
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, string, traceback, numbers
from functools import partial
from math import modf
from calibre import prints
from calibre.constants import DEBUG
from calibre.ebooks.metadata.book.base import field_metadata
from calibre.utils.formatter_functions import formatter_functions
from calibre.utils.icu import strcmp
from polyglot.builtins import error_message
class Node:
    """Base class for the nodes of a parsed template program.

    Every node remembers the line number and a short descriptive name given
    at construction time. Subclasses additionally set ``node_type`` to one
    of the ``NODE_*`` tags defined here.
    """

    # Tags stored in ``node_type`` by the concrete node classes.
    NODE_RVALUE = 1
    NODE_IF = 2
    NODE_ASSIGN = 3
    NODE_FUNC = 4
    NODE_COMPARE_STRING = 5
    NODE_COMPARE_NUMERIC = 6
    NODE_CONSTANT = 7
    NODE_FIELD = 8
    NODE_RAW_FIELD = 9
    NODE_CALL = 10
    NODE_ARGUMENTS = 11
    NODE_FIRST_NON_EMPTY = 12
    NODE_FOR = 13
    NODE_GLOBALS = 14
    NODE_SET_GLOBALS = 15
    NODE_CONTAINS = 16
    NODE_BINARY_LOGOP = 17
    NODE_UNARY_LOGOP = 18
    NODE_BINARY_ARITHOP = 19
    NODE_UNARY_ARITHOP = 20
    NODE_PRINT = 21
    NODE_BREAK = 22
    NODE_CONTINUE = 23
    NODE_RETURN = 24
    NODE_CHARACTER = 25
    NODE_STRCAT = 26

    def __init__(self, line_number, name):
        """Record the node's descriptive ``name`` and its ``line_number``."""
        self.my_node_name = name
        self.my_line_number = line_number

    @property
    def line_number(self):
        """Line number supplied when the node was created."""
        return self.my_line_number

    @property
    def node_name(self):
        """Descriptive name supplied when the node was created."""
        return self.my_node_name
class IfNode(Node):
    """Node for an ``if`` expression: a condition plus then/else parts."""

    def __init__(self, line_number, condition, then_part, else_part):
        """Store the condition and both branches as given by the caller."""
        super().__init__(line_number, 'if ...')
        self.node_type = self.NODE_IF
        self.condition = condition
        self.then_part = then_part
        self.else_part = else_part
class ForNode(Node):
    """Node for a ``for`` loop over a list-valued expression."""

    def __init__(self, line_number, variable, list_field_expr, separator, block):
        """Store the loop variable name, list expression, separator and body."""
        super().__init__(line_number, 'for ...:')
        self.node_type = self.NODE_FOR
        self.variable = variable
        self.list_field_expr = list_field_expr
        self.separator = separator
        self.block = block
class BreakNode(Node):
    """Node for a ``break`` statement."""

    def __init__(self, line_number):
        super().__init__(line_number, 'break')
        self.node_type = self.NODE_BREAK
class ContinueNode(Node):
    """Node for a ``continue`` statement."""

    def __init__(self, line_number):
        super().__init__(line_number, 'continue')
        self.node_type = self.NODE_CONTINUE
class ReturnNode(Node):
    """Node for a ``return`` statement; ``expr`` is the returned expression."""

    def __init__(self, line_number, expr):
        super().__init__(line_number, 'return')
        self.node_type = self.NODE_RETURN
        self.expr = expr
class AssignNode(Node):
    """Node for an assignment: ``left`` is the target name, ``right`` the value expression."""

    def __init__(self, line_number, left, right):
        # ``left`` is concatenated into the node name, so it must be a string.
        super().__init__(line_number, 'assign to ' + left)
        self.node_type = self.NODE_ASSIGN
        self.left = left
        self.right = right
class FunctionNode(Node):
    """Node for a call to a named function with a list of argument expressions."""

    def __init__(self, line_number, function_name, expression_list):
        super().__init__(line_number, function_name + '()')
        self.node_type = self.NODE_FUNC
        self.name = function_name
        self.expression_list = expression_list
class CallNode(Node):
    """Node for calling a template.

    ``name`` is used only to build the node's descriptive name; ``function``
    is the callee to evaluate and ``expression_list`` holds the argument
    expressions.
    """

    def __init__(self, line_number, name, function, expression_list):
        super().__init__(line_number, 'call template: ' + name)
        self.node_type = self.NODE_CALL
        self.function = function
        self.expression_list = expression_list
class ArgumentsNode(Node):
    """Node for the ``arguments()`` construct."""

    def __init__(self, line_number, expression_list):
        super().__init__(line_number, 'arguments()')
        self.node_type = self.NODE_ARGUMENTS
        self.expression_list = expression_list
class GlobalsNode(Node):
    """Node for the ``globals()`` construct."""

    def __init__(self, line_number, expression_list):
        super().__init__(line_number, 'globals()')
        self.node_type = self.NODE_GLOBALS
        self.expression_list = expression_list
class SetGlobalsNode(Node):
    """Node for the ``set_globals()`` construct."""

    def __init__(self, line_number, expression_list):
        super().__init__(line_number, 'set_globals()')
        self.node_type = self.NODE_SET_GLOBALS
        self.expression_list = expression_list
class StringCompareNode(Node):
    """Node for an infix string comparison between two sub-expressions."""

    def __init__(self, line_number, operator, left, right):
        """Store the operator text and the left/right operand expressions."""
        # The node name was previously misspelled 'comparision: '; it now
        # matches the spelling used by NumericCompareNode.
        Node.__init__(self, line_number, 'comparison: ' + operator)
        self.node_type = self.NODE_COMPARE_STRING
        self.operator = operator
        self.left = left
        self.right = right
class NumericCompareNode(Node):
    """Node for an infix numeric comparison between two sub-expressions."""

    def __init__(self, line_number, operator, left, right):
        super().__init__(line_number, 'comparison: ' + operator)
        self.node_type = self.NODE_COMPARE_NUMERIC
        self.operator = operator
        self.left = left
        self.right = right
class LogopBinaryNode(Node):
    """Node for a binary logical operator applied to two sub-expressions."""

    def __init__(self, line_number, operator, left, right):
        super().__init__(line_number, 'binary operator: ' + operator)
        self.node_type = self.NODE_BINARY_LOGOP
        self.operator = operator
        self.left = left
        self.right = right
class LogopUnaryNode(Node):
    """Node for a unary logical operator applied to one sub-expression."""

    def __init__(self, line_number, operator, expr):
        super().__init__(line_number, 'unary operator: ' + operator)
        self.node_type = self.NODE_UNARY_LOGOP
        self.operator = operator
        self.expr = expr
class NumericBinaryNode(Node):
    """Node for a binary arithmetic operator applied to two sub-expressions."""

    def __init__(self, line_number, operator, left, right):
        super().__init__(line_number, 'binary operator: ' + operator)
        self.node_type = self.NODE_BINARY_ARITHOP
        self.operator = operator
        self.left = left
        self.right = right
class NumericUnaryNode(Node):
    """Node for a unary arithmetic operator applied to one sub-expression."""

    def __init__(self, line_number, operator, expr):
        super().__init__(line_number, 'unary operator: ' + operator)
        self.node_type = self.NODE_UNARY_ARITHOP
        self.operator = operator
        self.expr = expr
class ConstantNode(Node):
    """Node holding a literal value."""

    def __init__(self, line_number, value):
        # ``value`` is concatenated into the node name, so it must be a string.
        super().__init__(line_number, 'constant: ' + value)
        self.node_type = self.NODE_CONSTANT
        self.value = value
class VariableNode(Node):
    """Node for a reference to a named variable (tagged ``NODE_RVALUE``)."""

    def __init__(self, line_number, name):
        super().__init__(line_number, 'variable: ' + name)
        self.node_type = self.NODE_RVALUE
        self.name = name
class FieldNode(Node):
    """Node for ``field()``; ``expression`` yields the field name."""

    def __init__(self, line_number, expression):
        super().__init__(line_number, 'field()')
        self.node_type = self.NODE_FIELD
        self.expression = expression
class RawFieldNode(Node):
    """Node for ``raw_field()``.

    ``expression`` yields the field name; ``default`` is an optional
    expression (``None`` when not supplied).
    """

    def __init__(self, line_number, expression, default=None):
        super().__init__(line_number, 'raw_field()')
        self.node_type = self.NODE_RAW_FIELD
        self.expression = expression
        self.default = default
class FirstNonEmptyNode(Node):
    """Node for ``first_non_empty()`` over a list of candidate expressions."""

    def __init__(self, line_number, expression_list):
        super().__init__(line_number, 'first_non_empty()')
        self.node_type = self.NODE_FIRST_NON_EMPTY
        self.expression_list = expression_list
class ContainsNode(Node):
    """Node for ``contains()``.

    The first four entries of ``arguments`` are taken, in order, as the
    value expression, the test expression, the expression used on a match
    and the expression used otherwise.
    """

    def __init__(self, line_number, arguments):
        super().__init__(line_number, 'contains()')
        self.node_type = self.NODE_CONTAINS
        # Indexing (not unpacking) so that only positions 0..3 are required.
        self.value_expression = arguments[0]
        self.test_expression = arguments[1]
        self.match_expression = arguments[2]
        self.not_match_expression = arguments[3]
class PrintNode(Node):
    """Node for ``print``; ``arguments`` are the expressions to print."""

    def __init__(self, line_number, arguments):
        super().__init__(line_number, 'print')
        self.node_type = self.NODE_PRINT
        self.arguments = arguments
class CharacterNode(Node):
    """Node for ``character()``; ``expression`` yields the character name."""

    def __init__(self, line_number, expression):
        super().__init__(line_number, 'character()')
        self.node_type = self.NODE_CHARACTER
        self.expression = expression
class StrcatNode(Node):
    """Node for ``strcat()`` over a list of expressions."""

    def __init__(self, line_number, expression_list):
        super().__init__(line_number, 'strcat()')
        self.node_type = self.NODE_STRCAT
        self.expression_list = expression_list
class _Parser:
    """Recursive-descent parser that turns a scanned template program into a
    tree of ``Node`` objects.

    The token list (``self.prog``) contains ``(type, text)`` pairs whose
    type is one of the ``LEX_*`` constants. Newlines are recognised by
    comparing a token-list entry directly with ``LEX_NEWLINE``; they are
    skipped while advancing ``self.line_number``.

    The look-ahead helpers deliberately treat any failure to read the
    current token (typically running off the end of the token list) as
    "no match". They catch ``Exception`` rather than using a bare
    ``except`` so that ``KeyboardInterrupt``/``SystemExit`` are not
    swallowed.
    """

    LEX_OP = 1
    LEX_ID = 2
    LEX_CONST = 3
    LEX_EOF = 4
    LEX_STRING_INFIX = 5
    LEX_NUMERIC_INFIX = 6
    LEX_KEYWORD = 7
    LEX_NEWLINE = 8

    def error(self, message):
        """Raise ``ValueError`` describing ``message`` and where it occurred.

        The location is the text of the previously consumed token (with the
        current line number) when the parser is inside the program, or
        'the end of the program' otherwise.
        """
        ln = None
        try:
            tval = "'" + self.prog[self.lex_pos-1][1] + "'"
        except Exception:
            tval = _('Unknown')
        if self.lex_pos > 0 and self.lex_pos < self.prog_len:
            location = tval
            ln = self.line_number
        else:
            location = _('the end of the program')
        if ln:
            raise ValueError(_('{0}: {1} near {2} on line {3}').format(
                'Formatter', message, location, ln))
        else:
            raise ValueError(_('{0}: {1} near {2}').format(
                'Formatter', message, location))

    def check_eol(self):
        """Skip newline tokens, counting each one in ``self.line_number``."""
        while self.lex_pos < len(self.prog) and self.prog[self.lex_pos] == self.LEX_NEWLINE:
            self.line_number += 1
            self.consume()

    def token(self):
        """Return the text of the current token and advance past it.

        Returns ``None`` (without advancing) when no token can be read.
        """
        self.check_eol()
        try:
            token = self.prog[self.lex_pos][1]
            self.lex_pos += 1
            return token
        except Exception:
            return None

    def consume(self):
        """Advance past the current token without looking at it."""
        self.lex_pos += 1

    def token_op_is(self, op):
        """Return True if the current token is the operator ``op``."""
        self.check_eol()
        try:
            token = self.prog[self.lex_pos]
            return token[1] == op and token[0] == self.LEX_OP
        except Exception:
            return False

    def token_op_is_string_infix_compare(self):
        """Return True if the current token is a string infix comparison."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_STRING_INFIX
        except Exception:
            return False

    def token_op_is_numeric_infix_compare(self):
        """Return True if the current token is a numeric infix comparison."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_NUMERIC_INFIX
        except Exception:
            return False

    def token_is_newline(self):
        """Return True if the current token-list entry is a newline marker."""
        return self.lex_pos < len(self.prog) and self.prog[self.lex_pos] == self.LEX_NEWLINE

    def token_is_id(self):
        """Return True if the current token is an identifier."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_ID
        except Exception:
            return False

    def token_is(self, candidate):
        """Return True if the current token is the keyword ``candidate``."""
        self.check_eol()
        try:
            token = self.prog[self.lex_pos]
            return token[1] == candidate and token[0] == self.LEX_KEYWORD
        except Exception:
            return False

    def token_is_keyword(self):
        """Return True if the current token is any keyword."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_KEYWORD
        except Exception:
            return False

    def token_is_constant(self):
        """Return True if the current token is a constant."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_CONST
        except Exception:
            return False

    def token_is_eof(self):
        """Return True at an EOF token or when no token can be read."""
        self.check_eol()
        try:
            return self.prog[self.lex_pos][0] == self.LEX_EOF
        except Exception:
            return True

    def token_text(self):
        """Return the current token's text without consuming it.

        Returns a translated "'End of program'" marker when no token can
        be read.
        """
        self.check_eol()
        try:
            return self.prog[self.lex_pos][1]
        except Exception:
            return _("'End of program'")

    def program(self, parent, funcs, prog):
        """Parse a scanned program and return its expression list.

        :param parent: stored as ``self.parent``; its ``lex_scanner`` is used
            when a called stored template has to be parsed.
        :param funcs: mapping of function name to function object.
        :param prog: pair whose first item is the token list and whose second
            item is the unscanned remainder; a non-empty remainder is
            reported as a scan failure.
        :raises ValueError: on any syntax error (via :meth:`error`).
        """
        self.line_number = 1
        self.lex_pos = 0
        self.parent = parent
        self.funcs = funcs
        self.func_names = frozenset(set(self.funcs.keys()))
        self.prog = prog[0]
        self.prog_len = len(self.prog)
        if prog[1] != '':
            self.error(_("Failed to scan program. Invalid input '{0}'").format(prog[1]))
        tree = self.expression_list()
        if not self.token_is_eof():
            self.error(_("Expected end of program, found '{0}'").format(self.token_text()))
        return tree

    def expression_list(self):
        """Parse expressions separated by ';' and return them as a list.

        Parsing stops at end of input or after an expression that is not
        followed by ';'.
        """
        expr_list = []
        while True:
            while self.token_is_newline():
                self.line_number += 1
                self.consume()
            if self.token_is_eof():
                break
            expr_list.append(self.top_expr())
            if self.token_op_is(';'):
                self.consume()
            else:
                break
        return expr_list

    def if_expression(self):
        """Parse ``if ... then ... [elif ...] [else ...] fi`` into an IfNode.

        The first token (``if`` or ``elif``) is consumed unconditionally. An
        ``elif`` is parsed recursively and becomes the single element of the
        enclosing node's else part; the recursive call consumes the ``fi``.
        """
        self.consume()
        line_number = self.line_number
        condition = self.top_expr()
        if not self.token_is('then'):
            self.error(_("{0} statement: expected '{1}', "
                         "found '{2}'").format('if', 'then', self.token_text()))
        self.consume()
        then_part = self.expression_list()
        if self.token_is('elif'):
            return IfNode(line_number, condition, then_part, [self.if_expression(),])
        if self.token_is('else'):
            self.consume()
            else_part = self.expression_list()
        else:
            else_part = None
        if not self.token_is('fi'):
            self.error(_("{0} statement: expected '{1}', "
                         "found '{2}'").format('if', 'fi', self.token_text()))
        self.consume()
        return IfNode(line_number, condition, then_part, else_part)

    def for_expression(self):
        """Parse ``for <id> in <expr> [separator <expr>] : ... rof``."""
        line_number = self.line_number
        self.consume()
        if not self.token_is_id():
            self.error(_("'{0}' statement: expected an identifier").format('for'))
        variable = self.token()
        if not self.token_is('in'):
            self.error(_("{0} statement: expected '{1}', "
                         "found '{2}'").format('for', 'in', self.token_text()))
        self.consume()
        list_expr = self.top_expr()
        if self.token_is('separator'):
            self.consume()
            separator = self.expr()
        else:
            separator = None
        if not self.token_op_is(':'):
            self.error(_("{0} statement: expected '{1}', "
                         "found '{2}'").format('for', ':', self.token_text()))
        self.consume()
        block = self.expression_list()
        if not self.token_is('rof'):
            self.error(_("{0} statement: expected '{1}', "
                         "found '{2}'").format('for', 'rof', self.token_text()))
        self.consume()
        return ForNode(line_number, variable, list_expr, separator, block)

    def top_expr(self):
        """Parse a full expression (entry point of the precedence chain)."""
        return self.or_expr()

    def or_expr(self):
        """Parse a left-associative chain of ``||`` operators."""
        left = self.and_expr()
        while self.token_op_is('||'):
            self.consume()
            right = self.and_expr()
            left = LogopBinaryNode(self.line_number, 'or', left, right)
        return left

    def and_expr(self):
        """Parse a left-associative chain of ``&&`` operators."""
        left = self.not_expr()
        while self.token_op_is('&&'):
            self.consume()
            right = self.not_expr()
            left = LogopBinaryNode(self.line_number, 'and', left, right)
        return left

    def not_expr(self):
        """Parse an optional (possibly repeated) ``!`` prefix."""
        if self.token_op_is('!'):
            self.consume()
            return LogopUnaryNode(self.line_number, 'not', self.not_expr())
        return self.compare_expr()

    def compare_expr(self):
        """Parse at most one string or numeric comparison.

        The keywords ``in`` and ``inlist`` are treated as string comparison
        operators.
        """
        left = self.add_subtract_expr()
        if (self.token_op_is_string_infix_compare() or
                self.token_is('in') or self.token_is('inlist')):
            operator = self.token()
            return StringCompareNode(self.line_number, operator, left, self.add_subtract_expr())
        if self.token_op_is_numeric_infix_compare():
            operator = self.token()
            return NumericCompareNode(self.line_number, operator, left, self.add_subtract_expr())
        return left

    def add_subtract_expr(self):
        """Parse a left-associative chain of ``+`` / ``-`` operators."""
        left = self.times_divide_expr()
        while self.token_op_is('+') or self.token_op_is('-'):
            operator = self.token()
            right = self.times_divide_expr()
            left = NumericBinaryNode(self.line_number, operator, left, right)
        return left

    def times_divide_expr(self):
        """Parse a left-associative chain of ``*`` / ``/`` operators."""
        left = self.unary_plus_minus_expr()
        while self.token_op_is('*') or self.token_op_is('/'):
            operator = self.token()
            right = self.unary_plus_minus_expr()
            left = NumericBinaryNode(self.line_number, operator, left, right)
        return left

    def unary_plus_minus_expr(self):
        """Parse an optional (possibly repeated) unary ``+`` or ``-``."""
        if self.token_op_is('+'):
            self.consume()
            return NumericUnaryNode(self.line_number, '+', self.unary_plus_minus_expr())
        if self.token_op_is('-'):
            self.consume()
            return NumericUnaryNode(self.line_number, '-', self.unary_plus_minus_expr())
        return self.expr()

    def call_expression(self, name, arguments):
        """Build a CallNode for the stored template ``name``.

        The template's parse tree is taken from ``cached_parse_tree`` when
        present; otherwise its ``program_text`` (which must begin with
        'program:') is scanned and parsed with a fresh parser and the result
        is cached on the function object.
        """
        subprog = self.funcs[name].cached_parse_tree
        if subprog is None:
            text = self.funcs[name].program_text
            if not text.startswith('program:'):
                self.error(_("A stored template must begin with '{0}'").format('program:'))
            text = text[len('program:'):]
            subprog = _Parser().program(self.parent, self.funcs,
                                        self.parent.lex_scanner.scan(text))
            self.funcs[name].cached_parse_tree = subprog
        return CallNode(self.line_number, name, subprog, arguments)

    # {keyword: tuple(preprocessor, node builder) }
    # The preprocessor runs first (for 'if'/'for' it does nothing because the
    # builder consumes the keyword itself); the builder returns the node.
    keyword_nodes = {
        'if': (lambda self:None, if_expression),
        'for': (lambda self:None, for_expression),
        'break': (lambda self: self.consume(), lambda self: BreakNode(self.line_number)),
        'continue': (lambda self: self.consume(), lambda self: ContinueNode(self.line_number)),
        'return': (lambda self: self.consume(), lambda self: ReturnNode(self.line_number, self.expr())),
    }

    # {inlined_function_name: tuple(constraint on the arguments, node builder) }
    # A function is inlined only when its constraint accepts the parsed
    # argument list; otherwise parsing falls through to the generic handling
    # in expr().
    inlined_function_nodes = {
        'field': (lambda args: len(args) == 1,
                  lambda ln, args: FieldNode(ln, args[0])),
        'raw_field': (lambda args: len(args) == 1,
                      lambda ln, args: RawFieldNode(ln, *args)),
        'test': (lambda args: len(args) == 3,
                 lambda ln, args: IfNode(ln, args[0], (args[1],), (args[2],))),
        'first_non_empty': (lambda args: len(args) >= 1,
                            lambda ln, args: FirstNonEmptyNode(ln, args)),
        'assign': (lambda args: len(args) == 2 and len(args[0]) == 1 and args[0][0].node_type == Node.NODE_RVALUE,
                   lambda ln, args: AssignNode(ln, args[0][0].name, args[1])),
        'contains': (lambda args: len(args) == 4,
                     lambda ln, args: ContainsNode(ln, args)),
        'character': (lambda args: len(args) == 1,
                      lambda ln, args: CharacterNode(ln, args[0])),
        'print': (lambda _: True,
                  lambda ln, args: PrintNode(ln, args)),
        'strcat': (lambda _: True,
                   lambda ln, args: StrcatNode(ln, args))
    }

    def expr(self):
        """Parse a primary expression.

        Handles, in order: a parenthesised expression list, a keyword
        construct, an identifier (field shorthand, variable, assignment or
        function call) and a constant. Anything else is a syntax error.
        """
        if self.token_op_is('('):
            self.consume()
            rv = self.expression_list()
            if not self.token_op_is(')'):
                self.error(_("Expected '{0}', found '{1}'").format(')', self.token_text()))
            self.consume()
            return rv

        # Check if we have a keyword-type expression
        if self.token_is_keyword():
            t = self.token_text()
            kw_tuple = self.keyword_nodes.get(t, None)
            if kw_tuple:
                # These are keywords, so there can't be ambiguity between these,
                # ids, and functions.
                kw_tuple[0](self)
                return kw_tuple[1](self)

        # Not a keyword. Check if we have an id reference or a function call
        if self.token_is_id():
            # We have an identifier. Check if it is a shorthand field reference
            line_number = self.line_number
            id_ = self.token()
            if len(id_) > 1 and id_[0] == '$':
                if id_[1] == '$':
                    return RawFieldNode(line_number, ConstantNode(self.line_number, id_[2:]))
                return FieldNode(line_number, ConstantNode(self.line_number, id_[1:]))
            # Do we have a function call?
            if not self.token_op_is('('):
                # Nope. We must have an lvalue (identifier) or an assignment
                if self.token_op_is('='):
                    # classic assignment statement
                    self.consume()
                    return AssignNode(line_number, id_, self.top_expr())
                return VariableNode(line_number, id_)

            # We have a function.
            # Check if it is a known one. We do this here so error reporting is
            # better, as it can identify the tokens near the problem.
            id_ = id_.strip()
            if id_ not in self.func_names:
                self.error(_('Unknown function {0}').format(id_))
            # Eat the opening paren, parse the argument list, then eat the closing paren
            self.consume()
            arguments = list()
            while not self.token_op_is(')'):
                # parse an argument expression (recursive call)
                arguments.append(self.expression_list())
                if not self.token_op_is(','):
                    break
                self.consume()
            t = self.token()
            if t != ')':
                self.error(_("Expected a '{0}' for function call, "
                             "found '{1}'").format(')', t))

            # Check for an inlined function
            function_tuple = self.inlined_function_nodes.get(id_, None)
            if function_tuple and function_tuple[0](arguments):
                return function_tuple[1](line_number, arguments)
            # More complicated special cases
            if id_ == 'arguments' or id_ == 'globals' or id_ == 'set_globals':
                new_args = []
                for arg_list in arguments:
                    # Only the first expression of each argument is used.
                    arg = arg_list[0]
                    if arg.node_type not in (Node.NODE_ASSIGN, Node.NODE_RVALUE):
                        self.error(_("Parameters to '{0}' must be "
                                     "variables or assignments").format(id_))
                    if arg.node_type == Node.NODE_RVALUE:
                        # A bare variable is treated as "variable = ''".
                        arg = AssignNode(line_number, arg.name, ConstantNode(self.line_number, ''))
                    new_args.append(arg)
                if id_ == 'arguments':
                    return ArgumentsNode(line_number, new_args)
                if id_ == 'set_globals':
                    return SetGlobalsNode(line_number, new_args)
                return GlobalsNode(line_number, new_args)
            # Check for calling a stored template
            if id_ in self.func_names and not self.funcs[id_].is_python:
                return self.call_expression(id_, arguments)
            # We must have a reference to a formatter function. Check if
            # the right number of arguments were supplied
            cls = self.funcs[id_]
            if cls.arg_count != -1 and len(arguments) != cls.arg_count:
                self.error(_('Incorrect number of arguments for function {0}').format(id_))
            return FunctionNode(line_number, id_, arguments)
        elif self.token_is_constant():
            # String or number
            return ConstantNode(self.line_number, self.token())
        else:
            # Who knows what?
            self.error(_("Expected an expression, found '{0}'").format(self.token_text()))
class ExecutionBase(Exception):
    """Base for the control-flow exceptions used by the interpreter.

    The exception message is '<name> outside of for loop' (translated) when
    ``name`` is non-empty and the empty string otherwise. The exception also
    carries a ``value`` that starts out as ''.
    """

    def __init__(self, name):
        message = _('{0} outside of for loop').format(name) if name else ''
        super().__init__(message)
        self.value = ''

    def get_value(self):
        """Return the value currently attached to this exception."""
        return self.value

    def set_value(self, v):
        """Attach ``v`` as the value carried by this exception."""
        self.value = v
class ContinueExecuted(ExecutionBase):
    """Control-flow exception named 'continue'."""

    def __init__(self):
        ExecutionBase.__init__(self, 'continue')
class BreakExecuted(ExecutionBase):
    """Control-flow exception named 'break'."""

    def __init__(self):
        ExecutionBase.__init__(self, 'break')
class ReturnExecuted(ExecutionBase):
    """Control-flow exception named 'return'."""

    def __init__(self):
        ExecutionBase.__init__(self, 'return')
class StopException(Exception):
    """Exception whose message is always 'Template evaluation stopped'."""

    def __init__(self):
        Exception.__init__(self, 'Template evaluation stopped')
class _Interpreter:
def error(self, message, line_number):
m = _('Interpreter: {0} - line number {1}').format(message, line_number)
raise ValueError(m)
def program(self, funcs, parent, prog, val, is_call=False, args=None,
global_vars=None, break_reporter=None):
self.parent = parent
self.parent_kwargs = parent.kwargs
self.parent_book = parent.book
self.funcs = funcs
self.locals = {'$':val}
self.override_line_number = None
self.global_vars = global_vars if isinstance(global_vars, dict) else {}
if break_reporter:
self.break_reporter = self.call_break_reporter
self.real_break_reporter = break_reporter
else:
self.break_reporter = None
try:
if is_call:
ret = self.do_node_call(CallNode(1, prog, None), args=args)
else:
ret = self.expression_list(prog)
except ReturnExecuted as e:
ret = e.get_value()
return ret
def call_break_reporter(self, txt, val, line_number):
self.real_break_reporter(txt, val, self.locals,
self.override_line_number if self.override_line_number
else line_number)
def expression_list(self, prog):
val = ''
try:
for p in prog:
val = self.expr(p)
except (BreakExecuted, ContinueExecuted) as e:
e.set_value(val)
raise e
return val
INFIX_STRING_COMPARE_OPS = {
"==": lambda x, y: strcmp(x, y) == 0,
"!=": lambda x, y: strcmp(x, y) != 0,
"<": lambda x, y: strcmp(x, y) < 0,
"<=": lambda x, y: strcmp(x, y) <= 0,
">": lambda x, y: strcmp(x, y) > 0,
">=": lambda x, y: strcmp(x, y) >= 0,
"in": lambda x, y: re.search(x, y, flags=re.I),
"inlist": lambda x, y: list(filter(partial(re.search, x, flags=re.I),
[v.strip() for v in y.split(',') if v.strip()]))
}
def do_node_string_infix(self, prog):
try:
left = self.expr(prog.left)
right = self.expr(prog.right)
res = '1' if self.INFIX_STRING_COMPARE_OPS[prog.operator](left, right) else ''
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Error during string comparison: "
"operator '{0}'").format(prog.operator), prog.line_number)
INFIX_NUMERIC_COMPARE_OPS = {
"==#": lambda x, y: x == y,
"!=#": lambda x, y: x != y,
"<#": lambda x, y: x < y,
"<=#": lambda x, y: x <= y,
">#": lambda x, y: x > y,
">=#": lambda x, y: x >= y,
}
def float_deal_with_none(self, v):
# Undefined values and the string 'None' are assumed to be zero.
# The reason for string 'None': raw_field returns it for undefined values
return float(v if v and v != 'None' else 0)
def do_node_numeric_infix(self, prog):
try:
left = self.float_deal_with_none(self.expr(prog.left))
right = self.float_deal_with_none(self.expr(prog.right))
res = '1' if self.INFIX_NUMERIC_COMPARE_OPS[prog.operator](left, right) else ''
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Value used in comparison is not a number: "
"operator '{0}'").format(prog.operator), prog.line_number)
def do_node_if(self, prog):
line_number = prog.line_number
test_part = self.expr(prog.condition)
if self.break_reporter:
self.break_reporter("'if': condition value", test_part, line_number)
if test_part:
v = self.expression_list(prog.then_part)
if self.break_reporter:
self.break_reporter("'if': then-block value", v, line_number)
return v
elif prog.else_part:
v = self.expression_list(prog.else_part)
if self.break_reporter:
self.break_reporter("'if': else-block value", v, line_number)
return v
return ''
def do_node_rvalue(self, prog):
try:
if (self.break_reporter):
self.break_reporter(prog.node_name, self.locals[prog.name], prog.line_number)
return self.locals[prog.name]
except:
self.error(_("Unknown identifier '{0}'").format(prog.name), prog.line_number)
def do_node_func(self, prog):
args = list()
for arg in prog.expression_list:
# evaluate the expression (recursive call)
args.append(self.expr(arg))
# Evaluate the function.
id_ = prog.name.strip()
cls = self.funcs[id_]
res = cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.locals, *args)
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
def do_node_call(self, prog, args=None):
if (self.break_reporter):
self.break_reporter(prog.node_name, _('before evaluating arguments'), prog.line_number)
if args is None:
args = []
for arg in prog.expression_list:
# evaluate the expression (recursive call)
args.append(self.expr(arg))
saved_locals = self.locals
self.locals = {}
for dex, v in enumerate(args):
self.locals['*arg_'+ str(dex)] = v
if (self.break_reporter):
self.break_reporter(prog.node_name, _('after evaluating arguments'), prog.line_number)
saved_line_number = self.override_line_number
self.override_line_number = (self.override_line_number if self.override_line_number
else prog.line_number)
else:
saved_line_number = None
try:
val = self.expression_list(prog.function)
except ReturnExecuted as e:
val = e.get_value()
self.override_line_number = saved_line_number
self.locals = saved_locals
if (self.break_reporter):
self.break_reporter(prog.node_name + _(' returned value'), val, prog.line_number)
return val
def do_node_arguments(self, prog):
for dex, arg in enumerate(prog.expression_list):
self.locals[arg.left] = self.locals.get('*arg_'+ str(dex), self.expr(arg.right))
if (self.break_reporter):
self.break_reporter(prog.node_name, '', prog.line_number)
return ''
def do_node_globals(self, prog):
res = ''
for arg in prog.expression_list:
res = self.locals[arg.left] = self.global_vars.get(arg.left, self.expr(arg.right))
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
def do_node_set_globals(self, prog):
res = ''
for arg in prog.expression_list:
res = self.global_vars[arg.left] = self.locals.get(arg.left, self.expr(arg.right))
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
def do_node_constant(self, prog):
if (self.break_reporter):
self.break_reporter(prog.node_name, prog.value, prog.line_number)
return prog.value
def do_node_field(self, prog):
try:
name = self.expr(prog.expression)
try:
res = self.parent.get_value(name, [], self.parent_kwargs)
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except:
self.error(_("Unknown field '{0}'").format(name), prog.line_number)
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Unknown field '{0}'").format('internal parse error'),
prog.line_number)
def do_node_raw_field(self, prog):
try:
name = self.expr(prog.expression)
name = field_metadata.search_term_to_field_key(name)
res = getattr(self.parent_book, name, None)
if res is None and prog.default is not None:
res = self.expr(prog.default)
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
if res is not None:
if isinstance(res, list):
fm = self.parent_book.metadata_for_field(name)
if fm is None:
res = ', '.join(res)
else:
res = fm['is_multiple']['list_to_ui'].join(res)
else:
res = str(res)
else:
res = str(res) # Should be the string "None"
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Unknown field '{0}'").format('internal parse error'),
prog.line_number)
def do_node_assign(self, prog):
t = self.expr(prog.right)
self.locals[prog.left] = t
if (self.break_reporter):
self.break_reporter(prog.node_name, t, prog.line_number)
return t
def do_node_first_non_empty(self, prog):
for expr in prog.expression_list:
v = self.expr(expr)
if v:
if self.break_reporter:
self.break_reporter(prog.node_name, v, prog.line_number)
return v
if (self.break_reporter):
self.break_reporter(prog.node_name, '', prog.line_number)
return ''
def do_node_strcat(self, prog):
res = ''.join([self.expr(expr) for expr in prog.expression_list])
if self.break_reporter:
self.break_reporter(prog.node_name, res, prog.line_number)
return res
def do_node_for(self, prog):
line_number = prog.line_number
try:
separator = ',' if prog.separator is None else self.expr(prog.separator)
v = prog.variable
f = self.expr(prog.list_field_expr)
res = getattr(self.parent_book, f, f)
if res is not None:
if not isinstance(res, list):
res = [r.strip() for r in res.split(separator) if r.strip()]
ret = ''
if self.break_reporter:
self.break_reporter("'for' list value", separator.join(res), line_number)
try:
for x in res:
try:
self.locals[v] = x
ret = self.expression_list(prog.block)
except ContinueExecuted as e:
ret = e.get_value()
except BreakExecuted as e:
ret = e.get_value()
if (self.break_reporter):
self.break_reporter("'for' block value", ret, line_number)
elif self.break_reporter:
# Shouldn't get here
self.break_reporter("'for' list value", '', line_number)
ret = ''
return ret
except (StopException, ValueError) as e:
raise e
except Exception as e:
self.error(_("Unhandled exception '{0}'").format(e), line_number)
def do_node_break(self, prog):
if (self.break_reporter):
self.break_reporter(prog.node_name, '', prog.line_number)
raise BreakExecuted()
def do_node_continue(self, prog):
if (self.break_reporter):
self.break_reporter(prog.node_name, '', prog.line_number)
raise ContinueExecuted()
def do_node_return(self, prog):
v = self.expr(prog.expr)
if (self.break_reporter):
self.break_reporter(prog.node_name, v, prog.line_number)
e = ReturnExecuted()
e.set_value(v)
raise e
def do_node_contains(self, prog):
v = self.expr(prog.value_expression)
t = self.expr(prog.test_expression)
if re.search(t, v, flags=re.I):
res = self.expr(prog.match_expression)
else:
res = self.expr(prog.not_match_expression)
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
LOGICAL_BINARY_OPS = {
'and': lambda self, x, y: self.expr(x) and self.expr(y),
'or': lambda self, x, y: self.expr(x) or self.expr(y),
}
def do_node_logop(self, prog):
try:
res = ('1' if self.LOGICAL_BINARY_OPS[prog.operator](self, prog.left, prog.right) else '')
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Error during operator evaluation: "
"operator '{0}'").format(prog.operator), prog.line_number)
LOGICAL_UNARY_OPS = {
'not': lambda x: not x,
}
def do_node_logop_unary(self, prog):
try:
expr = self.expr(prog.expr)
res = ('1' if self.LOGICAL_UNARY_OPS[prog.operator](expr) else '')
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Error during operator evaluation: "
"operator '{0}'").format(prog.operator), prog.line_number)
ARITHMETIC_BINARY_OPS = {
'+': lambda x, y: x + y,
'-': lambda x, y: x - y,
'*': lambda x, y: x * y,
'/': lambda x, y: x / y,
}
def do_node_binary_arithop(self, prog):
try:
answer = self.ARITHMETIC_BINARY_OPS[prog.operator](
self.float_deal_with_none(self.expr(prog.left)),
self.float_deal_with_none(self.expr(prog.right)))
res = str(answer if modf(answer)[0] != 0 else int(answer))
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Error during operator evaluation: "
"operator '{0}'").format(prog.operator), prog.line_number)
ARITHMETIC_UNARY_OPS = {
'+': lambda x: x,
'-': lambda x: -x,
}
def do_node_unary_arithop(self, prog):
try:
expr = self.ARITHMETIC_UNARY_OPS[prog.operator](float(self.expr(prog.expr)))
res = str(expr if modf(expr)[0] != 0 else int(expr))
if (self.break_reporter):
self.break_reporter(prog.node_name, res, prog.line_number)
return res
except (StopException, ValueError) as e:
raise e
except:
self.error(_("Error during operator evaluation: "
"operator '{0}'").format(prog.operator), prog.line_number)
characters = {
'return': '\r',
'newline': '\n',
'tab': '\t',
'backslash': '\\',
}
def do_node_character(self, prog):
try:
key = self.expr(prog.expression)
ret = self.characters.get(key, None)
if ret is None:
self.error(_("Function {0}: invalid character name '{1}")
.format('character', key), prog.line_number)
if (self.break_reporter):
self.break_reporter(prog.node_name, ret, prog.line_number)
except (StopException, ValueError) as e:
raise e
return ret
def do_node_print(self, prog):
res = []
for arg in prog.arguments:
res.append(self.expr(arg))
print(res)
return res[0] if res else ''
NODE_OPS = {
Node.NODE_IF: do_node_if,
Node.NODE_ASSIGN: do_node_assign,
Node.NODE_CONSTANT: do_node_constant,
Node.NODE_RVALUE: do_node_rvalue,
Node.NODE_FUNC: do_node_func,
Node.NODE_FIELD: do_node_field,
Node.NODE_RAW_FIELD: do_node_raw_field,
Node.NODE_COMPARE_STRING: do_node_string_infix,
Node.NODE_COMPARE_NUMERIC:do_node_numeric_infix,
Node.NODE_ARGUMENTS: do_node_arguments,
Node.NODE_CALL: do_node_call,
Node.NODE_FIRST_NON_EMPTY:do_node_first_non_empty,
Node.NODE_FOR: do_node_for,
Node.NODE_GLOBALS: do_node_globals,
Node.NODE_SET_GLOBALS: do_node_set_globals,
Node.NODE_CONTAINS: do_node_contains,
Node.NODE_BINARY_LOGOP: do_node_logop,
Node.NODE_UNARY_LOGOP: do_node_logop_unary,
Node.NODE_BINARY_ARITHOP: do_node_binary_arithop,
Node.NODE_UNARY_ARITHOP: do_node_unary_arithop,
Node.NODE_PRINT: do_node_print,
Node.NODE_BREAK: do_node_break,
Node.NODE_CONTINUE: do_node_continue,
Node.NODE_RETURN: do_node_return,
Node.NODE_CHARACTER: do_node_character,
Node.NODE_STRCAT: do_node_strcat,
}
def expr(self, prog):
    '''
    Evaluate ``prog`` and return its value.

    A list is handed to ``self.expression_list``; anything else is treated
    as a node and dispatched through ``NODE_OPS`` on its ``node_type``.
    ValueError, ExecutionBase and StopException pass through unchanged. Any
    other exception is reported through ``self.error`` as an internal error
    (with a traceback printed first when DEBUG is set).
    '''
    try:
        if isinstance(prog, list):
            return self.expression_list(prog)
        handler = self.NODE_OPS[prog.node_type]
        return handler(self, prog)
    except (ValueError, ExecutionBase, StopException):
        raise
    except Exception as err:
        if DEBUG:
            traceback.print_exc()
        message = _("Internal error evaluating an expression: '{0}'").format(str(err))
        self.error(message, prog.line_number)
class TemplateFormatter(string.Formatter):
    '''
    Provides a format function that substitutes '' for any missing value.

    Subclasses must implement get_value(). Templates are evaluated with
    safe_format() (errors are returned as text) or unsafe_format() (errors
    are raised).
    '''

    _validation_string = 'This Is Some Text THAT SHOULD be LONG Enough.%^&*'

    # Dict to do recursion detection. It is up to the individual get_value
    # method to use it. It is cleared when starting to format a template.
    composite_values = {}

    def __init__(self):
        string.Formatter.__init__(self)
        self.book = None
        self.kwargs = None
        self.strip_results = True
        self.column_name = None
        self.template_cache = None
        self.global_vars = {}
        self.locals = {}
        self.funcs = formatter_functions().get_functions()
        # Interpreters are allocated lazily, one per recursion level; see
        # the gpm_interpreter property.
        self._interpreters = []
        self._template_parser = None
        self.recursion_stack = []
        # -1 means "not formatting"; save_state() moves it to 0 for the
        # outermost call.
        self.recursion_level = -1

    def _do_format(self, val, fmt):
        '''
        Apply the format specification ``fmt`` to the string ``val``.

        ``val`` is returned untouched when either argument is empty. The
        last character of ``fmt`` selects the type: 's' formats the string
        as is, the integer types convert ``val`` with int() first and the
        float types convert it with float() first. The validation string is
        treated as '0' so that numeric formats validate.

        Raises ValueError when ``val`` cannot be converted to the number
        type the format requires.
        '''
        if not fmt or not val:
            return val
        if val == self._validation_string:
            val = '0'
        typ = fmt[-1]
        if typ == 's':
            pass
        elif typ in 'bcdoxXn':
            try:
                val = int(val)
            except Exception:
                raise ValueError(
                    _('format: type {0} requires an integer value, got {1}').format(typ, val))
        elif typ in 'eEfFgG%':
            # 'n' is not repeated here: it is already handled as an integer
            # type by the branch above.
            try:
                val = float(val)
            except Exception:
                # Not a bare ``except:`` so that KeyboardInterrupt and
                # SystemExit are never reported as a format error.
                raise ValueError(
                    _('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
        return str(('{0:'+fmt+'}').format(val))

    def _explode_format_string(self, fmt):
        '''
        Split ``fmt`` of the form 'format|prefix|suffix' into its three
        parts. When ``fmt`` does not have that shape, or anything goes wrong
        while matching, the result is ``(fmt, '', '')``.
        '''
        try:
            matches = self.format_string_re.match(fmt)
            if matches is None or matches.lastindex != 3:
                return fmt, '', ''
            return matches.groups()
        except Exception:
            # Best effort: fall back to "no prefix/suffix" on ordinary
            # errors, but let KeyboardInterrupt/SystemExit through.
            if DEBUG:
                traceback.print_exc()
            return fmt, '', ''

    format_string_re = re.compile(r'^(.*)\|([^\|]*)\|(.*)$', re.DOTALL)
    compress_spaces = re.compile(r'\s+')
    backslash_comma_to_comma = re.compile(r'\\,')

    # Splits the argument text of an old-style function reference on commas
    # that are not preceded by a backslash; the final argument ends at ')'.
    arg_parser = re.Scanner([
        (r',', lambda x,t: ''),
        (r'.*?((?<!\\),)', lambda x,t: t[:-1]),
        (r'.*?\)', lambda x,t: t[:-1]),
    ])

    # ################# Template language lexical analyzer ######################

    lex_scanner = re.Scanner([
        (r'(==#|!=#|<=#|<#|>=#|>#)', lambda x,t: (_Parser.LEX_NUMERIC_INFIX, t)),  # noqa
        (r'(==|!=|<=|<|>=|>)', lambda x,t: (_Parser.LEX_STRING_INFIX, t)),  # noqa
        (r'(if|then|else|elif|fi)\b', lambda x,t: (_Parser.LEX_KEYWORD, t)),  # noqa
        (r'(for|in|rof|separator)\b', lambda x,t: (_Parser.LEX_KEYWORD, t)),  # noqa
        (r'(break|continue)\b', lambda x,t: (_Parser.LEX_KEYWORD, t)),  # noqa
        (r'(return|inlist)\b', lambda x,t: (_Parser.LEX_KEYWORD, t)),  # noqa
        (r'(\|\||&&|!)', lambda x,t: (_Parser.LEX_OP, t)),  # noqa
        (r'[(),=;:\+\-*/]', lambda x,t: (_Parser.LEX_OP, t)),  # noqa
        (r'-?[\d\.]+', lambda x,t: (_Parser.LEX_CONST, t)),  # noqa
        (r'\$\$?#?\w+', lambda x,t: (_Parser.LEX_ID, t)),  # noqa
        (r'\$', lambda x,t: (_Parser.LEX_ID, t)),  # noqa
        (r'\w+', lambda x,t: (_Parser.LEX_ID, t)),  # noqa
        (r'".*?((?<!\\)")', lambda x,t: (_Parser.LEX_CONST, t[1:-1])),  # noqa
        (r'\'.*?((?<!\\)\')', lambda x,t: (_Parser.LEX_CONST, t[1:-1])),  # noqa
        (r'\n#.*?(?:(?=\n)|$)', lambda x,t: _Parser.LEX_NEWLINE),  # noqa
        (r'\s', lambda x,t: _Parser.LEX_NEWLINE if t == '\n' else None),  # noqa
    ], flags=re.DOTALL)

    def _eval_program(self, val, prog, column_name, global_vars, break_reporter):
        '''
        Parse the program text ``prog`` and run it with the interpreter for
        the current recursion level, returning the interpreter's result.

        When both ``column_name`` and ``self.template_cache`` are set, the
        parse tree is looked up in, and stored into, the cache under
        ``column_name``; otherwise the program is parsed on every call.
        '''
        if column_name is not None and self.template_cache is not None:
            tree = self.template_cache.get(column_name, None)
            if not tree:
                tree = self.gpm_parser.program(self, self.funcs, self.lex_scanner.scan(prog))
                self.template_cache[column_name] = tree
        else:
            tree = self.gpm_parser.program(self, self.funcs, self.lex_scanner.scan(prog))
        return self.gpm_interpreter.program(self.funcs, self, tree, val,
                                            global_vars=global_vars, break_reporter=break_reporter)

    def _eval_sfm_call(self, template_name, args, global_vars):
        '''
        Run the stored template ``self.funcs[template_name]`` as a call with
        arguments ``args``. The parse tree is built from the function's
        program text (minus the leading 'program:') on first use and kept
        on the function object as ``cached_parse_tree``.
        '''
        func = self.funcs[template_name]
        tree = func.cached_parse_tree
        if tree is None:
            tree = self.gpm_parser.program(self, self.funcs,
                       self.lex_scanner.scan(func.program_text[len('program:'):]))
            func.cached_parse_tree = tree
        return self.gpm_interpreter.program(self.funcs, self, tree, None,
                                            is_call=True, args=args,
                                            global_vars=global_vars)

    # ################# Override parent classes methods #####################

    def get_value(self, key, args, kwargs):
        raise Exception('get_value must be implemented in the subclass')

    def format_field(self, val, fmt):
        '''
        Format the field value ``val`` according to the template format
        ``fmt`` and return the resulting string.

        ``fmt`` may carry '|prefix|suffix' conditional text and either a
        program-style expression enclosed in single quotes or an old-style
        'function(args)' reference, each optionally preceded by a display
        format and a colon. An empty result yields '' without prefix or
        suffix. An unknown old-style function name yields an
        '<name>: unknown function' message.

        Raises ValueError when an old-style function is given the wrong
        number of arguments or the display format cannot be applied.
        '''
        # ensure we are dealing with a string. Numbers that are zero are
        # treated as "no value".
        if isinstance(val, numbers.Number):
            val = str(val) if val else ''
        # Handle conditional text
        fmt, prefix, suffix = self._explode_format_string(fmt)

        # Handle functions
        # First see if we have a functional-style expression. p ends up as
        # the index of the opening quote, or -1 if there is none.
        if fmt.startswith('\''):
            p = 0
        else:
            p = fmt.find(':\'')
            if p >= 0:
                p += 1
        if p >= 0 and fmt[-1] == '\'':
            val = self._eval_program(val, fmt[p+1:-1], None, self.global_vars, None)
            colon = fmt[0:p].find(':')
            if colon < 0:
                dispfmt = ''
            else:
                dispfmt = fmt[0:colon]
        else:
            # check for old-style function references
            p = fmt.find('(')
            dispfmt = fmt
            if p >= 0 and fmt[-1] == ')':
                colon = fmt[0:p].find(':')
                if colon < 0:
                    dispfmt = ''
                    colon = 0
                else:
                    dispfmt = fmt[0:colon]
                    colon += 1

                fname = fmt[colon:p].strip()
                if fname not in self.funcs:
                    return _('%s: unknown function')%fname
                func = self.funcs[fname]
                if func.arg_count == 2:
                    # only one arg expected. Don't bother to scan. Avoids need
                    # for escaping characters
                    args = [fmt[p+1:-1]]
                else:
                    args = self.arg_parser.scan(fmt[p+1:])[0]
                    args = [self.backslash_comma_to_comma.sub(',', a) for a in args]
                if not func.is_python:
                    args.insert(0, val)
                    val = self._eval_sfm_call(fname, args, self.global_vars)
                else:
                    if (func.arg_count == 1 and (len(args) != 1 or args[0])) or \
                            (func.arg_count > 1 and func.arg_count != len(args)+1):
                        raise ValueError(
                            _('Incorrect number of arguments for function {0}').format(fname))
                    if func.arg_count == 1:
                        val = func.eval_(self, self.kwargs, self.book, self.locals, val)
                    else:
                        val = func.eval_(self, self.kwargs, self.book, self.locals, val, *args)
                    if self.strip_results:
                        val = val.strip()
        if val:
            val = self._do_format(val, dispfmt)
        if not val:
            return ''
        return prefix + val + suffix

    def evaluate(self, fmt, args, kwargs, global_vars, break_reporter=None):
        '''
        Evaluate the template ``fmt`` and return the resulting string.

        Templates starting with 'program:' are run as programs with
        ``kwargs.get('$')`` as the value; all others go through vformat().
        When ``self.strip_results`` is set, runs of whitespace in a
        vformat() result are collapsed to one space and leading/trailing
        spaces are removed from either kind of result.
        '''
        if fmt.startswith('program:'):
            ans = self._eval_program(kwargs.get('$', None), fmt[8:],
                                     self.column_name, global_vars, break_reporter)
        else:
            ans = self.vformat(fmt, args, kwargs)
            if self.strip_results:
                ans = self.compress_spaces.sub(' ', ans)
        if self.strip_results:
            ans = ans.strip(' ')
        return ans

    # It is possible for a template to indirectly invoke other templates by
    # doing field references of composite columns. If this happens then the
    # reference can use different parameters when calling safe_format(). Because
    # the parameters are saved as instance variables they can possibly affect
    # the 'calling' template. To avoid this problem, save the current formatter
    # state when recursion is detected. There is no point in saving the level
    # 0 state.

    def save_state(self):
        '''
        Enter one recursion level. Returns a tuple with the current
        formatter state when this is a nested call, otherwise None.
        '''
        self.recursion_level += 1
        if self.recursion_level > 0:
            return (
                (self.strip_results,
                 self.column_name,
                 self.template_cache,
                 self.kwargs,
                 self.book,
                 self.global_vars,
                 self.funcs,
                 self.locals))
        else:
            return None

    def restore_state(self, state):
        '''
        Leave one recursion level and, if ``state`` is not None, put back
        the formatter state that save_state() returned.
        '''
        self.recursion_level -= 1
        if state is not None:
            (self.strip_results,
             self.column_name,
             self.template_cache,
             self.kwargs,
             self.book,
             self.global_vars,
             self.funcs,
             self.locals) = state

    # Allocate an interpreter if the formatter encounters a GPM or TPM template.
    # We need to allocate additional interpreters if there is composite recursion
    # so that the templates are evaluated by separate instances. It is OK to
    # reuse already-allocated interpreters because their state is initialized on
    # call. As a side effect, no interpreter is instantiated if no TPM/GPM
    # template is encountered.

    @property
    def gpm_interpreter(self):
        while len(self._interpreters) <= self.recursion_level:
            self._interpreters.append(_Interpreter())
        return self._interpreters[self.recursion_level]

    # Allocate a parser if needed. Parsers cannot recurse so one is sufficient.

    @property
    def gpm_parser(self):
        if self._template_parser is None:
            self._template_parser = _Parser()
        return self._template_parser

    # ######### a formatter that throws exceptions ############

    def unsafe_format(self, fmt, kwargs, book, strip_results=True, global_vars=None):
        '''
        Evaluate the template ``fmt`` and return the result, letting any
        exception raised during evaluation propagate to the caller. The
        formatter state saved on entry is restored on exit in all cases.
        '''
        state = self.save_state()
        try:
            self.strip_results = strip_results
            self.column_name = self.template_cache = None
            self.kwargs = kwargs
            self.book = book
            self.composite_values = {}
            self.locals = {}
            self.global_vars = global_vars if isinstance(global_vars, dict) else {}
            return self.evaluate(fmt, [], kwargs, self.global_vars)
        finally:
            self.restore_state(state)

    # ######### a formatter guaranteed not to throw an exception ############

    def safe_format(self, fmt, kwargs, error_value, book,
                    column_name=None, template_cache=None,
                    strip_results=True, template_functions=None,
                    global_vars=None, break_reporter=None):
        '''
        Evaluate the template ``fmt`` and return the result as a string.

        A StopException raised during evaluation yields its message; any
        other Exception yields ``error_value`` followed by a space and the
        exception's message. ``template_functions``, when given, replaces
        the default function table for this call. The formatter state saved
        on entry is restored on exit in all cases.
        '''
        state = self.save_state()
        if self.recursion_level == 0:
            # Initialize the composite values dict if this is the base-level
            # call. Recursive calls will use the same dict.
            self.composite_values = {}
        try:
            self.strip_results = strip_results
            self.column_name = column_name
            self.template_cache = template_cache
            self.kwargs = kwargs
            self.book = book
            self.global_vars = global_vars if isinstance(global_vars, dict) else {}
            if template_functions:
                self.funcs = template_functions
            else:
                self.funcs = formatter_functions().get_functions()
            self.locals = {}
            try:
                ans = self.evaluate(fmt, [], kwargs, self.global_vars, break_reporter=break_reporter)
            except StopException as e:
                ans = error_message(e)
            except Exception as e:
                if DEBUG:  # and getattr(e, 'is_locking_error', False):
                    traceback.print_exc()
                    if column_name:
                        prints('Error evaluating column named:', column_name)
                ans = error_value + ' ' + error_message(e)
            return ans
        finally:
            self.restore_state(state)
class ValidateFormatter(TemplateFormatter):
    '''
    A formatter for checking templates: every field lookup produces the
    validation string rather than a real value.
    '''

    def get_value(self, key, args, kwargs):
        # The key is irrelevant; every field gets the same stand-in text.
        return self._validation_string

    def validate(self, x):
        '''
        Run the template ``x`` against an empty Metadata object and return
        what safe_format() produces, using 'VALIDATE ERROR' as the error
        value.
        '''
        from calibre.ebooks.metadata.book.base import Metadata
        empty_book = Metadata('')
        return self.safe_format(x, {}, 'VALIDATE ERROR', empty_book)
# Module-level ValidateFormatter instance, created when this module is imported.
validation_formatter = ValidateFormatter()
class EvalFormatter(TemplateFormatter):
    '''
    A template formatter whose field values come from a plain dict rather
    than from an mi instance.
    '''

    def get_value(self, key, args, kwargs):
        '''
        Return the value stored in ``kwargs`` under the lower-cased ``key``.
        An empty key yields ''; a key that is not present yields a
        'No such variable' message.
        '''
        if key == '':
            return ''
        lowered = key.lower()
        fallback = _('No such variable {0}').format(lowered)
        return kwargs.get(lowered, fallback)
# DEPRECATED. This is not thread safe. Do not use.
# Module-level EvalFormatter instance, created when this module is imported.
eval_formatter = EvalFormatter()