%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/rtf2xml/ |
Current File : //lib/calibre/calibre/ebooks/rtf2xml/old_rtf.py |
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   #
#   General Public License for more details.                            #
#                                                                       #
#                                                                       #
#########################################################################
import sys

from . import open_for_read


class OldRtf:
    """
    Check to see if the RTF is an older version.

    Logic:
        If an allowable control word/property happens in the body text
        without being enclosed in brackets (that is, at the same bracket
        depth that was current when the body opened), the file is
        considered old RTF.
    """

    def __init__(self, in_file, bug_handler, run_level, ):
        """
        Required:
            'in_file' -- the file to scan; it is handed to open_for_read
                and read line by line, one token per line.
            'bug_handler' -- stored on the instance; not used by the code
                in this class.
            'run_level' -- verbosity level; when greater than 3 a
                diagnostic is written to stderr on detecting old RTF.
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__run_level = run_level
        # Inline property names (characters 6-16 of a token line) whose
        # appearance outside of a bracket group marks the file as old RTF.
        # A frozenset keeps the once-per-line membership test O(1).
        self.__allowable = frozenset((
            'annotation',
            'blue______',
            'bold______',
            'caps______',
            'char-style',
            'dbl-strike',
            'emboss____',
            'engrave___',
            'font-color',
            'font-down_',
            'font-size_',
            'font-style',
            'font-up___',
            'footnot-mk',
            'green_____',
            'hidden____',
            'italics___',
            'outline___',
            'red_______',
            'shadow____',
            'small-caps',
            'strike-thr',
            'subscript_',
            'superscrip',
            'underlined',
        ))
        # State name -> handler called with each line while in that state.
        self.__action_dict = {
            'before_body': self.__before_body_func,
            'in_body': self.__check_tokens_func,
            'after_pard': self.__after_pard_func,
        }

    def __initiate_values(self):
        """
        Reset the per-scan state so each call to check_if_old_rtf starts
        from 'before_body' with a bracket depth of zero.
        """
        self.__previous_token = ''
        self.__state = 'before_body'
        self.__found_new = 0
        self.__ob_group = 0

    def __check_tokens_func(self, line):
        """
        Handler for the 'in_body' state.

        Returns 'old_rtf' when the current inline property is one of the
        allowable ones and the bracket depth equals the depth recorded
        when the body opened. An allowable property at any other depth is
        only counted. A 'cw<pf<par-def___' token switches the state to
        'after_pard'. Returns None in every case except the first.
        """
        if self.__inline_info in self.__allowable:
            if self.__ob_group == self.__base_ob_count:
                return 'old_rtf'
            self.__found_new += 1
        elif self.__token_info == 'cw<pf<par-def___':
            self.__state = 'after_pard'

    def __before_body_func(self, line):
        """
        Handler for the 'before_body' state: wait for the body-open
        marker, then remember the bracket depth at that point and start
        checking tokens.
        """
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'in_body'
            self.__base_ob_count = self.__ob_group

    def __after_pard_func(self, line):
        """
        Handler for the 'after_pard' state: lines starting with 'cw' are
        skipped without being checked; the first line that does not start
        with 'cw' returns the state to 'in_body'.
        """
        if line[0:2] != 'cw':
            self.__state = 'in_body'

    def check_if_old_rtf(self):
        """
        Requires:
            nothing
        Returns:
            True if file is older RTF
            False if file is newer RTF
        """
        self.__initiate_values()
        with open_for_read(self.__file) as read_obj:
            for line_num, line in enumerate(read_obj, 1):
                self.__token_info = line[:16]
                if self.__token_info == 'mi<mk<body-close':
                    # Reached the end of the body without finding an old
                    # construction.
                    return False
                # Track the bracket nesting depth.
                if self.__token_info == 'ob<nu<open-brack':
                    self.__ob_group += 1
                    # NOTE(review): presumably the bracket number carried
                    # at the end of the token line -- confirm.
                    self.__ob_count = line[-5:-1]
                if self.__token_info == 'cb<nu<clos-brack':
                    self.__ob_group -= 1
                    self.__cb_count = line[-5:-1]
                self.__inline_info = line[6:16]
                if self.__state == 'after_body':
                    return False
                action = self.__action_dict.get(self.__state)
                if action is None:
                    # No handler for this state: report it and carry on
                    # with the next line instead of calling None.
                    try:
                        sys.stderr.write('No action for this state!\n')
                    except Exception:
                        # Reporting is best effort; stderr may be unusable.
                        pass
                    result = None
                else:
                    result = action(line)
                if result == 'new_rtf':
                    return False
                elif result == 'old_rtf':
                    if self.__run_level > 3:
                        sys.stderr.write(
                            'Old rtf construction {} (bracket {}, line {})\n'.format(
                                self.__inline_info, self.__ob_group, line_num)
                        )
                    return True
                self.__previous_token = line[6:16]
        return False