%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/rtf2xml/ |
Current File : //lib/calibre/calibre/ebooks/rtf2xml/list_table.py |
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   #
#   General Public License for more details.                            #
#                                                                       #
#                                                                       #
#########################################################################


class ListTable:
    """
    Parse the list table line. Make a string. Form a dictionary.
    Return the string and the dictionary.

    The input is a newline separated stream of tokens.  The first 16
    characters of every token identify it; the remainder carries its value.
    A small state machine (see ``self.__state_dict``) walks the tokens and
    fills ``self.__all_lists``, which has this shape:

        [
            [ {list attributes}, [{level attributes}], [{level attributes}] ],
            ...
        ]
    """

    def __init__(
        self,
        bug_handler,
        run_level=1,
    ):
        """
        Requires:
            bug_handler -- exception (class or instance) raised when the
                parser meets something it cannot handle
            run_level -- strictness; unknown tokens after an open bracket
                only raise when this is greater than 3
        Returns:
            nothing
        """
        self.__bug_handler = bug_handler
        self.__initiate_values()
        self.__run_level = run_level

    def __initiate_values(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            Set every piece of parser state to its starting value and build
            the dispatch and token-to-attribute lookup tables.
        """
        self.__list_table_final = ''
        self.__state = 'default'
        self.__final_dict = {}
        self.__list_dict = {}
        self.__all_lists = []
        self.__level_text_string = ''
        self.__level_text_list = []
        self.__found_level_text_length = 0
        self.__level_text_position = None
        self.__prefix_string = None
        self.__level_numbers_string = ''
        # state name => method that handles one token while in that state
        self.__state_dict = {
            'default'       : self.__default_func,
            'level'         : self.__level_func,
            'list'          : self.__list_func,
            'unsure_ob'     : self.__after_bracket_func,
            'level_number'  : self.__level_number_func,
            'level_text'    : self.__level_text_func,
            'list_name'     : self.__list_name_func,
        }
        # tokens stored as attributes of a list
        self.__main_list_dict = {
            'cw<ls<ls-tem-id_'  : 'list-template-id',
            'cw<ls<list-hybri'  : 'list-hybrid',
            'cw<ls<lis-tbl-id'  : 'list-table-id',
        }
        # tokens stored as attributes of a level
        self.__level_dict = {
            'cw<ls<level-star'  : 'list-number-start',
            'cw<ls<level-spac'  : 'list-space',
            'cw<ls<level-inde'  : 'level-indent',
            'cw<ls<fir-ln-ind'  : 'first-line-indent',
            'cw<ls<left-inden'  : 'left-indent',
            'cw<ls<tab-stop__'  : 'tabs',
            'cw<ls<level-type'  : 'numbering-type',
            'cw<pf<right-inde'  : 'right-indent',
            'cw<pf<left-inden'  : 'left-indent',
            'cw<pf<fir-ln-ind'  : 'first-line-indent',
            'cw<ci<italics___'  : 'italics',
            'cw<ci<bold______'  : 'bold',
            'cw<ss<para-style'  : 'paragraph-style-name',
        }

    # all_lists =
    # [{anything here?}
    #     [{list-templateid = ""}
    #         [{level-indent}],[{level-indent}]
    #     ]
    # ],

    def __raise_bug(self, msg):
        """
        Requires:
            msg -- text describing the problem
        Returns:
            nothing (always raises)
        Logic:
            Raise the bug handler.  If it is an exception class, instantiate
            it with the message; otherwise raise the object as it is, which
            is what a bare ``raise self.__bug_handler`` would do.
        """
        handler = self.__bug_handler
        if isinstance(handler, type) and issubclass(handler, BaseException):
            raise handler(msg)
        raise handler

    def __parse_lines(self, line):
        """
        Required :
            line --line to parse
        Returns:
            nothing
        Logic:
            Split the lines into a list by a new line. Record the bracket
            number carried by every open or close bracket token, then hand
            the token to the method registered for the current state.
            Finally build the output string.
        """
        self.__ob_count = 0
        self.__ob_group = 0
        for token_line in line.split('\n'):
            self.__token_info = token_line[:16]
            if self.__token_info == 'ob<nu<open-brack':
                # the last four characters are the bracket number
                self.__ob_count = token_line[-4:]
                self.__ob_group += 1
            elif self.__token_info == 'cb<nu<clos-brack':
                self.__cb_count = token_line[-4:]
                self.__ob_group -= 1
            action = self.__state_dict.get(self.__state)
            if action is None:
                # Should be unreachable: every state set in this class has
                # an entry in self.__state_dict.
                self.__raise_bug(
                    'No handler for list table state "%s"\n' % self.__state)
            action(token_line)
        self.__write_final_string()

    def __default_func(self, line):
        """
        Requires:
            line --line to process
        Return:
            nothing
        Logic:
            This state is used at the start and end of a list. Look for an
            opening bracket, which marks the change of state.
        """
        if self.__token_info == 'ob<nu<open-brack':
            self.__state = 'unsure_ob'

    def __found_list_func(self, line):
        """
        Requires:
            line -- line to process
        Returns:
            nothing
        Logic:
            I have found \\list.
            Change the state to list.
            Get the open bracket count so you know when this state ends.
            Append a new list to all lists; its first item is a dictionary
            with the key "list-id" and an empty list as the value.
        """
        self.__state = 'list'
        self.__list_ob_count = self.__ob_count
        self.__all_lists.append([{'list-id': []}])

    def __list_func(self, line):
        """
        Requires:
            line --line to process
        Returns:
            nothing
        Logic:
            This method is called when you are in a list, but outside of a
            level. Check for the end of the list. Otherwise, use
            self.__main_list_dict to determine if you need to add the
            line's value to the attributes of the current list.
        """
        if self.__token_info == 'cb<nu<clos-brack' and \
                self.__cb_count == self.__list_ob_count:
            self.__state = 'default'
        elif self.__token_info == 'ob<nu<open-brack':
            self.__state = 'unsure_ob'
        else:
            att = self.__main_list_dict.get(self.__token_info)
            if att:
                # the dictionary is always the first item in the last list
                self.__all_lists[-1][0][att] = line[20:]

    def __found_level_func(self, line):
        """
        Requires:
            line -- line to process
        Returns:
            nothing
        Logic:
            I have found \\listlevel.
            Change the state to level.
            Get the open bracket count so you know when this state ends.
            Append a list holding one empty dictionary to the current list.
            self.__all_lists now looks like:
                [[{list-id:[]}, [{}]]]
            Where:
                self.__all_lists[-1][0] => the dictionary of list attributes
                self.__all_lists[-1][-1] => a list with just a dictionary
                self.__all_lists[-1][-1][0] => the dictionary of level
                    attributes
        """
        self.__state = 'level'
        self.__level_ob_count = self.__ob_count
        self.__all_lists[-1].append([{}])

    def __level_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Look for the end of this group.
            Change states if an open bracket is found.
            Add an attribute to the current level if the token is listed in
            self.__level_dict.
        """
        if self.__token_info == 'cb<nu<clos-brack' and \
                self.__cb_count == self.__level_ob_count:
            self.__state = 'list'
        elif self.__token_info == 'ob<nu<open-brack':
            self.__state = 'unsure_ob'
        else:
            att = self.__level_dict.get(self.__token_info)
            if att:
                self.__all_lists[-1][-1][0][att] = line[20:]

    def __level_number_func(self, line):
        """
        Requires:
            line -- line to process
        Returns:
            nothing
        Logic:
            Collect the contents of the level numbers group into one string.
            A hexadecimal token is appended as a backslash, an escaped
            apostrophe and the hexadecimal digits; a text token is appended
            as it is. When the group closes, store the string as the
            "level-numbers" attribute of the current level and start over.
        """
        if self.__token_info == 'cb<nu<clos-brack' and \
                self.__cb_count == self.__level_number_ob_count:
            self.__state = 'level'
            self.__all_lists[-1][-1][0]['level-numbers'] = \
                self.__level_numbers_string
            self.__level_numbers_string = ''
        elif self.__token_info == 'tx<hx<__________':
            # NOTE(review): the apostrophe is written as a character
            # reference because the value ends up inside tag text; the
            # literal was damaged in the copy this was restored from --
            # confirm against the upstream file.
            self.__level_numbers_string += '\\&#x0027;%s' % line[18:]
        elif self.__token_info == 'tx<nu<__________':
            self.__level_numbers_string += line[17:]

    def __level_text_func(self, line):
        """
        Requires:
            line --line to process
        Returns:
            nothing
        Logic:
            Check for the end of the group. When it ends, a pending prefix
            of a bullet level becomes its "bullet-type" (underscores
            removed), and all per-group state is cleared so nothing leaks
            into the next level.
            Otherwise, if the text is hexadecimal, call on the method
            __parse_level_text_length.
            Otherwise, if the text is regular text, strip semicolons when
            it ends with one; store it as the pending prefix if no level
            placeholder has been seen yet, else as the suffix of the last
            placeholder. An example is "level1-suffix = '.'"
            Otherwise, check for a level-template-id.
        """
        if self.__token_info == 'cb<nu<clos-brack' and \
                self.__cb_count == self.__level_text_ob_count:
            level_attrs = self.__all_lists[-1][-1][0]
            # .get(): a level is not guaranteed to carry a numbering type
            if self.__prefix_string and \
                    level_attrs.get('numbering-type') == 'bullet':
                level_attrs['bullet-type'] = \
                    self.__prefix_string.replace('_', '')
            self.__state = 'level'
            self.__level_text_string = ''
            self.__found_level_text_length = 0
            # Without these two resets the next level would file its
            # leading text under this level's suffix key, or inherit this
            # level's unused prefix.
            self.__level_text_position = None
            self.__prefix_string = None
        elif self.__token_info == 'tx<hx<__________':
            self.__parse_level_text_length(line)
        elif self.__token_info == 'tx<nu<__________':
            text = line[17:]
            if text.endswith(';'):
                text = text.replace(';', '')
            if not self.__level_text_position:
                self.__prefix_string = text
            else:
                self.__all_lists[-1][-1][0][self.__level_text_position] = text
        elif self.__token_info == 'cw<ls<lv-tem-id_':
            self.__all_lists[-1][-1][0]['level-template-id'] = line[20:]

    def __parse_level_text_length(self, line):
        """
        Requires:
            line --line with hexadecimal number
        Returns:
            nothing
        Logic:
            Method is used to parse hexadecimal tokens in the \\leveltext
            group. The first one in a group is stored as
            "list-text-length". Every later one is a level placeholder: add
            one to the number, mark "show-levelN" as true, remember
            "levelN-suffix" as the key for the text that follows, and turn
            a pending prefix into "levelN-prefix".
        """
        the_num = int(line[18:], 16)
        level_attrs = self.__all_lists[-1][-1][0]
        if not self.__found_level_text_length:
            level_attrs['list-text-length'] = str(the_num)
            self.__found_level_text_length = 1
            return
        the_string = str(the_num + 1)
        self.__level_text_position = 'level%s-suffix' % the_string
        level_attrs['show-level%s' % the_string] = 'true'
        if self.__prefix_string:
            level_attrs['level%s-prefix' % the_string] = self.__prefix_string
            self.__prefix_string = None

    def __list_name_func(self, line):
        """
        Requires:
            line --line to process
        Returns:
            nothing
        Logic:
            Simply check for the end of the group and change states.
        """
        if self.__token_info == 'cb<nu<clos-brack' and \
                self.__cb_count == self.__list_name_ob_count:
            self.__state = 'list'

    def __after_bracket_func(self, line):
        """
        Requires:
            line --line to parse
        Returns:
            nothing.
        Logic:
            The last token found was "{". This method determines what group
            you are now in.
            WARNING: this could cause problems. If no group is found, the
            state will remain unsure_ob, which means no other text will be
            parsed until a known group token turns up. With a run level
            above 3 an unknown token raises the bug handler instead.
        """
        if self.__token_info == 'cw<ls<level-text':
            self.__state = 'level_text'
            self.__level_text_ob_count = self.__ob_count
        elif self.__token_info == 'cw<ls<level-numb':
            self.__level_number_ob_count = self.__ob_count
            self.__state = 'level_number'
        elif self.__token_info == 'cw<ls<list-tb-le':
            self.__found_level_func(line)
        elif self.__token_info == 'cw<ls<list-in-tb':
            self.__found_list_func(line)
        elif self.__token_info == 'cw<ls<list-name_':
            self.__state = 'list_name'
            self.__list_name_ob_count = self.__ob_count
        elif self.__run_level > 3:
            msg = 'No matching token after open bracket\n'
            msg += 'token is "%s"\n' % (line)
            self.__raise_bug(msg)

    def __add_to_final_line(self):
        """
        Method no longer used.
        """
        self.__list_table_final = 'mi<mk<listabbeg_\n'
        self.__list_table_final += 'mi<tg<open______<list-table\n' + \
            'mi<mk<listab-beg\n' + self.__list_table_final
        self.__list_table_final += \
            'mi<mk<listab-end\n' + 'mi<tg<close_____<list-table\n'
        self.__list_table_final += 'mi<mk<listabend_\n'

    def __write_final_string(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            Write out the list-table start tag.
            Iterate through self.__all_lists. For each list, write out a
            list-in-table tag with the key => value pairs of its dictionary
            (the first item), leaving out "list-id".
            Every remaining item is a list holding one dictionary, the
            attributes of a level. Write a level-in-table tag with the level
            number and the key => value pairs. For a bullet level the
            show-level, suffix and prefix attributes are left out.
        """
        not_allow = ('list-id',)
        pieces = [
            'mi<mk<listabbeg_\n',
            'mi<tg<open______<list-table\n',
            'mi<mk<listab-beg\n',
            # This marker has always been emitted a second time here; it is
            # kept so the output stays byte-identical.
            # NOTE(review): probably unintended -- confirm nothing that
            # reads this output depends on it before removing.
            'mi<mk<listabbeg_\n',
        ]
        for one_list in self.__all_lists:
            pieces.append('mi<tg<open-att__<list-in-table')
            pieces.extend(
                f'<{att}>{value}'
                for att, value in one_list[0].items()
                if att not in not_allow
            )
            pieces.append('\n')
            for level_num, level in enumerate(one_list[1:], 1):
                pieces.append('mi<tg<empty-att_<level-in-table')
                pieces.append('<level>%s' % level_num)
                level_attrs = level[0]
                is_bullet = level_attrs.get('numbering-type') == 'bullet'
                for att, value in level_attrs.items():
                    if att in not_allow:
                        continue
                    if is_bullet and (
                            att.startswith('show-level')
                            or att.endswith(('suffix', 'prefix'))):
                        continue
                    pieces.append(f'<{att}>{value}')
                pieces.append('\n')
            pieces.append('mi<tg<close_____<list-in-table\n')
        pieces.append('mi<mk<listab-end\n')
        pieces.append('mi<tg<close_____<list-table\n')
        pieces.append('mi<mk<listabend_\n')
        self.__list_table_final = ''.join(pieces)

    def parse_list_table(self, line):
        """
        Requires:
            line -- string with the tokens of the list table, one per line
        Returns:
            A string and the list of list-table values and attributes
            (self.__all_lists).
        Logic:
            Call on the __parse_lines method, which splits the text string
            into lines (which will be tokens) and processes them.
        """
        self.__parse_lines(line)
        return self.__list_table_final, self.__all_lists