# (extraction residue, not part of the module: "%PDF- %PDF-")
# Directory    : /lib/calibre/calibre/ebooks/rtf2xml/
# Current file : //lib/calibre/calibre/ebooks/rtf2xml/preamble_div.py
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# #
#########################################################################
import sys, os
from calibre.ebooks.rtf2xml import copy, override_table, list_table
from calibre.ptempfile import better_mktemp
from . import open_for_read, open_for_write
class PreambleDiv:
    """
    Break the preamble into divisions.

    The input file is read one token line at a time.  The first 16
    characters of each line identify the token; bracket lines carry a
    four character count in their last field.  Everything that precedes
    the body is collected into per-table strings (font table, color
    table, style sheet, list table, override table, revision table,
    document information) which are written out, wrapped in marker
    lines, as soon as the start of the body is detected.  Body lines are
    then passed through unchanged.
    """

    def __init__(self, in_file,
            bug_handler,
            copy=None,
            no_namespace=None,
            run_level=1,
            ):
        """
        Required:
            'in_file' -- path of the file to read (and to hand back to the
                copy object when done)
            'bug_handler' -- passed on to the list table parser and to the
                copy object
        Optional:
            'copy' -- whether to make a copy of result for debugging
            'no_namespace' -- if true, the opening doc tag is written
                without the xmlns attribute
            'run_level' -- passed on to the list and override table parsers
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__no_namespace = no_namespace
        self.__write_to = better_mktemp()
        self.__run_level = run_level

    def __initiate_values(self):
        """
        Set values, including those for the dictionary.

        Called at the start of make_preamble_divisions so that every run
        starts from a clean state.
        """
        self.__all_lists = {}
        self.__page = {
            'margin-top': 72,
            'margin-bottom': 72,
            'margin-left': 90,
            'margin-right': 90,
            'gutter': 0,
        }
        # Bracket counts are the four character strings taken from the
        # bracket lines; they are empty until the first bracket is seen.
        self.__cb_count = ''
        self.__ob_count = ''
        self.__state = 'preamble'
        # Defaults for the 'ignore' state so that __ignore_func never
        # reads an attribute that has not been set.
        self.__previous_state = 'preamble'
        self.__ignore_num = ''
        self.__rtf_final = ''
        self.__close_group_count = ''
        self.__found_font_table = 0
        self.__list_table_final = ''
        self.__override_table_final = ''
        self.__revision_table_final = ''
        self.__doc_info_table_final = ''
        # Maps both state names (used by make_preamble_divisions) and
        # token names (used by __preamble_func) to their handlers.
        self.__state_dict = {
            'default': self.__default_func,
            'rtf_header': self.__rtf_head_func,
            'preamble': self.__preamble_func,
            'font_table': self.__font_table_func,
            'color_table': self.__color_table_func,
            'style_sheet': self.__style_sheet_func,
            'list_table': self.__list_table_func,
            'override_table': self.__override_table_func,
            'revision_table': self.__revision_table_func,
            'doc_info': self.__doc_info_func,
            'body': self.__body_func,
            'ignore': self.__ignore_func,
            'cw<ri<rtf_______': self.__found_rtf_head_func,
            'cw<pf<par-def___': self.__para_def_func,
            'tx<nu<__________': self.__text_func,
            'cw<tb<row-def___': self.__row_def_func,
            'cw<sc<section___': self.__new_section_func,
            'cw<sc<sect-defin': self.__new_section_func,
            'cw<it<font-table': self.__found_font_table_func,
            'cw<it<colr-table': self.__found_color_table_func,
            'cw<ss<style-shet': self.__found_style_sheet_func,
            'cw<it<listtable_': self.__found_list_table_func,
            'cw<it<lovr-table': self.__found_override_table_func,
            'cw<it<revi-table': self.__found_revision_table_func,
            'cw<di<doc-info__': self.__found_doc_info_func,
            'cw<pa<margin-lef': self.__margin_func,
            'cw<pa<margin-rig': self.__margin_func,
            'cw<pa<margin-top': self.__margin_func,
            'cw<pa<margin-bot': self.__margin_func,
            'cw<pa<gutter____': self.__margin_func,
            'cw<pa<paper-widt': self.__margin_func,
            'cw<pa<paper-hght': self.__margin_func,
            # 'cw<tb<columns___' is deliberately not dispatched to
            # __section_func at present.
        }
        self.__margin_dict = {
            'margin-lef': 'margin-left',
            'margin-rig': 'margin-right',
            'margin-top': 'margin-top',
            'margin-bot': 'margin-bottom',
            'gutter____': 'gutter',
            'paper-widt': 'paper-width',
            'paper-hght': 'paper-height',
        }
        self.__translate_sec = {
            'columns___': 'column',
        }
        self.__section = {}
        self.__color_table_final = ''
        self.__style_sheet_final = ''
        self.__individual_font = 0
        self.__old_font = 0
        self.__ob_group = 0  # depth of group
        # Falsy until a font table (real or default) has been built.
        self.__font_table_final = 0
        self.__list_table_obj = list_table.ListTable(
            run_level=self.__run_level,
            bug_handler=self.__bug_handler,
        )

    def __ignore_func(self, line):
        """
        Ignore all lines, until the bracket is found that marks the end of
        the group.

        The group is identified by self.__ignore_num; when the current
        close bracket count matches it, the state recorded in
        self.__previous_state is restored.
        """
        if self.__ignore_num == self.__cb_count:
            self.__state = self.__previous_state

    def __found_rtf_head_func(self, line):
        """Switch to the 'rtf_header' state."""
        self.__state = 'rtf_header'

    def __wrap_rtf_header(self):
        """Surround the collected rtf header lines with begin/end markers."""
        self.__rtf_final = (
            'mi<mk<rtfhed-beg\n' +
            self.__rtf_final +
            'mi<mk<rtfhed-end\n'
        )

    def __enter_body(self):
        """Switch to the 'body' state and write out the whole preamble."""
        self.__state = 'body'
        self.__write_preamble()

    def __rtf_head_func(self, line):
        """
        Collect the rtf header lines.

        The header ends either when the open bracket count reaches '0002'
        (back to the preamble) or when text or a paragraph definition is
        met, in which case the body starts immediately: a default font
        table is made, the preamble is written and the current line is
        written after it.
        """
        if self.__ob_count == '0002':
            self.__wrap_rtf_header()
            self.__state = 'preamble'
        elif self.__token_info in ('tx<nu<__________', 'cw<pf<par-def___'):
            self.__state = 'body'
            self.__wrap_rtf_header()
            self.__make_default_font_table()
            self.__write_preamble()
            self.__write_obj.write(line)
        else:
            self.__rtf_final = self.__rtf_final + line

    def __make_default_font_table(self):
        """
        If no font table is found, need to write one out.
        """
        self.__font_table_final = (
            'mi<tg<open______<font-table\n'
            'mi<mk<fonttb-beg\n'
            'mi<mk<fontit-beg\n'
            'cw<ci<font-style<nu<0\n'
            'tx<nu<__________<Times;\n'
            'mi<mk<fontit-end\n'
            'mi<mk<fonttb-end\n'
            'mi<tg<close_____<font-table\n'
        )

    def __make_default_color_table(self):
        """
        If no color table is found, write a string for a default one
        """
        self.__color_table_final = (
            'mi<tg<open______<color-table\n'
            'mi<mk<clrtbl-beg\n'
            'cw<ci<red_______<nu<00\n'
            'cw<ci<green_____<nu<00\n'
            'cw<ci<blue______<en<00\n'
            'mi<mk<clrtbl-end\n'
            'mi<tg<close_____<color-table\n'
        )

    def __make_default_style_table(self):
        """
        If no style sheet is found, make a string for a default one
        """
        self.__style_sheet_final = (
            'mi<tg<open______<style-table\n'
            'mi<mk<styles-beg\n'
            'mi<mk<stylei-beg\n'
            'cw<ci<font-style<nu<0\n'
            'tx<nu<__________<Normal;\n'
            'mi<mk<stylei-end\n'
            'mi<mk<stylei-beg\n'
            'cw<ss<char-style<nu<0\n'
            'tx<nu<__________<Default Paragraph Font;\n'
            'mi<mk<stylei-end\n'
            'mi<mk<styles-end\n'
            'mi<tg<close_____<style-table\n'
        )

    def __found_font_table_func(self, line):
        """
        Start collecting the font table, or skip it if one was already
        found.

        A second font table is ignored as a whole: the ignore state is
        told which closing bracket ends the group (the current open
        bracket count) and to return to the preamble afterwards.
        """
        if self.__found_font_table:
            self.__previous_state = 'preamble'
            self.__ignore_num = self.__ob_count
            self.__state = 'ignore'
        else:
            self.__state = 'font_table'
            self.__font_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0
        self.__found_font_table = 1

    def __font_table_func(self, line):
        """
        Keep adding to the self.__font_table_final string until end of
        group found. If a bracket is found, check that it is only one
        bracket deep. If it is, then set the marker for an individual
        font. If it is not, then ignore all data in this group.
        cw<ci<font-style<nu<0
        """
        if self.__cb_count == self.__close_group_count:
            # The bracket that opened the font table has been closed.
            self.__state = 'preamble'
            self.__font_table_final = (
                'mi<tg<open______<font-table\n'
                'mi<mk<fonttb-beg\n' + self.__font_table_final
            )
            self.__font_table_final += (
                'mi<mk<fonttb-end\n' 'mi<tg<close_____<font-table\n'
            )
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__font_table_final += 'mi<mk<fontit-beg\n'
                self.__individual_font = 1
            else:
                # A group nested deeper than one font entry: skip it and
                # come back to the font table afterwards.
                self.__previous_state = 'font_table'
                self.__state = 'ignore'
                self.__ignore_num = self.__ob_count
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__individual_font = 0
                self.__font_table_final += 'mi<mk<fontit-end\n'
        elif self.__individual_font:
            if self.__old_font and self.__token_info == 'tx<nu<__________':
                # Old style fonts are not bracketed; text containing a
                # semicolon ends the entry.
                if ';' in line:
                    self.__font_table_final += line
                    self.__font_table_final += 'mi<mk<fontit-end\n'
                    self.__individual_font = 0
            else:
                self.__font_table_final += line
        elif self.__token_info == 'cw<ci<font-style':
            # A font-style token outside of any bracketed entry marks the
            # start of an old style font.
            self.__old_font = 1
            self.__individual_font = 1
            self.__font_table_final += 'mi<mk<fontit-beg\n'
            self.__font_table_final += line

    def __old_font_func(self, line):
        r"""
        Required:
            line --line to parse
        Returns:
            nothing
        Logic:
            used for older forms of RTF:
            \f3\fswiss\fcharset77 Helvetica-Oblique;\f4\fnil\fcharset77 Geneva;}
            Note how each font is not divided by a bracket
        """

    def __found_color_table_func(self, line):
        """
        all functions that start with __found operate the same. They set the
        state, initiate a string, determine the self.__close_group_count, and
        set self.__cb_count to zero.
        """
        self.__state = 'color_table'
        self.__color_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __color_table_func(self, line):
        """
        Collect color table lines until the group is closed, then wrap
        them in the color table tags and markers.
        """
        if int(self.__cb_count) == int(self.__close_group_count):
            self.__state = 'preamble'
            self.__color_table_final = (
                'mi<tg<open______<color-table\n'
                'mi<mk<clrtbl-beg\n' + self.__color_table_final
            )
            self.__color_table_final += (
                'mi<mk<clrtbl-end\n' 'mi<tg<close_____<color-table\n'
            )
        else:
            self.__color_table_final += line

    def __found_style_sheet_func(self, line):
        """Start collecting the style sheet (see __found_color_table_func)."""
        self.__state = 'style_sheet'
        self.__style_sheet_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __style_sheet_func(self, line):
        """
        Same logic as the font_table_func: brackets one level below the
        style sheet group become begin/end markers for an individual
        style, all other bracket lines are dropped, and every other line
        is collected.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__style_sheet_final = (
                'mi<tg<open______<style-table\n'
                'mi<mk<styles-beg\n' + self.__style_sheet_final
            )
            self.__style_sheet_final += (
                'mi<mk<styles-end\n' 'mi<tg<close_____<style-table\n'
            )
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__style_sheet_final += 'mi<mk<stylei-beg\n'
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__style_sheet_final += 'mi<mk<stylei-end\n'
        else:
            self.__style_sheet_final += line

    def __found_list_table_func(self, line):
        """Start collecting the list table (see __found_color_table_func)."""
        self.__state = 'list_table'
        self.__list_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __list_table_func(self, line):
        """
        Collect list table lines until the group is closed, then hand the
        collected string to the list table parser, which returns the
        final string and the lists found.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__list_table_final, self.__all_lists = \
                self.__list_table_obj.parse_list_table(
                    self.__list_table_final)
        elif self.__token_info == '':
            # Empty line (end of file): nothing to collect.
            pass
        else:
            self.__list_table_final += line

    def __found_override_table_func(self, line):
        """
        Start collecting the override table.  The parser is created here
        because it needs the lists found by the list table parser.
        """
        self.__override_table_obj = override_table.OverrideTable(
            run_level=self.__run_level,
            list_of_lists=self.__all_lists,
        )
        self.__state = 'override_table'
        self.__override_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0
        # cw<it<lovr-table

    def __override_table_func(self, line):
        """
        Collect override table lines until the group is closed, then hand
        the collected string to the override table parser.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__override_table_final, self.__all_lists = \
                self.__override_table_obj.parse_override_table(
                    self.__override_table_final)
        elif self.__token_info == '':
            # Empty line (end of file): nothing to collect.
            pass
        else:
            self.__override_table_final += line

    def __found_revision_table_func(self, line):
        """Start collecting the revision table (see __found_color_table_func)."""
        self.__state = 'revision_table'
        self.__revision_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __revision_table_func(self, line):
        """
        Collect revision table lines until the group is closed, then wrap
        them in the revision table tags and markers.
        """
        if int(self.__cb_count) == int(self.__close_group_count):
            self.__state = 'preamble'
            self.__revision_table_final = (
                'mi<tg<open______<revision-table\n'
                'mi<mk<revtbl-beg\n' + self.__revision_table_final
            )
            self.__revision_table_final += (
                'mi<mk<revtbl-end\n' 'mi<tg<close_____<revision-table\n'
            )
        else:
            self.__revision_table_final += line

    def __found_doc_info_func(self, line):
        """Start collecting the document info (see __found_color_table_func)."""
        self.__state = 'doc_info'
        self.__doc_info_table_final = ''
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __doc_info_func(self, line):
        """
        Same logic as the style sheet: brackets one level below the
        document info group become begin/end markers, other bracket lines
        are dropped, and every other line is collected.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__doc_info_table_final = (
                'mi<tg<open______<doc-information\n'
                'mi<mk<doc-in-beg\n' + self.__doc_info_table_final
            )
            self.__doc_info_table_final += (
                'mi<mk<doc-in-end\n' 'mi<tg<close_____<doc-information\n'
            )
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__doc_info_table_final += 'mi<mk<docinf-beg\n'
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__doc_info_table_final += 'mi<mk<docinf-end\n'
        else:
            self.__doc_info_table_final += line

    def __margin_func(self, line):
        """
        Handles lines that describe page info. Add the appropriate info in
        the token to the self.__page dictionary, using self.__margin_dict
        to translate the token name.

        Example line: cw<pa<margin-lef<nu<1728
        """
        info = line[6:16]
        changed = self.__margin_dict.get(info)
        if changed is None:
            # Diagnostics go to stderr, as in __section_func.
            sys.stderr.write('woops!\n')
        else:
            # Value is everything after the fourth field separator, minus
            # the trailing newline.
            self.__page[changed] = line[20:-1]

    def __print_page_info(self):
        """
        Write the page definition as a single empty tag whose attributes
        are the entries of self.__page.
        """
        self.__write_obj.write('mi<tg<empty-att_<page-definition')
        for key, value in self.__page.items():
            self.__write_obj.write(f'<{key}>{value}')
        self.__write_obj.write('\n')
        # mi<tg<open-att__<footn

    def __print_sec_info(self):
        """
        Check if there is any section info. If so, print it out.
        If not, print out an empty tag to satisfy the dtd.
        """
        if not self.__section:
            self.__write_obj.write(
                'mi<tg<open______<section-definition\n'
            )
        else:
            self.__write_obj.write(
                'mi<tg<open-att__<section-definition')
            for key, value in self.__section.items():
                self.__write_obj.write(f'<{key}>{value}')
            self.__write_obj.write('\n')

    def __section_func(self, line):
        """
        Add info pertaining to section to the self.__section dictionary, to be
        printed out later.
        """
        info = self.__translate_sec.get(line[6:16])
        if info is None:
            sys.stderr.write('woops!\n')
        else:
            self.__section[info] = 'true'

    def __body_func(self, line):
        """Pass a body line through unchanged."""
        self.__write_obj.write(line)

    def __default_func(self, line):
        """Do nothing (either in preamble or in body)."""

    def __para_def_func(self, line):
        """
        A paragraph definition met when the close bracket count is '0002'
        marks the start of the body.  The line is always written.
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        self.__write_obj.write(line)

    def __text_func(self, line):
        """
        If the cb_count is less than 1, you have hit the body
        For older RTF
        Newer RTF should never have to use this function
        """
        # No close bracket seen yet counts as '0002'.
        cb_count = self.__cb_count if self.__cb_count != '' else '0002'
        if cb_count == '0002':
            self.__enter_body()
        self.__write_obj.write(line)

    def __row_def_func(self, line):
        """
        A row definition met when the close bracket count is '0002' marks
        the start of the body.  The line is always written.
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        self.__write_obj.write(line)

    def __new_section_func(self, line):
        """
        This is new. The start of a section marks the end of the preamble
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        else:
            sys.stderr.write('module is preamble_div\n')
            sys.stderr.write('method is __new_section_func\n')
            sys.stderr.write('bracket count should be 2?\n')
        self.__write_obj.write(line)

    def __write_preamble(self):
        """
        Write all the strings, which represent all the data in the preamble.
        Write a body beginning.

        Default color table, font table and style sheet strings are made
        for whichever of them is still empty.
        """
        write = self.__write_obj.write
        if self.__no_namespace:
            write('mi<tg<open______<doc\n')
        else:
            write(
                'mi<tg<open-att__<doc<xmlns>http://rtf2xml.sourceforge.net/\n')
        write('mi<tg<open______<preamble\n')
        write(self.__rtf_final)
        if not self.__color_table_final:
            self.__make_default_color_table()
        if not self.__font_table_final:
            self.__make_default_font_table()
        write(self.__font_table_final)
        write(self.__color_table_final)
        if not self.__style_sheet_final:
            self.__make_default_style_table()
        write(self.__style_sheet_final)
        write(self.__list_table_final)
        write(self.__override_table_final)
        write(self.__revision_table_final)
        write(self.__doc_info_table_final)
        self.__print_page_info()
        write('ob<nu<open-brack<0001\n')
        write('ob<nu<open-brack<0002\n')
        write('cb<nu<clos-brack<0002\n')
        write('mi<tg<close_____<preamble\n')
        write('mi<tg<open______<body\n')
        # Section and headers-and-footers tags are not written here
        # (self.__print_sec_info is currently unused).
        write('mi<mk<body-open_\n')

    def __preamble_func(self, line):
        """
        Check if the token info belongs to the dictionary. If so, take the
        appropriate action.
        """
        action = self.__state_dict.get(self.__token_info)
        if action:
            action(line)

    def make_preamble_divisions(self):
        """
        Read the input file line by line, dispatch every line to the
        handler of the current state, and write the result to a temporary
        file.

        Afterwards the temporary file is optionally copied for debugging
        (when 'copy' was given), passed to the copy object's rename()
        together with the input path (presumably replacing the input
        file -- confirm against copy.Copy), and removed.

        Returns:
            the lists collected from the list and override tables
        """
        self.__initiate_values()
        read_obj = open_for_read(self.__file)
        try:
            self.__write_obj = open_for_write(self.__write_to)
            try:
                line_to_read = 1
                # The final, empty line returned at end of file is also
                # dispatched, exactly once, before the loop stops.
                while line_to_read:
                    line_to_read = read_obj.readline()
                    line = line_to_read
                    self.__token_info = line[:16]
                    if self.__token_info == 'ob<nu<open-brack':
                        self.__ob_count = line[-5:-1]
                        self.__ob_group += 1
                    if self.__token_info == 'cb<nu<clos-brack':
                        self.__cb_count = line[-5:-1]
                        self.__ob_group -= 1
                    action = self.__state_dict.get(self.__state)
                    if action is None:
                        # Should be unreachable: every state that is ever
                        # set has a handler.  The call below then fails
                        # with a TypeError, after naming the state.
                        print(self.__state)
                    action(line)
            finally:
                # Close both files even if a handler raised.
                self.__write_obj.close()
        finally:
            read_obj.close()
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "preamble_div.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        return self.__all_lists