# File: lib/calibre/calibre/ebooks/rtf2xml/add_brackets.py
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# #
# #
#########################################################################
import sys, os
from calibre.ebooks.rtf2xml import copy, check_brackets
from calibre.ptempfile import better_mktemp
from polyglot.builtins import iteritems
from . import open_for_read, open_for_write
class AddBrackets:
    """
    Add brackets for old RTF.

    Logic:
    When control words without their own brackets are encountered
    and in the list of allowed words, this will add brackets
    to facilitate the treatment of the file.

    The input is read line by line.  The first 16 characters of a line are
    its token name; for bracket tokens the four characters before the
    trailing newline are the bracket count; for control words the text
    after the 20th character (minus the newline) is the value.
    """

    # Inline control words that may be wrapped in a synthetic group.
    # A frozenset gives constant-time membership tests, which are done for
    # every line of the body.
    # 'cw<ul<underlined' was listed but disabled in the original table.
    __accept = frozenset((
        'cw<ci<annotation',
        'cw<ci<blue______',
        'cw<ci<bold______',
        'cw<ci<caps______',
        'cw<ci<char-style',
        'cw<ci<dbl-strike',
        'cw<ci<emboss____',
        'cw<ci<engrave___',
        'cw<ci<font-color',
        'cw<ci<font-down_',
        'cw<ci<font-size_',
        'cw<ci<font-style',
        'cw<ci<font-up___',
        'cw<ci<footnot-mk',
        'cw<ci<green_____',
        'cw<ci<hidden____',
        'cw<ci<italics___',
        'cw<ci<outline___',
        'cw<ci<red_______',
        'cw<ci<shadow____',
        'cw<ci<small-caps',
        'cw<ci<strike-thr',
        'cw<ci<subscript_',
        'cw<ci<superscrip',
        'cw<ci<underlined',
    ))

    def __init__(self, in_file,
            bug_handler,
            copy=None,
            run_level=1,
            ):
        """
        Required:
            'in_file'--file to parse
            'bug_handler'--passed on to copy.Copy when the result is kept
        Optional:
            'copy'-- whether to make a copy of result for debugging
            'run_level'--when greater than 0, a warning is written to
                stderr if the brackets of the result do not match
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        # Results are written to a temporary file first; it only replaces
        # the input if the bracket check succeeds (see add_brackets).
        self.__write_to = better_mktemp()
        self.__run_level = run_level
        # State name -> handler called with each input line.
        self.__state_dict = {
            'before_body'           : self.__before_body_func,
            'in_body'               : self.__in_body_func,
            'after_control_word'    : self.__after_control_word_func,
            'in_ignore'             : self.__ignore_func,
        }

    def __initiate_values(self):
        """
        Init temp values
        """
        self.__state = 'before_body'
        # Currently active inline attributes: token name -> value.
        self.__inline = {}
        # Accepted control-word lines collected since the last other token.
        self.__temp_group = []
        # True while a synthetic (count 0003) group is open in the output.
        self.__open_bracket = False
        self.__found_brackets = False
        # Per-line scratch values, set while processing each line.
        self.__token_info = ''
        self.__ob_count = None
        self.__cb_count = None
        self.__ignore_count = None

    def __before_body_func(self, line):
        """
        If we are before the body, not interested in changing anything:
        copy the line and switch to 'in_body' at the body-open marker.
        """
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'in_body'
        self.__write_obj.write(line)

    def __in_body_func(self, line):
        """
        Select what action to take in body:
            1-At the final close bracket (count 0001), first close the
              synthetic bracket if one was opened
            2-If an open bracket is found, the code inside is ignored
              (written without modifications)
            3-If an accepted control word is found, put the line
              in a buffer then change state to after cw
            4-Else simply write the line
        """
        if line == 'cb<nu<clos-brack<0001\n' and self.__open_bracket:
            self.__write_obj.write(
                'cb<nu<clos-brack<0003\n'
            )
            self.__write_obj.write(line)
        elif self.__token_info == 'ob<nu<open-brack':
            self.__found_brackets = True
            self.__state = 'in_ignore'
            # Remember which bracket count ends the ignored region.
            self.__ignore_count = self.__ob_count
            self.__write_obj.write(line)
        elif self.__token_info in self.__accept:
            self.__temp_group.append(line)
            self.__state = 'after_control_word'
        else:
            self.__write_obj.write(line)

    def __after_control_word_func(self, line):
        """
        After a cw either add next allowed cw to temporary list or
        change group and write it.
        If the token leading to an exit is an open bracket go to
        ignore otherwise go to in body
        """
        if self.__token_info in self.__accept:
            self.__temp_group.append(line)
            return
        self.__change_permanent_group()
        self.__write_group()
        self.__write_obj.write(line)
        if self.__token_info == 'ob<nu<open-brack':
            self.__state = 'in_ignore'
            self.__ignore_count = self.__ob_count
        else:
            self.__state = 'in_body'

    def __write_group(self):
        """
        Write a temporary group after accepted control words end.

        Closes the synthetic group if one is open, then opens a new one
        containing every inline attribute whose value is not 'false'.
        If no such attribute remains, no new group is opened.
        The temporary list of control words is emptied afterwards.
        """
        if self.__open_bracket:
            self.__write_obj.write(
                'cb<nu<clos-brack<0003\n'
            )
            self.__open_bracket = False

        inline_string = ''.join(
            f'{k}<nu<{v}\n'
            for k, v in self.__inline.items()
            if v != 'false'
        )
        if inline_string:
            self.__write_obj.write('ob<nu<open-brack<0003\n'
                '%s' % inline_string)
            self.__open_bracket = True
        self.__temp_group = []

    def __change_permanent_group(self):
        """
        Use temp group to change permanent group.

        The permanent group is rebuilt from the buffered lines: token name
        (first 16 characters) -> value (text after '<nu<', without the
        newline).  Lines whose token is not accepted are dropped.
        """
        # NOTE(review): this replaces the previous attributes rather than
        # merging into them -- confirm that is the intended behaviour.
        self.__inline = {
            line[:16]: line[20:-1]
            for line in self.__temp_group
            if line[:16] in self.__accept
        }

    def __ignore_func(self, line):
        """
        Just copy data inside of RTF brackets already here.
        Return to 'in_body' at the close bracket whose count equals the
        count of the bracket that started the ignored region.
        """
        self.__write_obj.write(line)
        if self.__token_info == 'cb<nu<clos-brack'\
                and self.__cb_count == self.__ignore_count:
            self.__state = 'in_body'

    def __check_brackets(self, in_file):
        """
        Return True if brackets match
        """
        check_brack_obj = check_brackets.CheckBrackets(file=in_file)
        return check_brack_obj.check_brackets()[0]

    def __process_line(self, line):
        """
        Record the token name and bracket count of one line, then hand the
        line to the handler of the current state.

        If the current state has no handler, a diagnostic is written to
        stderr and the line is copied to the output unchanged (previously
        the missing handler was called anyway, raising TypeError).
        """
        self.__token_info = line[:16]
        if self.__token_info == 'ob<nu<open-brack':
            self.__ob_count = line[-5:-1]
        elif self.__token_info == 'cb<nu<clos-brack':
            self.__cb_count = line[-5:-1]
        action = self.__state_dict.get(self.__state)
        if action is None:
            sys.stderr.write(
                'No matching state in module add_brackets.py\n'
                '%s\n' % self.__state)
            self.__write_obj.write(line)
            return
        action(line)

    def add_brackets(self):
        """
        Process the input file and, if the result has matching brackets,
        replace the input file with it.

        If the brackets of the result do not match, the input file is left
        untouched, the temporary result is removed, and (when run_level is
        greater than 0) a warning is written to stderr.
        """
        self.__initiate_values()
        with open_for_read(self.__file) as read_obj:
            with open_for_write(self.__write_to) as self.__write_obj:
                for line in read_obj:
                    self.__process_line(line)
        # Check bad brackets
        if self.__check_brackets(self.__write_to):
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "add_brackets.data")
            copy_obj.rename(self.__write_to, self.__file)
        else:
            if self.__run_level > 0:
                sys.stderr.write(
                    'Sorry, but this files has a mix of old and new RTF.\n'
                    'Some characteristics cannot be converted.\n')
            os.remove(self.__write_to)