%PDF- %PDF-
| Direktori : /lib/calibre/calibre/ebooks/rtf2xml/ |
| Current File : //lib/calibre/calibre/ebooks/rtf2xml/field_strings.py |
#########################################################################
# #
# #
# copyright 2002 Paul Henry Tremblay #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# #
#########################################################################
import sys, re
class FieldStrings:
"""
This module is given a string. It processes the field instruction string and
returns a list of three values.
"""
def __init__(self, bug_handler, run_level=1):
"""
Requires:
nothing
Returns:
nothing
"""
self.__run_level = run_level
self.__bug_handler = bug_handler
self.__initiate_values()
def __initiate_values(self):
"""
Requires:
nothing.
Returns:
nothing.
Logic:
initiate values for rest of class.
self.__field_instruction_dict:
The dictionary for all field names.
"""
self.__field_instruction_dict = {
# number type (arabic, etc.) and number format (\# " ")
'EDITTIME' : (self.__num_type_and_format_func, 'editing-time'),
'NUMCHARS' : (self.__num_type_and_format_func, 'number-of-characters-in-doc'),
'NUMPAGES' : (self.__num_type_and_format_func, 'number-of-pages-in-doc'),
'NUMWORDS' : (self.__num_type_and_format_func, 'number-of-words-in-doc'),
'REVNUM' : (self.__num_type_and_format_func, 'revision-number'),
'SECTIONPAGES' : (self.__num_type_and_format_func, 'num-of-pages-in-section'),
'SECTION' : (self.__num_type_and_format_func, 'insert-section-number'),
'QUOTE' : (self.__num_type_and_format_func, 'quote'),
# number formatting (\# "")
'PAGE' : (self.__default_inst_func, 'insert-page-number'),
'page' : (self.__default_inst_func, 'insert-page-number'),
# date format (\@ "")
'CREATEDATE' : (self.__date_func, 'insert-date'),
'PRINTDATE' : (self.__date_func, 'insert-date'),
# PRINTDATE?
'SAVEDATE' : (self.__date_func, 'last-saved'),
'TIME' : (self.__date_func, 'insert-time'),
# numbers?
# these fields take four switches
'AUTHOR' : (self.__simple_info_func, 'user-name'),
'COMMENTS' : (self.__simple_info_func, 'comments'),
'FILENAME' : (self.__simple_info_func, 'file-name'),
'filename' : (self.__simple_info_func, 'file-name'),
'KEYWORDS' : (self.__simple_info_func, 'keywords'),
'LASTSAVEDBY' : (self.__simple_info_func, 'last-saved-by'),
'SUBJECT' : (self.__simple_info_func, 'subject'),
'TEMPLATE' : (self.__simple_info_func, 'based-on-template'),
'TITLE' : (self.__simple_info_func, 'document-title'),
'USERADDRESS' : (self.__simple_info_func, 'user-address'),
'USERINITIALS' : (self.__simple_info_func, 'user-initials'),
'USERNAME' : (self.__simple_info_func, 'user-name'),
'EQ' : (self.__equation_func, 'equation'),
'HYPERLINK' : (self.__hyperlink_func, 'hyperlink'),
'INCLUDEPICTURE': (self.__include_pict_func, 'include-picture'),
'INCLUDETEXT' : (self.__include_text_func, 'include-text-from-file'),
'INDEX' : (self.__index_func, 'index'),
'NOTEREF' : (self.__note_ref_func, 'reference-to-note'),
'PAGEREF' : (self.__page_ref_func, 'reference-to-page'),
'REF' : (self.__ref_func, 'reference'),
'ref' : (self.__ref_func, 'reference'),
'SEQ' : (self.__sequence_func, 'numbering-sequence'),
'SYMBOL' : (self.__symbol_func, 'symbol'),
'TA' : (self.__ta_func, 'anchor-for-table-of-authorities'),
'TOA' : (self.__toc_table_func, 'table-of-authorities'),
'TOC' : (self.__toc_table_func, 'table-of-contents'),
# no switches
'AUTONUMOUT' : (self.__no_switch_func, 'auto-num-out?'),
'COMPARE' : (self.__no_switch_func, 'compare'),
'DOCVARIABLE' : (self.__no_switch_func, 'document-variable'),
'GOTOBUTTON' : (self.__no_switch_func, 'go-button'),
'NEXT' : (self.__no_switch_func, 'next'),
'NEXTIF' : (self.__no_switch_func, 'next-if'),
'SKIPIF' : (self.__no_switch_func, 'skip-if'),
'IF' : (self.__no_switch_func, 'if'),
'MERGEFIELD' : (self.__no_switch_func, 'merge-field'),
'MERGEREC' : (self.__no_switch_func, 'merge-record'),
'MERGESEQ' : (self.__no_switch_func, 'merge-sequence'),
'PLACEHOLDER' : (self.__no_switch_func, 'place-holder'),
'PRIVATE' : (self.__no_switch_func, 'private'),
'RD' : (self.__no_switch_func, 'referenced-document'),
'SET' : (self.__no_switch_func, 'set'),
# default instructions (haven't written a method for them
'ADVANCE' : (self.__default_inst_func, 'advance'),
'ASK' : (self.__default_inst_func, 'prompt-user'),
'AUTONUMLGL' : (self.__default_inst_func, 'automatic-number'),
'AUTONUM' : (self.__default_inst_func, 'automatic-number'),
'AUTOTEXTLIST' : (self.__default_inst_func, 'auto-list-text'),
'AUTOTEXT' : (self.__default_inst_func, 'auto-text'),
'BARCODE' : (self.__default_inst_func, 'barcode'),
'CONTACT' : (self.__default_inst_func, 'contact'),
'DATABASE' : (self.__default_inst_func, 'database'),
'DATE' : (self.__default_inst_func, 'date'),
'date' : (self.__default_inst_func, 'date'),
'DOCPROPERTY' : (self.__default_inst_func, 'document-property'),
'FILESIZE' : (self.__default_inst_func, 'file-size'),
'FILLIN' : (self.__default_inst_func, 'fill-in'),
'INFO' : (self.__default_inst_func, 'document-info'),
'LINK' : (self.__default_inst_func, 'link'),
'PA' : (self.__default_inst_func, 'page'),
'PRINT' : (self.__default_inst_func, 'print'),
'STYLEREF' : (self.__default_inst_func, 'style-reference'),
'USERPROPERTY' : (self.__default_inst_func, 'user-property'),
'FORMCHECKBOX' : (self.__default_inst_func, 'form-checkbox'),
'FORMTEXT' : (self.__default_inst_func, 'form-text'),
# buttons
'MACROBUTTON' : (self.__default_inst_func, 'macro-button'),
}
self.__number_dict = {
'Arabic' : 'arabic',
'alphabetic' : 'alphabetic',
'ALPHABETIC' : 'capital-alphabetic',
'roman' : 'roman',
'ROMAN' : 'capital-roman',
'Ordinal' : 'ordinal',
'CardText' : 'cardinal-text',
'OrdText' : 'ordinal-text',
'Hex' : 'hexidecimal',
'DollarText' : 'dollar-text',
'Upper' : 'upper-case',
'Lower' : 'lower-case',
'FirstCap' : 'first-cap',
'Caps' : 'caps',
}
self.__text_format_dict = {
'Upper' : 'upper',
'Lower' : 'lower',
'FirstCap' : 'first-cap',
'Caps' : 'caps',
}
self.__symbol_num_exp = re.compile(r'SYMBOL (.*?) ')
self.__symbol_font_exp = re.compile(r'\\f "(.*?)"')
self.__symbol_size_exp = re.compile(r'\\s (\d+)')
# self.__toc_figure_exp = re.compile(r'\\c "Figure"')
# \\@ "dddd, MMMM d, yyyy"
self.__date_exp = re.compile(r'\\@\s{1,}"(.*?)"')
self.__num_type_exp = re.compile(
r'\\\*\s{1,}(Arabic|alphabetic|ALPHABETIC|roman|ROMAN|Ordinal|CardText|OrdText|Hex|DollarText|Upper|Lower|FirstCap|Caps)')
self.__format_text_exp = re.compile(r'\\\*\s{1,}(Upper|Lower|FirstCap|Caps)')
self.__merge_format_exp = re.compile(r'\\\*\s{1,}MERGEFORMAT')
self.__ta_short_field_exp = re.compile(r'\\s\s{1,}"(.*?)"')
self.__ta_long_field_exp = re.compile(r'\\l\s{1,}"(.*?)"')
self.__ta_category_exp = re.compile(r'\\c\s{1,}(\d+)')
# indices
self.__index_insert_blank_line_exp = re.compile(r'\\h\s{1,}""')
self.__index_insert_letter_exp = re.compile(r'\\h\s{1,}"()"')
self.__index_columns_exp = re.compile(r'\\c\s{1,}"(.*?)"')
self.__bookmark_exp = re.compile(r'\\b\s{1,}(.*?)\s')
self.__d_separator = re.compile(r'\\d\s{1,}(.*?)\s')
self.__e_separator = re.compile(r'\\e\s{1,}(.*?)\s')
self.__l_separator = re.compile(r'\\l\s{1,}(.*?)\s')
self.__p_separator = re.compile(r'\\p\s{1,}(.*?)\s')
self.__index_sequence = re.compile(r'\\s\s{1,}(.*?)\s')
self.__index_entry_typ_exp = re.compile(r'\\f\s{1,}"(.*?)"')
self.__quote_exp = re.compile(r'"(.*?)"')
self.__filter_switch = re.compile(r'\\c\s{1,}(.*?)\s')
self.__link_switch = re.compile(r'\\l\s{1,}(.*?)\s')
def process_string(self, my_string, type):
"""
Requires:
my_string --the string to parse.
type -- the type of string.
Returns:
Returns a string for a field instrution attribute.
Logic:
This handles all "large" fields, which means everything except
toc entries, index entries, and bookmarks
Split the string by spaces, and get the first item in the
resulting list. This item is the field's type. Check for the
action in the field instructions dictionary for further parsing.
If no action is found, print out an error message.
"""
changed_string = ''
lines = my_string.split('\n')
for line in lines:
if line[0:2] == 'tx':
changed_string += line[17:]
fields = changed_string.split()
field_name = fields[0]
action, name = self.__field_instruction_dict.get(field_name, (None, None))
match_obj = re.search(self.__merge_format_exp, changed_string)
if match_obj and name:
name += '<update>dynamic'
elif name:
name += '<update>static'
else:
pass
# no name--not in list above
if action:
the_list = action(field_name, name, changed_string)
else:
# change -1 to 0--for now, I want users to report bugs
msg = f'no key for "{field_name}" "{changed_string}"\n'
sys.stderr.write(msg)
if self.__run_level > 3:
msg = f'no key for "{field_name}" "{changed_string}"\n'
raise self.__bug_handler(msg)
the_list = self.__fall_back_func(field_name, line)
return the_list
return the_list
def __default_inst_func(self, field_name, name, line):
"""
Requires:
field_name -- the first word in the string
name -- the changed name according to the dictionary
line -- the string to be parsed
Returns:
The name of the field.
Logic:
I only need the changed name for the field.
"""
return [None, None, name]
def __fall_back_func(self, field_name, line):
"""
Requires:
field_name -- the first word in the string
name -- the changed name according to the dictionary
line -- the string to be parsed
Returns:
The name of the field.
Logic:
Used for fields not found in dict
"""
the_string = field_name
the_string += '<update>none'
return [None, None, the_string]
def __equation_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
Logic:
"""
return [None, None, name]
def __no_switch_func(self, field_name, name, line):
"""
Required:
field_name --the first
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
Logic:
"""
return [None, None, name]
def __num_type_and_format_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
list of None, None, and part of a tag
Logic:
parse num_type
parse num_format
"""
the_string = name
num_format = self.__parse_num_format(line)
if num_format:
the_string += '<number-format>%s' % num_format
num_type = self.__parse_num_type(line)
if num_type:
the_string += '<number-type>%s' % num_type
# Only QUOTE takes a (mandatory?) argument
if field_name == 'QUOTE':
match_group = re.search(r'QUOTE\s{1,}"(.*?)"', line)
if match_group:
arg = match_group.group(1)
the_string += '<argument>%s' % arg
return [None, None, the_string]
def __num_format_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
list of None, None, and part of a tag
Logic:
"""
the_string = name
num_format = self.__parse_num_format(line)
if num_format:
the_string += '<number-format>%s' % num_format
return [None, None, the_string]
def __parse_num_format(self, the_string):
"""
Required:
the_string -- the string to parse
Returns:
a string if the_string contains number formatting information
None, otherwise
Logic:
"""
match_group = re.search(self.__date_exp, the_string)
if match_group:
return match_group(1)
def __parse_num_type(self, the_string):
"""
Required:
the_string -- the string to parse
Returns:
a string if the_string contains number type information
None, otherwise
Logic:
the_string might look like:
USERNAME \\* Arabic \\* MERGEFORMAT
Get the \\* Upper part. Use a dictionary to convert the "Arabic" to
a more-readable word for the value of the key "number-type".
(<field number-type = "Arabic">
"""
match_group = re.search(self.__num_type_exp, the_string)
if match_group:
name = match_group.group(1)
changed_name = self.__number_dict.get(name)
if changed_name:
return changed_name
else:
sys.stderr.write('module is fields_string\n')
sys.stderr.write('method is __parse_num_type\n')
sys.stderr.write('no dictionary entry for %s\n' % name)
def __date_func(self, field_name, name, line):
"""
Required:
field_name --the fist
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
list of None, None, and part of a tag
Logic:
"""
the_string = name
match_group = re.search(self.__date_exp, line)
if match_group:
the_string += '<date-format>%s' % match_group.group(1)
return [None, None, the_string]
def __simple_info_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
Logic:
These fields can only have the following switches:
1. Upper
2. Lower
3. FirstCap
4. Caps
"""
the_string = name
match_group = re.search(self.__format_text_exp, line)
if match_group:
name = match_group.group(1)
changed_name = self.__text_format_dict.get(name)
if changed_name:
the_string += '<format>%s' % changed_name
else:
sys.stderr.write('module is fields_string\n')
sys.stderr.write('method is __parse_num_type\n')
sys.stderr.write('no dictionary entry for %s\n' % name)
return [None, None, the_string]
def __hyperlink_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
"""
self.__link_switch = re.compile(r'\\l\s{1,}"{0,1}(.*?)"{0,1}\s')
the_string = name
match_group = re.search(self.__link_switch, line)
if match_group:
link = match_group.group(1)
link = link.replace('"', """)
the_string += '<link>%s' % link
# \l "txt" "link"
# want "file name" so must get rid of \c "txt"
line = re.sub(self.__link_switch, '', line)
match_group = re.search(self.__quote_exp, line)
if match_group:
arg = match_group.group(1)
the_string += '<argument>%s' % arg
else:
pass
index = line.find('\\m')
if index > -1:
the_string += '<html2-image-map>true'
index = line.find('\\n')
if index > -1:
the_string += '<new-window>true'
index = line.find('\\h')
if index > -1:
the_string += '<no-history>true'
return [None, None, the_string]
def __include_text_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
Logic:
"""
the_string = name
match_group = re.search(self.__format_text_exp, line)
if match_group:
name = match_group.group(1)
changed_name = self.__text_format_dict.get(name)
if changed_name:
the_string += '<format>%s' % changed_name
else:
sys.stderr.write('module is fields_string\n')
sys.stderr.write('method is __parse_num_type\n')
sys.stderr.write('no dictionary entry for %s\n' % name)
match_group = re.search(self.__filter_switch, line)
if match_group:
arg = match_group.group(1)
the_string += '<filter>%s' % arg
# \c "txt" "file name"
# want "file name" so must get rid of \c "txt"
line = re.sub(self.__filter_switch, '', line)
match_group = re.search(self.__quote_exp, line)
if match_group:
arg = match_group.group(1)
arg = arg.replace('"', """)
the_string += '<argument>%s' % arg
else:
sys.stderr.write('Module is field_strings\n')
sys.stderr.write('method is include_text_func\n')
sys.stderr.write('no argument for include text\n')
index = line.find('\\!')
if index > -1:
the_string += '<no-field-update>true'
return [None, None, the_string]
def __include_pict_func(self, field_name, name, line):
"""
Required:
field_name -- the first word in the string
name --the changed name according to the dictionary
line -- the string to be parse
Returns:
The name of the field
Logic:
"""
the_string = name
match_group = re.search(self.__filter_switch, line)
if match_group:
arg = match_group.group(1)
arg = arg.replace('"', """)
the_string += '<filter>%s' % arg
# \c "txt" "file name"
# want "file name" so must get rid of \c "txt"
line = re.sub(self.__filter_switch, '', line)
match_group = re.search(self.__quote_exp, line)
if match_group:
arg = match_group.group(1)
the_string += '<argument>%s' % arg
else:
sys.stderr.write('Module is field_strings\n')
sys.stderr.write('method is include_pict_func\n')
sys.stderr.write('no argument for include pict\n')
index = line.find('\\d')
if index > -1:
the_string += '<external>true'
return [None, None, the_string]
def __ref_func(self, field_name, name, line):
"""
Requires:
field_name -- the first word in the string
name -- the changed name according to the dictionary
line -- the string to be parsed
Returns:
The name of the field.
Logic:
A page reference field looks like this:
PAGEREF _Toc440880424 \\h
I want to extract the second line of info, which is used as an
anchor in the resulting XML file.
"""
the_string = name
match_group = re.search(self.__format_text_exp, line)
if match_group:
name = match_group.group(1)
changed_name = self.__text_format_dict.get(name)
if changed_name:
the_string += '<format>%s' % changed_name
else:
sys.stderr.write('module is fields_string\n')
sys.stderr.write('method is __parse_num_type\n')
sys.stderr.write('no dictionary entry for %s\n' % name)
line = re.sub(self.__merge_format_exp, '', line)
words = line.split()
words = words[1:] # get rid of field name
for word in words:
if word[0:1] != '\\':
the_string += '<bookmark>%s' % word
index = line.find('\\f')
if index > -1:
the_string += '<include-note-number>true'
index = line.find('\\h')
if index > -1:
the_string += '<hyperlink>true'
index = line.find('\\n')
if index > -1:
the_string += '<insert-number>true'
index = line.find('\\r')
if index > -1:
the_string += '<insert-number-relative>true'
index = line.find('\\p')
if index > -1:
the_string += '<paragraph-relative-position>true'
index = line.find('\\t')
if index > -1:
the_string += '<suppress-non-delimeter>true'
index = line.find('\\w')
if index > -1:
the_string += '<insert-number-full>true'
return [None, None, the_string]
def __toc_table_func(self, field_name, name, line):
"""
Requires:
field_name -- the name of the first word in the string
name --the changed name, according to the dictionary.
line --the string to be parsed.
Returns:
A string for a TOC table field.
Logic:
If the string contains Figure, it is a table of figures.
Otherwise, it is a plain old table of contents.
"""
the_string = name
index = line.find('\\c "Figure"')
if index > -1:
the_string = the_string.replace('table-of-contents', 'table-of-figures')
# don't really need the first value in this list, I don't believe
return [name, None, the_string]
def __sequence_func(self, field_name, name, line):
"""
Requires:
field_name --the name of the first word in the string.
name --the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string with a value for the type and label attributes
Logic:
The type of sequence--whether figure, graph, my-name, or
whatever--is represented by the second word in the string. Extract
and return.
SEQ Figure \\* ARABIC
"""
fields = line.split()
label = fields[1]
my_string = f'{name}<label>{label}'
return [None, None, my_string]
def __ta_func(self, field_name, name, line):
"""
Requires:
field_name --the name of the first word in the string.
name --the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string with a value for the type and label attributes
Logic:
"""
the_string = name
match_group = re.search(self.__ta_short_field_exp, line)
if match_group:
short_name = match_group.group(1)
the_string += '<short-field>%s' % short_name
match_group = re.search(self.__ta_long_field_exp, line)
if match_group:
long_name = match_group.group(1)
the_string += '<long-field>%s' % long_name
match_group = re.search(self.__ta_category_exp, line)
if match_group:
category = match_group.group(1)
the_string += '<category>%s' % category
index = line.find('\\b')
if index > -1:
the_string += '<bold>true'
index = line.find('\\i')
if index > -1:
the_string += '<italics>true'
return [None, None, the_string]
def __index_func(self, field_name, name, line):
"""
Requires:
field_name --the name of the first word in the string.
name --the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string with a value for the type and label attributes
Logic:
"""
# self.__index_insert_blank_line_exp = re.compile(r'\\h\s{1,}""')
# self.__index_insert_letter_exp = re.compile(r'\\h\s{1,}(".*?")')
the_string = name
match_group = re.search(self.__index_insert_blank_line_exp, line)
if match_group:
the_string += '<insert-blank-line>true'
else:
match_group = re.search(self.__index_insert_letter_exp, line)
if match_group:
insert_letter = match_group.group(1)
the_string += '<insert-letter>%s' % insert_letter
match_group = re.search(self.__index_columns_exp, line)
if match_group:
columns = match_group.group(1)
the_string += '<number-of-columns>%s' % columns
# self.__bookmark_exp = re.compile(r'\\b\s{1,}(.*?)\s')
match_group = re.search(self.__bookmark_exp, line)
if match_group:
bookmark = match_group.group(1)
the_string += '<use-bookmark>%s' % bookmark
match_group = re.search(self.__d_separator, line)
if match_group:
separator = match_group.group(1)
separator = separator.replace('"', '"')
the_string += '<sequence-separator>%s' % separator
# self.__e_separator = re.compile(r'\\e\s{1,}(.*?)\s')
match_group = re.search(self.__e_separator, line)
if match_group:
separator = match_group.group(1)
separator = separator.replace('"', '"')
the_string += '<page-separator>%s' % separator
# self.__index_sequence = re.compile(r'\\s\s{1,}(.*?)\s')
match_group = re.search(self.__index_sequence, line)
if match_group:
sequence = match_group.group(1)
separator = separator.replace('"', '"')
the_string += '<use-sequence>%s' % sequence
# self.__index_entry_typ_exp = re.compile(r'\\f\s{1,}"(.*?)"')
match_group = re.search(self.__index_entry_typ_exp, line)
if match_group:
entry_type = match_group.group(1)
the_string += '<entry-type>%s' % entry_type
# self.__p_separator = re.compile(r'\\p\s{1,}(.*?)\s')
match_group = re.search(self.__p_separator, line)
if match_group:
limit = match_group.group(1)
the_string += '<limit-to-letters>%s' % limit
match_group = re.search(self.__l_separator, line)
if match_group:
separator = match_group.group(1)
separator = separator.replace('"', '"')
the_string += '<multi-page-separator>%s' % separator
index = line.find('\\a')
if index > -1:
the_string += '<accented>true'
index = line.find('\\r')
if index > -1:
the_string += '<sub-entry-on-same-line>true'
index = line.find('\\t')
if index > -1:
the_string += '<enable-yomi-text>true'
return [None, None, the_string]
def __page_ref_func(self, field_name, name, line):
"""
Requires:
field_name --first name in the string.
name -- the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string .
Logic:
"""
the_string = name
num_format = self.__parse_num_format(line)
if num_format:
the_string += '<number-format>%s' % num_format
num_type = self.__parse_num_type(line)
if num_type:
the_string += '<number-type>%s' % num_type
line = re.sub(self.__merge_format_exp, '', line)
words = line.split()
words = words[1:] # get rid of field name
for word in words:
if word[0:1] != '\\':
the_string += '<bookmark>%s' % word
index = line.find('\\h')
if index > -1:
the_string += '<hyperlink>true'
index = line.find('\\p')
if index > -1:
the_string += '<paragraph-relative-position>true'
return [None, None, the_string]
def __note_ref_func(self, field_name, name, line):
"""
Requires:
field_name --first name in the string.
name -- the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string .
Logic:
"""
the_string = name
line = re.sub(self.__merge_format_exp, '', line)
words = line.split()
words = words[1:] # get rid of field name
for word in words:
if word[0:1] != '\\':
the_string += '<bookmark>%s' % word
index = line.find('\\h')
if index > -1:
the_string += '<hyperlink>true'
index = line.find('\\p')
if index > -1:
the_string += '<paragraph-relative-position>true'
index = line.find('\\f')
if index > -1:
the_string += '<include-note-number>true'
return [None, None, the_string]
def __symbol_func(self, field_name, name, line):
"""
Requires:
field_name --first name in the string.
name -- the changed name according to the dictionary.
line -- the string to parse.
Returns:
A string containing font size, font style, and a hexadecimal value.
Logic:
The SYMBOL field is one of Microsoft's many quirky ways of
entering text. The string that results from this method looks like
this:
SYMBOL 97 \\f "Symbol" \\s 12
The first word merely tells us that we have encountered a SYMBOL
field.
The next value is the Microsoft decimal value. Change this to
hexadecimal.
The pattern '\\f "some font' tells us the font.
The pattern '\\s some size' tells us the font size.
Extract all of this information. Store this information in a
string, and make this string the last item in a list. The first
item in the list is the simple word 'symbol', which tells me that
I don't really have field, but UTF-8 data.
"""
num = ''
font = ''
font_size = ''
changed_line = ''
search_obj = re.search(self.__symbol_num_exp, line)
if search_obj:
num = search_obj.group(1)
num = int(num)
num = '%X' % num
search_obj = re.search(self.__symbol_font_exp, line)
if search_obj:
font = search_obj.group(1)
changed_line += 'cw<ci<font-style<nu<%s\n' % font
search_obj = re.search(self.__symbol_size_exp, line)
if search_obj:
font_size = search_obj.group(1)
font_size = int(font_size)
font_size = '%.2f' % font_size
changed_line += 'cw<ci<font-size_<nu<%s\n' % font_size
changed_line += 'tx<hx<__________<\'%s\n' % num
return ['Symbol', None, changed_line]