%PDF- %PDF-
| Direktori : /lib/calibre/calibre/ebooks/epub/ |
| Current File : //lib/calibre/calibre/ebooks/epub/pages.py |
'''
Add page mapping information to an EPUB book.
'''
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
from itertools import count
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ebooks.oeb.base import OEBBook
from lxml.etree import XPath
# Namespace prefixes made available to the XPath expressions compiled in this
# module; 'h', 'html' and 'xhtml' all resolve to the XHTML namespace.
NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
# Matches the word "page" in any letter case, anywhere in a string.
PAGE_RE = re.compile(r'page', re.IGNORECASE)
# Matches a string made up solely of the letters i, v, x, l, c, d, m (any
# case). It does not check that the letters form a well-formed Roman numeral.
ROMAN_RE = re.compile(r'^[ivxlcdm]+$', re.IGNORECASE)
def filter_name(name):
    '''
    Reduce a raw page label to a bare page number where possible.

    Every case-insensitive occurrence of ``page`` is removed from `name`.
    The first whitespace-separated word of what remains that is either all
    digits or made up solely of Roman-numeral letters is returned. When no
    such word exists, the remaining text is returned with surrounding
    whitespace stripped.

    :param name: Raw label text.
    :return: The extracted page number, or the cleaned-up label.
    '''
    name = PAGE_RE.sub('', name)
    for word in name.split():
        if word.isdigit() or ROMAN_RE.match(word):
            return word
    # Strip only after "page" has been removed: removing a leading or
    # trailing "page" exposes the whitespace that separated it from the
    # rest of the label (e.g. "Page Cover" -> " Cover").
    return name.strip()
def build_name_for(expr):
    '''
    Build a callable that maps a page-break element to its page name.

    :param expr: XPath expression evaluated against each element to obtain
        its name. When empty or ``None``, pages are simply numbered
        sequentially starting from 1.
    :return: A function taking an element and returning the page name as a
        string. With an XPath expression, the name is ``''`` when the
        expression yields nothing, otherwise the result is passed through
        ``filter_name``.
    '''
    if not expr:
        # Each call to build_name_for gets its own independent counter.
        counter = count(1)
        return lambda elem: str(next(counter))

    selector = XPath(expr, namespaces=NSMAP)

    def name_for(elem):
        results = selector(elem)
        if not results:
            return ''
        if isinstance(results, str):
            # A string-valued expression (e.g. ``string(@title)``) yields a
            # single string rather than a list; joining it would insert a
            # space between every character and corrupt the name.
            name = results
        else:
            name = ' '.join(results)
        return filter_name(name)

    return name_for
def add_page_map(opfpath, opts):
    '''
    Collect page-break elements from every spine item of a book and register
    them as pages on the book.

    :param opfpath: Path to the OPF file; passed to ``OEBBook``.
    :param opts: Options object. ``opts.page`` is the XPath expression that
        selects page-break elements; ``opts.page_names`` is an optional
        XPath expression used to name each page (see ``build_name_for``).
    :raises NotImplementedError: Always, once the pages have been collected,
        because no writer is currently available to save the result.
    '''
    oeb = OEBBook(opfpath)
    selector = XPath(opts.page, namespaces=NSMAP)
    name_for = build_name_for(opts.page_names)
    idgen = ("calibre-page-%d" % n for n in count(1))
    for item in oeb.spine:
        data = item.data
        for elem in selector(data):
            name = name_for(elem)
            elem_id = elem.get('id')
            if not elem_id:
                # A missing or empty id cannot be used as a link target, so
                # give the element a generated one.
                elem_id = elem.attrib['id'] = next(idgen)
            href = '#'.join((item.href, elem_id))
            oeb.pages.add(name, href)
    # TODO: restore a writer here. This code used to be
    #     writer = DirWriter(version='2.0', page_map=True)
    #     writer.dump(oeb, opfpath)
    # but the writer was replaced by ``None``, so the dump call could only
    # fail with an opaque AttributeError. Fail with an explicit message.
    raise NotImplementedError(
        'add_page_map cannot save the page map: no writer is available '
        '(DirWriter is disabled)')