%PDF- %PDF-
Direktori : /lib/calibre/calibre/utils/ |
Current File : //lib/calibre/calibre/utils/html2text.py |
#!/usr/bin/env python3
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>


def html2text(html, single_line_break=True):
    """Convert HTML markup to text using the ``html2text`` library.

    :param html: The markup to convert. ``bytes`` input is first decoded with
        calibre's ``xml_to_unicode`` (encoding declarations stripped, entities
        resolved); any other value is passed to the converter as is.
    :param single_line_break: Forwarded unchanged to
        ``HTML2Text.single_line_break``.
    :return: Whatever ``HTML2Text.handle()`` returns for the prepared markup.
    """
    # Imports are kept function-local, exactly as in the original module.
    import re

    from html2text import HTML2Text

    if isinstance(html, bytes):
        from calibre.ebooks.chardet import xml_to_unicode
        html = xml_to_unicode(
            html, strip_encoding_pats=True, resolve_entities=True)[0]

    # Replace <u> tags with <span>, as <u> becomes emphasis in html2text.
    # The optional "/" and the attribute text are captured and re-emitted so
    # that closing tags and attributes survive the rename; [uU] followed by
    # \b matches either case of the tag name without touching e.g. <ul>.
    html = re.sub(
        r'<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>',
        r'<\g<solidus>span\g<rest>>', html)

    h2t = HTML2Text()
    # NOTE(review): `_` is not defined in this file; presumably it is the
    # translation function calibre installs into builtins -- confirm.
    h2t.default_image_alt = _('Unnamed image')
    h2t.body_width = 0
    h2t.single_line_break = single_line_break
    h2t.emphasis_mark = '*'
    return h2t.handle(html)


def find_tests():
    """Build and return the unittest suite covering :func:`html2text`."""
    import unittest

    class TestH2T(unittest.TestCase):

        def test_html2text_behavior(self):
            # Maps input markup to the exact text html2text() must produce.
            cases = {
                '<u>test</U>': 'test\n',
                '<i>test</i>': '*test*\n',
                '<a href="http://else.where/other">other</a>': '[other](http://else.where/other)\n',
                '<img src="test.jpeg">': '\n',
                '<a href="#t">test</a> <span id="t">dest</span>': 'test dest\n',
                '<>a': '<>a\n',
                '<p>a<p>b': 'a\nb\n',
            }
            for src, expected in cases.items():
                # subTest keeps checking the remaining cases after a failure
                # and labels each failure with the markup that caused it.
                with self.subTest(src=src):
                    self.assertEqual(html2text(src), expected)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestH2T)