# %PDF- %PDF-
# Directory: /usr/lib/calibre/calibre/ebooks/textile/
# Current file: /usr/lib/calibre/calibre/ebooks/textile/unsmarten.py
__license__ = 'GPL 3'
__copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
__docformat__ = 'restructuredtext en'

import re

# Maps each typographic glyph to the brace-delimited code that replaces it.
#
# NOTE(review): in the listing this file was recovered from, every pattern
# repeated the same character two or three times (e.g. '¢|¢|¢').  That looks
# like HTML entity alternatives that were decoded by the listing tool --
# confirm against the upstream file before relying on entity input being
# handled here.  Only the literal characters are matched.
_GLYPH_CODES = {
    # NOTE(review): the backslash in the cent code is reproduced as found;
    # verify it is the code the consumer of this output expects.
    '¢': r'{c\}',       # cent
    '£': '{L-}',        # pound
    '¥': '{Y=}',        # yen
    '©': '{(c)}',       # copyright
    '®': '{(r)}',       # registered
    '¼': '{1/4}',       # quarter
    '½': '{1/2}',       # half
    '¾': '{3/4}',       # three-quarter
    # Previously emitted '{A`)}' -- the stray ')' was inconsistent with every
    # other grave code in this table.
    'À': '{A`}',        # A-grave
    'Á': "{A'}",        # A-acute
    'Â': '{A^}',        # A-circumflex
    'Ã': '{A~}',        # A-tilde
    'Ä': '{A"}',        # A-umlaut
    'Å': '{Ao}',        # A-ring
    'Æ': '{AE}',        # AE
    'Ç': '{C,}',        # C-cedilla
    'È': '{E`}',        # E-grave
    'É': "{E'}",        # E-acute
    'Ê': '{E^}',        # E-circumflex
    'Ë': '{E"}',        # E-umlaut
    'Ì': '{I`}',        # I-grave
    'Í': "{I'}",        # I-acute
    'Î': '{I^}',        # I-circumflex
    'Ï': '{I"}',        # I-umlaut
    'Ð': '{D-}',        # ETH
    'Ñ': '{N~}',        # N-tilde
    'Ò': '{O`}',        # O-grave
    'Ó': "{O'}",        # O-acute
    'Ô': '{O^}',        # O-circumflex
    'Õ': '{O~}',        # O-tilde
    'Ö': '{O"}',        # O-umlaut
    '×': '{x}',         # dimension
    'Ø': '{O/}',        # O-slash
    'Ù': '{U`}',        # U-grave
    'Ú': "{U'}",        # U-acute
    'Û': '{U^}',        # U-circumflex
    'Ü': '{U"}',        # U-umlaut
    'Ý': "{Y'}",        # Y-acute
    'ß': '{sz}',        # sharp-s
    'à': '{a`}',        # a-grave
    'á': "{a'}",        # a-acute
    'â': '{a^}',        # a-circumflex
    'ã': '{a~}',        # a-tilde
    'ä': '{a"}',        # a-umlaut
    'å': '{ao}',        # a-ring
    'æ': '{ae}',        # ae
    'ç': '{c,}',        # c-cedilla
    'è': '{e`}',        # e-grave
    'é': "{e'}",        # e-acute
    'ê': '{e^}',        # e-circumflex
    'ë': '{e"}',        # e-umlaut
    'ì': '{i`}',        # i-grave
    'í': "{i'}",        # i-acute
    'î': '{i^}',        # i-circumflex
    'ï': '{i"}',        # i-umlaut
    'ð': '{d-}',        # eth
    'ñ': '{n~}',        # n-tilde
    'ò': '{o`}',        # o-grave
    'ó': "{o'}",        # o-acute
    'ô': '{o^}',        # o-circumflex
    'õ': '{o~}',        # o-tilde
    'ö': '{o"}',        # o-umlaut
    'ø': '{o/}',        # o-stroke
    'ù': '{u`}',        # u-grave
    'ú': "{u'}",        # u-acute
    'û': '{u^}',        # u-circumflex
    'ü': '{u"}',        # u-umlaut
    'ý': "{y'}",        # y-acute
    'ÿ': '{y"}',        # y-umlaut
    'Č': '{Cˇ}',        # C-caron
    'č': '{cˇ}',        # c-caron
    'Ď': '{Dˇ}',        # D-caron
    'ď': '{dˇ}',        # d-caron
    'Ě': '{Eˇ}',        # E-caron
    'ě': '{eˇ}',        # e-caron
    'Ĺ': "{L'}",        # L-acute
    'ĺ': "{l'}",        # l-acute
    'Ľ': '{Lˇ}',        # L-caron
    'ľ': '{lˇ}',        # l-caron
    'Ň': '{Nˇ}',        # N-caron
    'ň': '{nˇ}',        # n-caron
    'Œ': '{OE}',        # OE
    'œ': '{oe}',        # oe
    'Ŕ': "{R'}",        # R-acute
    'ŕ': "{r'}",        # r-acute
    'Ř': '{Rˇ}',        # R-caron
    'ř': '{rˇ}',        # r-caron
    'Ŝ': '{S^}',        # S-circumflex
    'ŝ': '{s^}',        # s-circumflex
    'Š': '{Sˇ}',        # S-caron
    'š': '{sˇ}',        # s-caron
    'Ť': '{Tˇ}',        # T-caron
    'ť': '{tˇ}',        # t-caron
    'Ů': '{U°}',        # U-ring
    'ů': '{u°}',        # u-ring
    'Ž': '{Zˇ}',        # Z-caron
    'ž': '{zˇ}',        # z-caron
    '•': '{*}',         # bullet
    '₣': '{Fr}',        # Franc
    '₤': '{L=}',        # Lira
    '₨': '{Rs}',        # Rupee
    '€': '{C=}',        # euro
    '™': '{tm}',        # trademark
    '♠': '{spade}',     # spade
    '♣': '{club}',      # club
    '♥': '{heart}',     # heart
    '♦': '{diamond}',   # diamond
}

# One character class covering every key, compiled once at import time.
_GLYPH_RE = re.compile('[' + re.escape(''.join(_GLYPH_CODES)) + ']')


def _glyph_code(match):
    """Return the brace code for the single glyph captured by *match*."""
    return _GLYPH_CODES[match.group()]


def unsmarten(txt):
    """Replace typographic glyphs in *txt* with brace-delimited codes.

    Each character that is a key of ``_GLYPH_CODES`` (currency signs,
    fractions, accented Latin letters, card suits, ...) is replaced by its
    code, e.g. ``©`` becomes ``{(c)}`` and ``é`` becomes ``{e'}``.  All other
    characters are left untouched.

    The work is done in a single pass.  That is equivalent to applying the
    substitutions one after another because no code contains a character
    that is itself a key of the table.

    :param txt: the text to convert (a ``str``).
    :return: a new string with the glyphs replaced.
    """
    # Move into main code?
    # txt = re.sub(u'\xa0', r'p. ', txt)  # blank paragraph
    # txt = re.sub(u'\n\n\n\n', r'\n\np. \n\n', txt)  # blank paragraph
    # txt = re.sub(u'\n \n', r'\n<br />\n', txt)  # blank paragraph - br tag
    return _GLYPH_RE.sub(_glyph_code, txt)