%PDF- %PDF-
| Direktori : /lib/calibre/calibre/utils/ |
| Current File : //lib/calibre/calibre/utils/titlecase.py |
#!/usr/bin/env python3
"""
Original Perl version by: John Gruber https://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
Modifications to make it work with non-ascii chars by Kovid Goyal
License: http://www.opensource.org/licenses/mit-license.php
"""
import re
from calibre.utils.icu import capitalize, upper
__all__ = ['titlecase']
__version__ = '0.5'
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(str(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
SMALL_FIRST = re.compile(fr'^([{PUNCT}]*)({SMALL})\b', re.I|re.U)
SMALL_LAST = re.compile(fr'\b({SMALL})[{PUNCT}]?$', re.I|re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
_lang = None
def lang():
global _lang
if _lang is None:
from calibre.utils.localization import get_lang
_lang = get_lang().lower()
return _lang
def titlecase(text):
"""
Titlecases input text
This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
The list of "SMALL words" which are not capped comes from
the New York Times Manual of Style, plus 'vs' and 'v'.
"""
all_caps = upper(text) == text
pat = re.compile(r'(\s+)')
line = []
for word in pat.split(text):
if not word:
continue
if pat.match(word) is not None:
line.append(word)
continue
if all_caps:
if UC_INITIALS.match(word):
line.append(word)
continue
else:
word = icu_lower(word)
if APOS_SECOND.match(word):
word = word.replace(word[0], icu_upper(word[0]), 1)
word = word[:2] + icu_upper(word[2]) + word[3:]
line.append(word)
continue
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
line.append(word)
continue
if SMALL_WORDS.match(word):
line.append(icu_lower(word))
continue
hyphenated = []
for item in word.split('-'):
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
line.append("-".join(hyphenated))
result = "".join(line)
result = SMALL_FIRST.sub(lambda m: '{}{}'.format(
m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_AFTER_NUM.sub(lambda m: '{}{}'.format(m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)
result = SUBPHRASE.sub(lambda m: '{}{}'.format(
m.group(1),
capitalize(m.group(2))
), result)
return result