# NOTE: header residue left by a web file viewer; not part of the module itself.
# Directory: /usr/lib/calibre/calibre/gui2/viewer/
# Current file: /usr/lib/calibre/calibre/gui2/viewer/search.py
#!/usr/bin/env python3
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
import json
import regex
from collections import Counter, OrderedDict
from html import escape
from qt.core import (
QCheckBox, QComboBox, QFont, QHBoxLayout, QIcon, QLabel, Qt, QToolButton,
QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
)
from threading import Thread
from calibre.ebooks.conversion.search_replace import REGEX_FLAGS
from calibre.gui2 import warning_dialog
from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.gui2.viewer.config import vprefs
from calibre.gui2.viewer.web_view import get_data, get_manifest
from calibre.gui2.viewer.widgets import ResultsDelegate, SearchBox
from polyglot.builtins import iteritems
from polyglot.functools import lru_cache
from polyglot.queue import Queue
class BusySpinner(QWidget):  # {{{
    """A progress indicator followed by a "Searching..." label.

    The widget makes itself visible in start() and hides itself in stop();
    ``is_running`` records which of the two was called last.
    """

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        self.l = layout
        indicator = ProgressIndicator(self, 24)
        layout.addWidget(indicator)
        self.pi = indicator
        label = QLabel(_('Searching...'))
        layout.addWidget(label)
        self.la = label
        # Stretch after the label so indicator and label sit at the start
        layout.addStretch(10)
        self.is_running = False

    def _toggle(self, running):
        # Shared implementation of start()/stop(): visibility first, then
        # the indicator, then the flag
        self.setVisible(running)
        if running:
            self.pi.start()
        else:
            self.pi.stop()
        self.is_running = running

    def start(self):
        """Show the widget and start the progress indicator."""
        self._toggle(True)

    def stop(self):
        """Hide the widget and stop the progress indicator."""
        self._toggle(False)
# }}}
# Maps each straight quote to the set of quote characters (straight and
# curly) that it is allowed to match in text_to_regex()
quote_map = {
    '"': '"“”',
    "'": "'‘’",
}
# Splits on a single straight quote, keeping the quote as its own piece
qpat = regex.compile(r'''(['"])''')
# Splits on runs of whitespace, keeping each run as its own piece
spat = regex.compile(r'(\s+)')
# Optional soft hyphen (U+00AD) or zero-width (non-)joiner (U+200C/U+200D),
# inserted between the characters of literal text in text_to_regex()
invisible_chars = '(?:[\u00ad\u200c\u200d]{0,1})'

# Item data roles used on the entries of the results tree
SEARCH_RESULT_ROLE = Qt.ItemDataRole.UserRole
RESULT_NUMBER_ROLE = SEARCH_RESULT_ROLE + 1
SPINE_IDX_ROLE = RESULT_NUMBER_ROLE + 1
def text_to_regex(text):
    """Convert literal search text into a regular expression string.

    * Every run of whitespace (including leading/trailing whitespace)
      becomes ``\\s+``.
    * A straight quote becomes a character class from ``quote_map``.
    * All other text is escaped character by character, with
      ``invisible_chars`` placed between consecutive characters.

    Text consisting only of whitespace yields ``\\s+``; so does the empty
    string, because splitting it produces a single empty piece.
    """
    stripped = text.strip()
    if text and not stripped:
        return r'\s+'

    pieces = []
    if text.lstrip() != text:
        pieces.append(r'\s+')

    for chunk in spat.split(stripped):
        if not chunk.strip():
            # a whitespace separator (or the empty piece of empty input)
            pieces.append(r'\s+')
            continue
        for token in qpat.split(chunk):
            alternatives = quote_map.get(token)
            if alternatives is None:
                pieces.append(invisible_chars.join(map(regex.escape, token)))
            else:
                pieces.append('[' + alternatives + ']')

    if text.rstrip() != text:
        pieces.append(r'\s+')
    return ''.join(pieces)
class Search:
    """A search request: the query text plus how it should be matched.

    :param text: the text (or regular expression) entered by the user
    :param mode: ``'regex'`` to use ``text`` as a regular expression,
        ``'word'`` to match whole words; any other value is treated as a
        plain "contains" search
    :param case_sensitive: when false the expression ignores case
    :param backwards: direction flag; not part of equality
    """

    def __init__(self, text, mode, case_sensitive, backwards):
        self.text, self.mode = text, mode
        self.case_sensitive = case_sensitive
        self.backwards = backwards
        self._regex = None  # compiled lazily by the ``regex`` property

    def __eq__(self, other):
        # Direction is deliberately ignored: the same query searched in the
        # other direction compares equal
        if not isinstance(other, Search):
            return False
        return self.text == other.text and self.mode == other.mode and self.case_sensitive == other.case_sensitive

    @property
    def regex(self):
        """The compiled pattern for this search, built on first access.

        Raises whatever ``regex.compile`` raises for an invalid expression.
        """
        if self._regex is None:
            expr = self.text
            flags = REGEX_FLAGS
            if not self.case_sensitive:
                # Add case-insensitivity on top of the standard flags rather
                # than replacing them, so both case modes are compiled with
                # the same base flags
                flags |= regex.IGNORECASE
            if self.mode != 'regex':
                if self.mode == 'word':
                    # Each whitespace separated word must match as a whole word
                    words = [fr'\b{text_to_regex(part)}\b' for part in expr.split()]
                    expr = r'\s+'.join(words)
                else:
                    expr = text_to_regex(expr)
            self._regex = regex.compile(expr, flags)
        return self._regex

    def __str__(self):
        from collections import namedtuple
        s = ('text', 'mode', 'case_sensitive', 'backwards')
        return str(namedtuple('Search', s)(*tuple(getattr(self, x) for x in s)))
class SearchFinished:
    """Marker object carrying the query whose search has completed."""

    def __init__(self, search_query):
        # The query this marker belongs to
        self.search_query = search_query
class SearchResult:
    """One match of a search query inside one file of the book.

    ``before``/``text``/``after`` are the matched text and its surrounding
    context, ``offset`` is the match position in the searched text and
    ``toc_nodes`` holds the table of contents nodes computed for it (an empty
    tuple if computing them fails).
    """

    __slots__ = (
        'search_query', 'before', 'text', 'after', 'q', 'spine_idx',
        'index', 'file_name', 'is_hidden', 'offset', 'toc_nodes',
        'result_num'
    )

    def __init__(self, search_query, before, text, after, q, name, spine_idx, index, offset, result_num):
        self.search_query = search_query
        self.before = before
        self.text = text
        self.after = after
        self.q = q
        self.file_name = name
        self.spine_idx = spine_idx
        self.index = index
        self.offset = offset
        self.result_num = result_num
        self.is_hidden = False
        # Looked up last since it reads the attributes set above. A failure
        # here is reported but must not prevent the result from being created.
        try:
            nodes = toc_nodes_for_search_result(self)
        except Exception:
            import traceback
            traceback.print_exc()
            nodes = ()
        self.toc_nodes = nodes

    @property
    def for_js(self):
        """A plain dict describing this result, built fresh on every access."""
        payload = {}
        payload['file_name'] = self.file_name
        payload['spine_idx'] = self.spine_idx
        payload['index'] = self.index
        payload['text'] = self.text
        payload['before'] = self.before
        payload['after'] = self.after
        payload['mode'] = self.search_query.mode
        payload['q'] = self.q
        payload['result_num'] = self.result_num
        return payload

    def is_result(self, result_from_js):
        """True if the dict refers to this result (same spine_idx, index and q)."""
        if result_from_js['spine_idx'] != self.spine_idx:
            return False
        if self.index != result_from_js['index']:
            return False
        return result_from_js['q'] == self.q

    def __str__(self):
        # Every slot except the last one, formatted like a namedtuple repr
        shown = self.__slots__[:-1]
        body = ', '.join(f'{field}={getattr(self, field)!r}' for field in shown)
        return 'SearchResult(' + body + ')'
@lru_cache(maxsize=None)
def searchable_text_for_name(name):
    """Return ``(flat_text, anchor_offset_map)`` for the file ``name``.

    The first item returned by ``get_data(name)`` is parsed as JSON and its
    ``tree`` is walked depth first starting at the ``body`` children. Nodes
    are dicts using the keys ``n`` (name), ``x`` (text), ``l`` (tail),
    ``c`` (children) and ``a`` (attribute pairs). ``anchor_offset_map`` maps
    each ``id`` attribute value to the length of the text collected before
    its node, keeping the first occurrence of a repeated id.
    """
    tree = json.loads(get_data(name)[0])['tree']
    pending = []
    detached_tails = []
    for top in tree['c']:
        if top.get('n') == 'body':
            pending.append(top)
            # the JS code does not add the tail of body tags to flat text
            detached_tails.append((top.pop('l', None), top))

    skipped_tags = {'script', 'style', 'title'}
    chunks = []
    pos = 0
    anchor_offset_map = OrderedDict()
    while pending:
        current = pending.pop()
        if isinstance(current, str):
            # a tail string pushed while visiting an earlier node
            chunks.append(current)
            pos += len(current)
            continue
        tag = current.get('n')
        own_text = current.get('x')
        tail_text = current.get('l')
        kids = current.get('c')
        attrs = current.get('a')
        if attrs:
            for attr in attrs:
                if attr[0] == 'id':
                    # first occurrence of an id wins
                    anchor_offset_map.setdefault(attr[1], pos)
        if tag and own_text and tag not in skipped_tags:
            chunks.append(own_text)
            pos += len(own_text)
        # The tail goes onto the stack before the children so that it is
        # popped only after the whole subtree has been processed
        if tail_text:
            pending.append(tail_text)
        if kids:
            pending.extend(reversed(kids))

    # put the tails removed above back onto their body nodes
    for tail_text, body_node in detached_tails:
        if tail_text is not None:
            body_node['l'] = tail_text
    return ''.join(chunks), anchor_offset_map
@lru_cache(maxsize=2)
def get_toc_data():
    """Build lookup tables from the manifest's spine and table of contents.

    Returns a dict with:

    * ``spine``: the spine names as a tuple
    * ``spine_toc_map``: spine name -> list of ToC nodes whose ``dest`` is
      that name, in the order the nodes are visited (parent before children)
    * ``spine_idx_map``: spine name -> position in the spine
    * ``parent_map``: ``id()`` of a ToC node -> its parent node
    """
    manifest = get_manifest() or {}
    spine = manifest.get('spine') or []
    spine_toc_map = {spine_name: [] for spine_name in spine}
    parent_map = {}

    def visit(toc_node):
        bucket = spine_toc_map.get(toc_node['dest'])
        if bucket is not None:
            bucket.append(toc_node)
        for child in toc_node.get('children') or ():
            parent_map[id(child)] = toc_node
            visit(child)

    root = manifest.get('toc')
    if root:
        visit(root)

    return dict(
        spine=tuple(spine),
        spine_toc_map=spine_toc_map,
        spine_idx_map={spine_name: pos for pos, spine_name in enumerate(spine)},
        parent_map=parent_map,
    )
class ToCOffsetMap:
    """Locates the table of contents nodes that apply to a text offset.

    :param toc_nodes: ToC nodes (dicts) in the order they should be tested
    :param offset_map: node ``id`` -> text offset at which the node starts
    :param previous_toc_node: node to fall back on when no node in
        ``toc_nodes`` starts at or before the requested offset
    :param parent_map: ``id()`` of a node -> its parent node
    """

    def __init__(self, toc_nodes=(), offset_map=None, previous_toc_node=None, parent_map=None):
        self.toc_nodes = toc_nodes
        self.offset_map = offset_map if offset_map else {}
        self.previous_toc_node = previous_toc_node
        self.parent_map = parent_map if parent_map else {}

    def toc_nodes_for_offset(self, offset):
        """Yield the ancestors (outermost first) and then the node for ``offset``.

        The node is the last one whose offset is not greater than ``offset``,
        scanning stops at the first node that starts later. Nodes without an
        entry in ``offset_map`` are skipped. Nothing is yielded when there is
        neither such a node nor a previous node.
        """
        chosen = None
        for candidate in self.toc_nodes:
            begins_at = self.offset_map.get(candidate.get('id'))
            if begins_at is None:
                continue
            if begins_at > offset:
                break
            chosen = candidate
        if chosen is None:
            chosen = self.previous_toc_node
        if chosen is None:
            return

        lineage = []
        ancestor = self.parent_map.get(id(chosen))
        while ancestor is not None:
            lineage.append(ancestor)
            ancestor = self.parent_map.get(id(ancestor))
        if len(lineage) > 1:
            # root node: dropped only when it is not the sole ancestor
            del lineage[-1]
        yield from lineage[::-1]
        yield chosen
@lru_cache(maxsize=None)
def toc_offset_map_for_name(name):
    """Build the ToCOffsetMap for the spine item ``name``.

    Each ToC node pointing at this file gets the text offset of the anchor
    named by its ``frag`` (0 when the anchor is not known). The fallback node
    is the last ToC node of the nearest earlier spine item that has any. An
    empty ToCOffsetMap is returned when ``name`` cannot be looked up.
    """
    anchor_map = searchable_text_for_name(name)[1]
    toc_data = get_toc_data()
    try:
        spine_pos = toc_data['spine_idx_map'][name]
        nodes_here = toc_data['spine_toc_map'][name]
    except Exception:
        return ToCOffsetMap()
    if spine_pos < 0:
        return ToCOffsetMap()

    offset_map = {
        node.get('id'): anchor_map.get(node.get('frag'), 0)
        for node in nodes_here if node.get('id') is not None
    }

    # Walk backwards through the earlier spine items looking for one that
    # has ToC nodes
    fallback_node = None
    spine = toc_data['spine']
    for pos in range(spine_pos - 1, -1, -1):
        try:
            earlier_nodes = toc_data['spine_toc_map'][spine[pos]]
        except Exception:
            continue
        if earlier_nodes:
            fallback_node = earlier_nodes[-1]
            break
    return ToCOffsetMap(nodes_here, offset_map, fallback_node, toc_data['parent_map'])
def toc_nodes_for_search_result(sr):
    """Return the tuple of ToC nodes for the search result ``sr``.

    The file is found from ``sr.spine_idx`` and the nodes from ``sr.offset``.
    An empty tuple is returned when the spine index cannot be resolved.
    """
    spine_pos = sr.spine_idx
    spine = get_toc_data()['spine']
    try:
        file_name = spine[spine_pos]
    except Exception:
        return ()
    offsets = toc_offset_map_for_name(file_name)
    return tuple(offsets.toc_nodes_for_offset(sr.offset))
def search_in_name(name, search_query, ctx_size=75):
    """Yield ``(before, matched_text, after, start)`` for every match in ``name``.

    ``before`` and ``after`` are up to ``ctx_size`` characters of the text on
    either side of the match and ``start`` is the offset of the match in the
    searchable text of the file.
    """
    haystack = searchable_text_for_name(name)[0]
    pattern = search_query.regex
    for found in pattern.finditer(haystack):
        begin = found.start()
        stop = found.end()
        leading = haystack[max(0, begin - ctx_size):begin]
        trailing = haystack[stop:stop + ctx_size]
        yield leading, found.group(), trailing, begin
class SearchInput(QWidget):  # {{{
    """The search controls: expression box, next/previous, mode and case options.

    Signals:
        do_search: emitted with a Search object when a search is requested
        cleared: forwarded from the search box's ``cleared`` signal
        go_back: emitted when the "go back" button is clicked

    The mode and case sensitivity used with each expression are stored in
    ``vprefs`` under ``saved-<panel_name>-settings`` and restored when that
    expression is picked again.
    """

    do_search = pyqtSignal(object)
    cleared = pyqtSignal()
    go_back = pyqtSignal()

    def __init__(self, parent=None, panel_name='search'):
        QWidget.__init__(self, parent)
        # True only while saved settings are being applied to the widgets, so
        # that the change signals this triggers are not saved again
        self.ignore_search_type_changes = False
        self.l = l = QVBoxLayout(self)
        l.setContentsMargins(0, 0, 0, 0)

        # First row: expression box and the next/previous buttons
        h = QHBoxLayout()
        h.setContentsMargins(0, 0, 0, 0)
        l.addLayout(h)

        self.search_box = sb = SearchBox(self)
        self.panel_name = panel_name
        sb.initialize(f'viewer-{panel_name}-panel-expression')
        sb.item_selected.connect(self.saved_search_selected)
        sb.history_saved.connect(self.history_saved)
        sb.history_cleared.connect(self.history_cleared)
        sb.cleared.connect(self.cleared)
        sb.lineEdit().returnPressed.connect(self.find_next)
        h.addWidget(sb)

        self.next_button = nb = QToolButton(self)
        h.addWidget(nb)
        nb.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        nb.setIcon(QIcon(I('arrow-down.png')))
        nb.clicked.connect(self.find_next)
        nb.setToolTip(_('Find next match'))

        self.prev_button = nb = QToolButton(self)
        h.addWidget(nb)
        nb.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        nb.setIcon(QIcon(I('arrow-up.png')))
        nb.clicked.connect(self.find_previous)
        nb.setToolTip(_('Find previous match'))

        # Second row: search mode, case sensitivity and the go back button
        h = QHBoxLayout()
        h.setContentsMargins(0, 0, 0, 0)
        l.addLayout(h)
        self.query_type = qt = QComboBox(self)
        qt.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        qt.addItem(_('Contains'), 'normal')
        qt.addItem(_('Whole words'), 'word')
        qt.addItem(_('Regex'), 'regex')
        qt.setToolTip('<p>' + _(
            'Choose the type of search: <ul>'
            '<li><b>Contains</b> will search for the entered text anywhere.'
            '<li><b>Whole words</b> will search for whole words that equal the entered text.'
            '<li><b>Regex</b> will interpret the text as a regular expression.'
        ))
        qt.setCurrentIndex(qt.findData(vprefs.get(f'viewer-{self.panel_name}-mode', 'normal') or 'normal'))
        qt.currentIndexChanged.connect(self.save_search_type)
        h.addWidget(qt)

        self.case_sensitive = cs = QCheckBox(_('&Case sensitive'), self)
        cs.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        cs.setChecked(bool(vprefs.get(f'viewer-{self.panel_name}-case-sensitive', False)))
        cs.stateChanged.connect(self.save_search_type)
        h.addWidget(cs)

        self.return_button = rb = QToolButton(self)
        rb.setIcon(QIcon(I('back.png')))
        rb.setToolTip(_('Go back to where you were before searching'))
        rb.clicked.connect(self.go_back)
        h.addWidget(rb)

    @property
    def _saved_settings_key(self):
        # vprefs key holding the per-expression settings of this panel
        return f'saved-{self.panel_name}-settings'

    def _current_settings(self):
        # The settings currently shown by the widgets, in the stored format
        return {'case_sensitive': self.case_sensitive.isChecked(), 'mode': self.query_type.currentData()}

    def history_saved(self, new_text, history):
        """Store the settings for ``new_text`` and drop those no longer in ``history``."""
        if new_text:
            sss = vprefs.get(self._saved_settings_key) or {}
            sss[new_text] = self._current_settings()
            history = frozenset(history)
            sss = {k: v for k, v in iteritems(sss) if k in history}
            vprefs[self._saved_settings_key] = sss

    def history_cleared(self):
        """Forget all saved per-expression settings."""
        vprefs[self._saved_settings_key] = {}

    def save_search_type(self):
        """Store the current settings for the expression currently in the box."""
        text = self.search_box.currentText()
        if text and not self.ignore_search_type_changes:
            sss = vprefs.get(self._saved_settings_key) or {}
            sss[text] = self._current_settings()
            vprefs[self._saved_settings_key] = sss

    def saved_search_selected(self):
        """Restore the settings saved for the chosen expression, then search."""
        text = self.search_box.currentText()
        if text:
            s = (vprefs.get(self._saved_settings_key) or {}).get(text)
            if s:
                self.ignore_search_type_changes = True
                try:
                    if 'case_sensitive' in s:
                        self.case_sensitive.setChecked(s['case_sensitive'])
                    if 'mode' in s:
                        idx = self.query_type.findData(s['mode'])
                        if idx > -1:
                            self.query_type.setCurrentIndex(idx)
                finally:
                    # Always reset, otherwise a failure above would silently
                    # stop save_search_type() from ever saving again
                    self.ignore_search_type_changes = False
            self.find_next()

    def search_query(self, backwards=False):
        """Return a Search for the current input, or None if the box is empty."""
        text = self.search_box.currentText()
        if text:
            return Search(
                text, self.query_type.currentData() or 'normal',
                self.case_sensitive.isChecked(), backwards
            )

    def emit_search(self, backwards=False):
        """Remember the current options and emit ``do_search`` if there is text."""
        vprefs[f'viewer-{self.panel_name}-case-sensitive'] = self.case_sensitive.isChecked()
        vprefs[f'viewer-{self.panel_name}-mode'] = self.query_type.currentData()
        sq = self.search_query(backwards)
        if sq is not None:
            self.do_search.emit(sq)

    def find_next(self):
        self.emit_search()

    def find_previous(self):
        self.emit_search(backwards=True)

    def focus_input(self, text=None):
        """Focus the search box, optionally replacing its contents with ``text``."""
        # Only string-like values are accepted as replacement text
        if text and hasattr(text, 'rstrip'):
            self.search_box.setText(text)
        self.search_box.setFocus(Qt.FocusReason.OtherFocusReason)
        le = self.search_box.lineEdit()
        le.end(False)
        le.selectAll()
# }}}
class Results(QTreeWidget):  # {{{
    """Tree of search results grouped into sections by table of contents node.

    Signals:
        show_search_result: emitted with a SearchResult that should be shown
        current_result_changed: emitted with the current SearchResult, or
            None when there is no current item
        count_changed: emitted with the number of results, or -1 when the
            results are cleared

    ``search_results`` holds the results in the order they were added and
    ``item_map`` maps each position in that list to its tree item.
    """

    show_search_result = pyqtSignal(object)
    current_result_changed = pyqtSignal(object)
    count_changed = pyqtSignal(object)

    def __init__(self, parent=None):
        QTreeWidget.__init__(self, parent)
        self.setHeaderHidden(True)
        self.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        self.delegate = ResultsDelegate(self)
        self.setItemDelegate(self.delegate)
        self.itemClicked.connect(self.item_activated)
        self.blank_icon = QIcon(I('blank.png'))
        self.not_found_icon = QIcon(I('dialog_warning.png'))
        self.currentItemChanged.connect(self.current_item_changed)
        self.section_font = QFont(self.font())
        self.section_font.setItalic(True)
        self.section_map = {}
        self.search_results = []
        self.item_map = {}

    def current_item_changed(self, current, previous):
        if current is not None:
            r = current.data(0, SEARCH_RESULT_ROLE)
            # section headers carry no result, so nothing is emitted for them
            if isinstance(r, SearchResult):
                self.current_result_changed.emit(r)
        else:
            self.current_result_changed.emit(None)

    def add_result(self, result):
        """Append ``result``, creating its section header on first use."""
        section_title = _('Unknown')
        section_id = -1
        toc_nodes = getattr(result, 'toc_nodes', ()) or ()
        if toc_nodes:
            # The section is the last (innermost) ToC node of the result
            section_title = toc_nodes[-1].get('title') or _('Unknown')
            section_id = toc_nodes[-1].get('id')
            if section_id is None:
                section_id = -1
        section_key = section_id
        section = self.section_map.get(section_key)
        spine_idx = getattr(result, 'spine_idx', -1)
        if section is None:
            section = QTreeWidgetItem([section_title], 1)
            section.setFlags(Qt.ItemFlag.ItemIsEnabled)
            section.setFont(0, self.section_font)
            section.setData(0, SPINE_IDX_ROLE, spine_idx)
            # Tooltip showing the chain of ToC nodes, indented by depth
            lines = []
            for i, node in enumerate(toc_nodes):
                lines.append('\xa0\xa0' * i + '➤ ' + (node.get('title') or _('Unknown')))
            if lines:
                tt = ngettext('Table of Contents section:', 'Table of Contents sections:', len(lines))
                tt += '\n' + '\n'.join(lines)
                section.setToolTip(0, tt)
            self.section_map[section_key] = section
            # Insert before the first section with a larger spine index so
            # that sections stay in spine order
            for s in range(self.topLevelItemCount()):
                ti = self.topLevelItem(s)
                if ti.data(0, SPINE_IDX_ROLE) > spine_idx:
                    self.insertTopLevelItem(s, section)
                    break
            else:
                self.addTopLevelItem(section)
            section.setExpanded(True)
        item = QTreeWidgetItem(section, [' '], 2)
        item.setFlags(Qt.ItemFlag.ItemIsSelectable | Qt.ItemFlag.ItemIsEnabled | Qt.ItemFlag.ItemNeverHasChildren)
        item.setData(0, SEARCH_RESULT_ROLE, result)
        item.setData(0, RESULT_NUMBER_ROLE, len(self.search_results))
        item.setData(0, SPINE_IDX_ROLE, spine_idx)
        if isinstance(result, SearchResult):
            tt = '<p>…' + escape(result.before, False) + '<b>' + escape(
                result.text, False) + '</b>' + escape(result.after, False) + '…'
            item.setData(0, Qt.ItemDataRole.ToolTipRole, tt)
        item.setIcon(0, self.blank_icon)
        self.item_map[len(self.search_results)] = item
        self.search_results.append(result)
        n = self.number_of_results
        self.count_changed.emit(n)

    def item_activated(self):
        """Emit ``show_search_result`` for the current result unless it is hidden."""
        i = self.currentItem()
        if i:
            sr = i.data(0, SEARCH_RESULT_ROLE)
            if isinstance(sr, SearchResult):
                if not sr.is_hidden:
                    self.show_search_result.emit(sr)

    def find_next(self, previous):
        """Make the next (or previous) result current, wrapping around.

        Does nothing when there are no results or no current item.
        """
        if self.number_of_results < 1:
            return
        item = self.currentItem()
        if item is None:
            return
        step = -1 if previous else 1
        num = item.data(0, RESULT_NUMBER_ROLE)
        if num is None:
            # Section headers are created without a result number (see
            # add_result), so int(num) would raise if one of them is the
            # current item. Navigate relative to the section's first result.
            first = item.child(0)
            if first is None:
                return
            num = first.data(0, RESULT_NUMBER_ROLE)
            if num is None:
                return
            if not previous:
                # going forward from a header lands on its first result
                step = 0
        i = (int(num) + step) % self.number_of_results
        self.setCurrentItem(self.item_map[i])
        self.item_activated()

    def search_result_not_found(self, sr):
        """Mark the result described by the dict ``sr`` as hidden."""
        for i in range(self.number_of_results):
            item = self.item_map[i]
            r = item.data(0, SEARCH_RESULT_ROLE)
            if r.is_result(sr):
                r.is_hidden = True
                item.setIcon(0, self.not_found_icon)
                break

    def search_result_discovered(self, sr):
        """Make the result whose ``result_num`` equals ``sr['result_num']`` current."""
        q = sr['result_num']
        for i in range(self.number_of_results):
            item = self.item_map[i]
            r = item.data(0, SEARCH_RESULT_ROLE)
            if r.result_num == q:
                self.setCurrentItem(item)
                # NOTE(review): assumes result numbers are unique within one
                # search, so there is nothing further to look for
                break

    @property
    def current_result_is_hidden(self):
        item = self.currentItem()
        if item is not None:
            sr = item.data(0, SEARCH_RESULT_ROLE)
            if isinstance(sr, SearchResult) and sr.is_hidden:
                return True
        return False

    @property
    def number_of_results(self):
        return len(self.search_results)

    def clear_all_results(self):
        """Remove every section and result and emit ``count_changed(-1)``."""
        self.section_map = {}
        self.item_map = {}
        self.search_results = []
        self.clear()
        self.count_changed.emit(-1)

    def select_first_result(self):
        if self.number_of_results:
            item = self.item_map[0]
            self.setCurrentItem(item)

    def ensure_current_result_visible(self):
        item = self.currentItem()
        if item is not None:
            self.scrollToItem(item)
# }}}
class SearchPanel(QWidget):  # {{{
    """The search panel: input controls, results tree and a busy spinner.

    Searches run on a daemon thread that takes ``(search_query,
    current_name)`` tasks from ``search_tasks`` and reports back through the
    ``results_found`` signal, which is connected with a queued connection.

    Signals:
        search_requested: forwarded from the input's ``do_search``
        results_found: emitted by the search thread with a SearchResult or
            a SearchFinished
        show_search_result: emitted with a result dict (``for_js``)
        count_changed: forwarded from the results tree
        hide_search_panel: emitted when Escape is pressed
        goto_cfi: emitted with the stored anchor CFI by go_back()
    """

    search_requested = pyqtSignal(object)
    results_found = pyqtSignal(object)
    show_search_result = pyqtSignal(object)
    count_changed = pyqtSignal(object)
    hide_search_panel = pyqtSignal()
    goto_cfi = pyqtSignal(object)

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.discovery_counter = 0
        self.last_hidden_text_warning = None
        self.current_search = None
        self.anchor_cfi = None
        self.l = l = QVBoxLayout(self)
        l.setContentsMargins(0, 0, 0, 0)
        self.search_input = si = SearchInput(self)
        self.searcher = None  # the search thread, created on first use
        self.search_tasks = Queue()
        self.results_found.connect(self.on_result_found, type=Qt.ConnectionType.QueuedConnection)
        si.do_search.connect(self.search_requested)
        si.cleared.connect(self.search_cleared)
        si.go_back.connect(self.go_back)
        l.addWidget(si)
        self.results = r = Results(self)
        r.count_changed.connect(self.count_changed)
        r.show_search_result.connect(self.do_show_search_result, type=Qt.ConnectionType.QueuedConnection)
        r.current_result_changed.connect(self.update_hidden_message)
        l.addWidget(r, 100)
        self.spinner = s = BusySpinner(self)
        s.setVisible(False)
        l.addWidget(s)
        self.hidden_message = la = QLabel(_('This text is hidden in the book and cannot be displayed'))
        la.setStyleSheet('QLabel { margin-left: 1ex }')
        la.setWordWrap(True)
        la.setVisible(False)
        l.addWidget(la)

    def go_back(self):
        if self.anchor_cfi:
            self.goto_cfi.emit(self.anchor_cfi)

    def update_hidden_message(self):
        self.hidden_message.setVisible(self.results.current_result_is_hidden)

    def focus_input(self, text=None):
        self.search_input.focus_input(text)

    def search_cleared(self):
        self.results.clear_all_results()
        self.current_search = None

    def start_search(self, search_query, current_name):
        """Start a new search, or step through the results of the current one.

        :param search_query: the Search to run
        :param current_name: name of the spine item the search starts from
        """
        if self.current_search is not None and search_query == self.current_search:
            # Same query as before: just move to the next/previous result
            self.find_next_requested(search_query.backwards)
            return
        if self.searcher is None:
            self.searcher = Thread(name='Searcher', target=self.run_searches)
            self.searcher.daemon = True
            self.searcher.start()
        self.results.clear_all_results()
        self.hidden_message.setVisible(False)
        self.spinner.start()
        self.current_search = search_query
        self.last_hidden_text_warning = None
        self.search_tasks.put((search_query, current_name))
        self.discovery_counter += 1

    def set_anchor_cfi(self, pos_data):
        self.anchor_cfi = pos_data['cfi']

    def run_searches(self):
        """Body of the search thread: process tasks until None is received."""
        while True:
            x = self.search_tasks.get()
            if x is None:
                break
            search_query, current_name = x
            try:
                manifest = get_manifest() or {}
                spine = manifest.get('spine', ())
                idx_map = {name: i for i, name in enumerate(spine)}
                spine_idx = idx_map.get(current_name, -1)
            except Exception:
                import traceback
                traceback.print_exc()
                spine_idx = -1
            if spine_idx < 0:
                # Unknown starting file: report an empty, finished search
                self.results_found.emit(SearchFinished(search_query))
                continue
            num_in_spine = len(spine)
            result_num = 0
            # Visit every spine item once, starting at the current one and
            # wrapping around
            for n in range(num_in_spine):
                idx = (spine_idx + n) % num_in_spine
                name = spine[idx]
                # Counts how often each context string q has been seen in
                # this file; passed to SearchResult as its index
                counter = Counter()
                try:
                    for before, text, after, offset in search_in_name(name, search_query):
                        q = (before or '')[-15:] + text + (after or '')[:15]
                        result_num += 1
                        self.results_found.emit(SearchResult(search_query, before, text, after, q, name, idx, counter[q], offset, result_num))
                        counter[q] += 1
                except Exception:
                    # A failure in one file must not abort the whole search
                    import traceback
                    traceback.print_exc()
            self.results_found.emit(SearchFinished(search_query))

    def on_result_found(self, result):
        """Handle a SearchResult or SearchFinished reported by the search thread."""
        # Ignore anything that belongs to a search that is no longer current
        if self.current_search is None or result.search_query != self.current_search:
            return
        if isinstance(result, SearchFinished):
            self.spinner.stop()
            if self.results.number_of_results:
                self.results.ensure_current_result_visible()
            else:
                self.show_no_results_found()
            return
        self.results.add_result(result)
        obj = result.for_js
        obj['on_discovery'] = self.discovery_counter
        self.show_search_result.emit(obj)
        self.update_hidden_message()

    def visibility_changed(self, visible):
        if visible:
            self.focus_input()

    def clear_searches(self):
        """Forget the current search and clear the module level caches."""
        self.current_search = None
        self.last_hidden_text_warning = None
        searchable_text_for_name.cache_clear()
        toc_offset_map_for_name.cache_clear()
        get_toc_data.cache_clear()
        self.spinner.stop()
        self.results.clear_all_results()

    def shutdown(self):
        """Ask the search thread to exit and reset the search state."""
        self.search_tasks.put(None)
        self.spinner.stop()
        self.current_search = None
        self.last_hidden_text_warning = None
        self.searcher = None

    def find_next_requested(self, previous):
        self.results.find_next(previous)

    def trigger(self):
        self.search_input.find_next()

    def do_show_search_result(self, sr):
        self.show_search_result.emit(sr.for_js)

    def search_result_not_found(self, sr):
        self.results.search_result_not_found(sr)
        self.update_hidden_message()

    def search_result_discovered(self, sr):
        self.results.search_result_discovered(sr)

    def show_no_results_found(self):
        msg = _('No matches were found for:')
        # The message contains HTML markup, so the user supplied search text
        # has to be escaped before being embedded in it
        warning_dialog(self, _('No matches found'), msg + f' <b>{escape(self.current_search.text, False)}</b>', show=True)

    def keyPressEvent(self, ev):
        if ev.key() == Qt.Key.Key_Escape:
            self.hide_search_panel.emit()
            ev.accept()
            return
        return QWidget.keyPressEvent(self, ev)
# }}}