Diffstat (limited to 'sphinx/search/__init__.py')
-rw-r--r--  sphinx/search/__init__.py  85
1 file changed, 30 insertions, 55 deletions
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index 22ba14d40..54a1aad92 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -13,21 +13,21 @@
 import re
 import warnings
 from importlib import import_module
 from os import path
+from typing import Any, Dict, IO, Iterable, List, Tuple, Set

 from docutils import nodes
+from docutils.nodes import Node

 from sphinx import addnodes
 from sphinx import package_dir
 from sphinx.deprecation import RemovedInSphinx40Warning
+from sphinx.environment import BuildEnvironment
 from sphinx.search.jssplitter import splitter_code
 from sphinx.util import jsdump, rpartition
 if False:
     # For type annotation
-    from typing import Any, Dict, IO, Iterable, List, Tuple, Set  # NOQA
     from typing import Type  # for python3.5.1
-    from docutils import nodes  # NOQA
-    from sphinx.environment import BuildEnvironment  # NOQA


 class SearchLanguage:
@@ -69,19 +69,16 @@ var Stemmer = function() {

     _word_re = re.compile(r'(?u)\w+')

-    def __init__(self, options):
-        # type: (Dict) -> None
+    def __init__(self, options: Dict) -> None:
         self.options = options
         self.init(options)

-    def init(self, options):
-        # type: (Dict) -> None
+    def init(self, options: Dict) -> None:
         """
         Initialize the class with the options the user has given.
         """

-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         """
         This method splits a sentence into words.  Default splitter splits input
         at white spaces, which should be enough for most languages except CJK
@@ -89,8 +86,7 @@ var Stemmer = function() {
         """
         return self._word_re.findall(input)

-    def stem(self, word):
-        # type: (str) -> str
+    def stem(self, word: str) -> str:
         """
         This method implements stemming algorithm of the Python version.

@@ -103,8 +99,7 @@ var Stemmer = function() {
         """
         return word

-    def word_filter(self, word):
-        # type: (str) -> bool
+    def word_filter(self, word: str) -> bool:
         """
         Return true if the target word should be registered in the search index.
         This method is called after stemming.
@@ -121,8 +116,7 @@ var Stemmer = function() {
 from sphinx.search.en import SearchEnglish


-def parse_stop_word(source):
-    # type: (str) -> Set[str]
+def parse_stop_word(source: str) -> Set[str]:
     """
     parse snowball style word list like this:

@@ -166,24 +160,20 @@ class _JavaScriptIndex:
     PREFIX = 'Search.setIndex('
     SUFFIX = ')'

-    def dumps(self, data):
-        # type: (Any) -> str
+    def dumps(self, data: Any) -> str:
         return self.PREFIX + jsdump.dumps(data) + self.SUFFIX

-    def loads(self, s):
-        # type: (str) -> Any
+    def loads(self, s: str) -> Any:
         data = s[len(self.PREFIX):-len(self.SUFFIX)]
         if not data or not s.startswith(self.PREFIX) or not \
            s.endswith(self.SUFFIX):
             raise ValueError('invalid data')
         return jsdump.loads(data)

-    def dump(self, data, f):
-        # type: (Any, IO) -> None
+    def dump(self, data: Any, f: IO) -> None:
         f.write(self.dumps(data))

-    def load(self, f):
-        # type: (IO) -> Any
+    def load(self, f: IO) -> Any:
         return self.loads(f.read())


@@ -195,15 +185,13 @@ class WordCollector(nodes.NodeVisitor):
     A special visitor that collects words for the `IndexBuilder`.
     """

-    def __init__(self, document, lang):
-        # type: (nodes.document, SearchLanguage) -> None
+    def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
         super().__init__(document)
         self.found_words = []  # type: List[str]
         self.found_title_words = []  # type: List[str]
         self.lang = lang

-    def is_meta_keywords(self, node, nodetype=None):
-        # type: (addnodes.meta, Any) -> bool
+    def is_meta_keywords(self, node: addnodes.meta, nodetype: Any = None) -> bool:
         if nodetype is not None:
             warnings.warn('"nodetype" argument for WordCollector.is_meta_keywords() '
                           'is deprecated.', RemovedInSphinx40Warning)
@@ -217,8 +205,7 @@ class WordCollector(nodes.NodeVisitor):

         return False

-    def dispatch_visit(self, node):
-        # type: (nodes.Node) -> None
+    def dispatch_visit(self, node: Node) -> None:
         if isinstance(node, nodes.comment):
             raise nodes.SkipNode
         elif isinstance(node, nodes.raw):
@@ -251,8 +238,7 @@ class IndexBuilder:
         'pickle': pickle
     }

-    def __init__(self, env, lang, options, scoring):
-        # type: (BuildEnvironment, str, Dict, str) -> None
+    def __init__(self, env: BuildEnvironment, lang: str, options: Dict, scoring: str) -> None:
         self.env = env
         self._titles = {}           # type: Dict[str, str]
                                     # docname -> title
@@ -292,8 +278,7 @@ class IndexBuilder:
         self.js_scorer_code = ''
         self.js_splitter_code = splitter_code

-    def load(self, stream, format):
-        # type: (IO, Any) -> None
+    def load(self, stream: IO, format: Any) -> None:
         """Reconstruct from frozen data."""
         if isinstance(format, str):
             format = self.formats[format]
@@ -306,8 +291,7 @@ class IndexBuilder:
         self._filenames = dict(zip(index2fn, frozen['filenames']))
         self._titles = dict(zip(index2fn, frozen['titles']))

-        def load_terms(mapping):
-            # type: (Dict[str, Any]) -> Dict[str, Set[str]]
+        def load_terms(mapping: Dict[str, Any]) -> Dict[str, Set[str]]:
             rv = {}
             for k, v in mapping.items():
                 if isinstance(v, int):
@@ -320,15 +304,14 @@ class IndexBuilder:
         self._title_mapping = load_terms(frozen['titleterms'])
         # no need to load keywords/objtypes

-    def dump(self, stream, format):
-        # type: (IO, Any) -> None
+    def dump(self, stream: IO, format: Any) -> None:
         """Dump the frozen index to a stream."""
         if isinstance(format, str):
             format = self.formats[format]
         format.dump(self.freeze(), stream)

-    def get_objects(self, fn2index):
-        # type: (Dict[str, int]) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]
+    def get_objects(self, fn2index: Dict[str, int]
+                    ) -> Dict[str, Dict[str, Tuple[int, int, int, str]]]:
         rv = {}  # type: Dict[str, Dict[str, Tuple[int, int, int, str]]]
         otypes = self._objtypes
         onames = self._objnames
@@ -364,8 +347,7 @@ class IndexBuilder:
             pdict[name] = (fn2index[docname], typeindex, prio, shortanchor)
         return rv

-    def get_terms(self, fn2index):
-        # type: (Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]
+    def get_terms(self, fn2index: Dict) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
         rvs = {}, {}  # type: Tuple[Dict[str, List[str]], Dict[str, List[str]]]
         for rv, mapping in zip(rvs, (self._mapping, self._title_mapping)):
             for k, v in mapping.items():
@@ -377,8 +359,7 @@ class IndexBuilder:
                 rv[k] = sorted([fn2index[fn] for fn in v if fn in fn2index])
         return rvs

-    def freeze(self):
-        # type: () -> Dict[str, Any]
+    def freeze(self) -> Dict[str, Any]:
         """Create a usable data structure for serializing."""
         docnames, titles = zip(*sorted(self._titles.items()))
         filenames = [self._filenames.get(docname) for docname in docnames]
@@ -392,12 +373,10 @@ class IndexBuilder:
             objects=objects, objtypes=objtypes, objnames=objnames,
             titleterms=title_terms, envversion=self.env.version)

-    def label(self):
-        # type: () -> str
+    def label(self) -> str:
         return "%s (code: %s)" % (self.lang.language_name, self.lang.lang)

-    def prune(self, docnames):
-        # type: (Iterable[str]) -> None
+    def prune(self, docnames: Iterable[str]) -> None:
         """Remove data for all docnames not in the list."""
         new_titles = {}
         new_filenames = {}
@@ -412,8 +391,7 @@ class IndexBuilder:
         for wordnames in self._title_mapping.values():
             wordnames.intersection_update(docnames)

-    def feed(self, docname, filename, title, doctree):
-        # type: (str, str, str, nodes.document) -> None
+    def feed(self, docname: str, filename: str, title: str, doctree: nodes.document) -> None:
         """Feed a doctree to the index."""
         self._titles[docname] = title
         self._filenames[docname] = filename
@@ -422,8 +400,7 @@ class IndexBuilder:
         doctree.walk(visitor)

         # memoize self.lang.stem
-        def stem(word):
-            # type: (str) -> str
+        def stem(word: str) -> str:
             try:
                 return self._stem_cache[word]
             except KeyError:
@@ -447,8 +424,7 @@ class IndexBuilder:
             if _filter(stemmed_word) and not already_indexed:
                 self._mapping.setdefault(stemmed_word, set()).add(docname)

-    def context_for_searchtool(self):
-        # type: () -> Dict[str, Any]
+    def context_for_searchtool(self) -> Dict[str, Any]:
         return {
             'search_language_stemming_code': self.lang.js_stemmer_code,
             'search_language_stop_words': jsdump.dumps(sorted(self.lang.stopwords)),
@@ -456,8 +432,7 @@ class IndexBuilder:
             'search_word_splitter_code': self.js_splitter_code,
         }

-    def get_js_stemmer_rawcode(self):
-        # type: () -> str
+    def get_js_stemmer_rawcode(self) -> str:
         if self.lang.js_stemmer_rawcode:
             return path.join(package_dir, 'search', 'non-minified-js',
                              self.lang.js_stemmer_rawcode)
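The change is mechanical throughout: every Python 2 style `# type:` comment is folded into the signature as a Python 3 annotation, and the typing-only imports move out of the `if False:` block (which exists solely for type checkers and never runs) into ordinary top-level imports. Only `from typing import Type` stays guarded, which its `# for python3.5.1` comment attributes to early Python 3.5 releases lacking `typing.Type` at runtime. Below is a minimal sketch of the pattern, adapted from `SearchLanguage.split` above; the functions are pulled out of the class and renamed here (hypothetically, for illustration) so both versions can coexist in one module:

import re
from typing import List  # previously hidden under "if False:", now a real import

_word_re = re.compile(r'(?u)\w+')

def split_old(input):
    # type: (str) -> List[str]
    # Before: a PEP 484 type comment, the only form that also parses on
    # Python 2; List never had to exist at runtime.
    return _word_re.findall(input)

def split_new(input: str) -> List[str]:
    # After: an inline annotation; List must now be importable when the
    # module executes, hence the import moves to the top level.
    return _word_re.findall(input)

The one signature the new style makes too long for a single line, `IndexBuilder.get_objects`, wraps its return annotation onto a continuation line rather than keeping a type comment.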