diff options
author | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2019-12-25 05:41:54 +0300 |
---|---|---|
committer | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2019-12-25 05:41:54 +0300 |
commit | d717f5ae31d17533a5a20b581f571eb4a95b1b30 (patch) | |
tree | 65bb3a38edc57d2b5e70b19ca8996ccebd1e3951 /sphinx/search/ja.py | |
parent | d82e7c12a177a6a547ba1e72540f079f64590f8a (diff) | |
parent | 869ba4f67947b97af90dc706fb7e6ed17946ccd3 (diff) |
Merge branch '2.0'
Diffstat (limited to 'sphinx/search/ja.py')
-rw-r--r-- | sphinx/search/ja.py | 58 |
1 file changed, 18 insertions, 40 deletions
```diff
diff --git a/sphinx/search/ja.py b/sphinx/search/ja.py
index e1f18209a..1b0a0e865 100644
--- a/sphinx/search/ja.py
+++ b/sphinx/search/ja.py
@@ -19,6 +19,7 @@
 import os
 import re
 import sys
+from typing import Any, Dict, List
 
 try:
     import MeCab
@@ -36,21 +37,13 @@ from sphinx.errors import SphinxError, ExtensionError
 from sphinx.search import SearchLanguage
 from sphinx.util import import_object
 
-if False:
-    # For type annotation
-    from typing import Any, Dict, List  # NOQA
-
 
 class BaseSplitter:
-
-    def __init__(self, options):
-        # type: (Dict) -> None
+    def __init__(self, options: Dict) -> None:
         self.options = options
 
-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         """
-
         :param str input:
         :return:
         :rtype: list[str]
@@ -59,8 +52,7 @@ class BaseSplitter:
 
 
 class MecabSplitter(BaseSplitter):
-    def __init__(self, options):
-        # type: (Dict) -> None
+    def __init__(self, options: Dict) -> None:
         super().__init__(options)
         self.ctypes_libmecab = None  # type: Any
         self.ctypes_mecab = None  # type: Any
@@ -70,8 +62,7 @@ class MecabSplitter(BaseSplitter):
             self.init_native(options)
         self.dict_encode = options.get('dic_enc', 'utf-8')
 
-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         if native_module:
             result = self.native.parse(input)
         else:
@@ -79,16 +70,14 @@ class MecabSplitter(BaseSplitter):
                 self.ctypes_mecab, input.encode(self.dict_encode))
         return result.split(' ')
 
-    def init_native(self, options):
-        # type: (Dict) -> None
+    def init_native(self, options: Dict) -> None:
         param = '-Owakati'
         dict = options.get('dict')
         if dict:
             param += ' -d %s' % dict
         self.native = MeCab.Tagger(param)
 
-    def init_ctypes(self, options):
-        # type: (Dict) -> None
+    def init_ctypes(self, options: Dict) -> None:
         import ctypes.util
 
         lib = options.get('lib')
@@ -124,8 +113,7 @@ class MecabSplitter(BaseSplitter):
         if self.ctypes_mecab is None:
             raise SphinxError('mecab initialization failed')
 
-    def __del__(self):
-        # type: () -> None
+    def __del__(self) -> None:
         if self.ctypes_libmecab:
             self.ctypes_libmecab.mecab_destroy(self.ctypes_mecab)
 
@@ -133,21 +121,18 @@ MeCabBinder = MecabSplitter  # keep backward compatibility until Sphinx-1.6
 
 
 class JanomeSplitter(BaseSplitter):
-    def __init__(self, options):
-        # type: (Dict) -> None
+    def __init__(self, options: Dict) -> None:
         super().__init__(options)
         self.user_dict = options.get('user_dic')
         self.user_dict_enc = options.get('user_dic_enc', 'utf8')
         self.init_tokenizer()
 
-    def init_tokenizer(self):
-        # type: () -> None
+    def init_tokenizer(self) -> None:
         if not janome_module:
             raise RuntimeError('Janome is not available')
         self.tokenizer = janome.tokenizer.Tokenizer(udic=self.user_dict, udic_enc=self.user_dict_enc)
 
-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         result = ' '.join(token.surface for token in self.tokenizer.tokenize(input))
         return result.split(' ')
 
@@ -423,23 +408,20 @@ class DefaultSplitter(BaseSplitter):
              '郎': 1082, '1': -270, 'E1': 306, 'ル': -673, 'ン': -496}
 
     # ctype_
-    def ctype_(self, char):
-        # type: (str) -> str
+    def ctype_(self, char: str) -> str:
         for pattern, value in self.patterns_.items():
             if pattern.match(char):
                 return value
         return 'O'
 
     # ts_
-    def ts_(self, dict, key):
-        # type: (Dict[str, int], str) -> int
+    def ts_(self, dict: Dict[str, int], key: str) -> int:
         if key in dict:
             return dict[key]
         return 0
 
     # segment
-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         if not input:
             return []
 
@@ -542,8 +524,7 @@ class SearchJapanese(SearchLanguage):
     lang = 'ja'
     language_name = 'Japanese'
 
-    def init(self, options):
-        # type: (Dict) -> None
+    def init(self, options: Dict) -> None:
         dotted_path = options.get('type', 'sphinx.search.ja.DefaultSplitter')
         try:
             self.splitter = import_object(dotted_path)(options)
@@ -551,14 +532,11 @@ class SearchJapanese(SearchLanguage):
             raise ExtensionError("Splitter module %r can't be imported" %
                                  dotted_path)
 
-    def split(self, input):
-        # type: (str) -> List[str]
+    def split(self, input: str) -> List[str]:
         return self.splitter.split(input)
 
-    def word_filter(self, stemmed_word):
-        # type: (str) -> bool
+    def word_filter(self, stemmed_word: str) -> bool:
         return len(stemmed_word) > 1
 
-    def stem(self, word):
-        # type: (str) -> str
+    def stem(self, word: str) -> str:
         return word
```