Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/sphinx-doc/sphinx.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Takeshi KOMIYA <i.tkomiya@gmail.com> 2019-12-25 05:41:54 +0300
committer Takeshi KOMIYA <i.tkomiya@gmail.com> 2019-12-25 05:41:54 +0300
commit d717f5ae31d17533a5a20b581f571eb4a95b1b30 (patch)
tree 65bb3a38edc57d2b5e70b19ca8996ccebd1e3951 /sphinx/search/ja.py
parent d82e7c12a177a6a547ba1e72540f079f64590f8a (diff)
parent 869ba4f67947b97af90dc706fb7e6ed17946ccd3 (diff)
Merge branch '2.0'
Diffstat (limited to 'sphinx/search/ja.py')
-rw-r--r-- sphinx/search/ja.py | 58
1 files changed, 18 insertions, 40 deletions
diff --git a/sphinx/search/ja.py b/sphinx/search/ja.py
index e1f18209a..1b0a0e865 100644
--- a/sphinx/search/ja.py
+++ b/sphinx/search/ja.py
@@ -19,6 +19,7 @@
import os
import re
import sys
+from typing import Any, Dict, List
try:
import MeCab
@@ -36,21 +37,13 @@ from sphinx.errors import SphinxError, ExtensionError
from sphinx.search import SearchLanguage
from sphinx.util import import_object
-if False:
- # For type annotation
- from typing import Any, Dict, List # NOQA
-
class BaseSplitter:
-
- def __init__(self, options):
- # type: (Dict) -> None
+ def __init__(self, options: Dict) -> None:
self.options = options
- def split(self, input):
- # type: (str) -> List[str]
+ def split(self, input: str) -> List[str]:
"""
-
:param str input:
:return:
:rtype: list[str]
@@ -59,8 +52,7 @@ class BaseSplitter:
class MecabSplitter(BaseSplitter):
- def __init__(self, options):
- # type: (Dict) -> None
+ def __init__(self, options: Dict) -> None:
super().__init__(options)
self.ctypes_libmecab = None # type: Any
self.ctypes_mecab = None # type: Any
@@ -70,8 +62,7 @@ class MecabSplitter(BaseSplitter):
self.init_native(options)
self.dict_encode = options.get('dic_enc', 'utf-8')
- def split(self, input):
- # type: (str) -> List[str]
+ def split(self, input: str) -> List[str]:
if native_module:
result = self.native.parse(input)
else:
@@ -79,16 +70,14 @@ class MecabSplitter(BaseSplitter):
self.ctypes_mecab, input.encode(self.dict_encode))
return result.split(' ')
- def init_native(self, options):
- # type: (Dict) -> None
+ def init_native(self, options: Dict) -> None:
param = '-Owakati'
dict = options.get('dict')
if dict:
param += ' -d %s' % dict
self.native = MeCab.Tagger(param)
- def init_ctypes(self, options):
- # type: (Dict) -> None
+ def init_ctypes(self, options: Dict) -> None:
import ctypes.util
lib = options.get('lib')
@@ -124,8 +113,7 @@ class MecabSplitter(BaseSplitter):
if self.ctypes_mecab is None:
raise SphinxError('mecab initialization failed')
- def __del__(self):
- # type: () -> None
+ def __del__(self) -> None:
if self.ctypes_libmecab:
self.ctypes_libmecab.mecab_destroy(self.ctypes_mecab)
@@ -133,21 +121,18 @@ MeCabBinder = MecabSplitter # keep backward compatibility until Sphinx-1.6
class JanomeSplitter(BaseSplitter):
- def __init__(self, options):
- # type: (Dict) -> None
+ def __init__(self, options: Dict) -> None:
super().__init__(options)
self.user_dict = options.get('user_dic')
self.user_dict_enc = options.get('user_dic_enc', 'utf8')
self.init_tokenizer()
- def init_tokenizer(self):
- # type: () -> None
+ def init_tokenizer(self) -> None:
if not janome_module:
raise RuntimeError('Janome is not available')
self.tokenizer = janome.tokenizer.Tokenizer(udic=self.user_dict, udic_enc=self.user_dict_enc)
- def split(self, input):
- # type: (str) -> List[str]
+ def split(self, input: str) -> List[str]:
result = ' '.join(token.surface for token in self.tokenizer.tokenize(input))
return result.split(' ')
@@ -423,23 +408,20 @@ class DefaultSplitter(BaseSplitter):
'郎': 1082, '1': -270, 'E1': 306, 'ル': -673, 'ン': -496}
# ctype_
- def ctype_(self, char):
- # type: (str) -> str
+ def ctype_(self, char: str) -> str:
for pattern, value in self.patterns_.items():
if pattern.match(char):
return value
return 'O'
# ts_
- def ts_(self, dict, key):
- # type: (Dict[str, int], str) -> int
+ def ts_(self, dict: Dict[str, int], key: str) -> int:
if key in dict:
return dict[key]
return 0
# segment
- def split(self, input):
- # type: (str) -> List[str]
+ def split(self, input: str) -> List[str]:
if not input:
return []
@@ -542,8 +524,7 @@ class SearchJapanese(SearchLanguage):
lang = 'ja'
language_name = 'Japanese'
- def init(self, options):
- # type: (Dict) -> None
+ def init(self, options: Dict) -> None:
dotted_path = options.get('type', 'sphinx.search.ja.DefaultSplitter')
try:
self.splitter = import_object(dotted_path)(options)
@@ -551,14 +532,11 @@ class SearchJapanese(SearchLanguage):
raise ExtensionError("Splitter module %r can't be imported" %
dotted_path)
- def split(self, input):
- # type: (str) -> List[str]
+ def split(self, input: str) -> List[str]:
return self.splitter.split(input)
- def word_filter(self, stemmed_word):
- # type: (str) -> bool
+ def word_filter(self, stemmed_word: str) -> bool:
return len(stemmed_word) > 1
- def stem(self, word):
- # type: (str) -> str
+ def stem(self, word: str) -> str:
return word