Welcome to mirror list, hosted at ThFree Co, Russian Federation.

nl.py « search « sphinx - github.com/sphinx-doc/sphinx.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: b5c549f66a29a8d9541a0221986398a8c60d3b24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""Dutch search language: includes the JS porter stemmer."""

from typing import Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

# Stop-word list for Dutch, kept in the Snowball "stop.txt" layout: one word
# per line, followed by free-form English notes after a `|`.  The raw text is
# handed to parse_stop_word (imported from sphinx.search) and the result is
# what SearchDutch exposes as its `stopwords` attribute.
# NOTE(review): presumably parse_stop_word drops everything from `|` onwards
# and returns the collection of bare words -- confirm against sphinx.search.
dutch_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/dutch/stop.txt
de             |  the
en             |  and
van            |  of, from
ik             |  I, the ego
te             |  (1) chez, at etc, (2) to, (3) too
dat            |  that, which
die            |  that, those, who, which
in             |  in, inside
een            |  a, an, one
hij            |  he
het            |  the, it
niet           |  not, nothing, naught
zijn           |  (1) to be, being, (2) his, one's, its
is             |  is
was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
op             |  on, upon, at, in, up, used up
aan            |  on, upon, to (as dative)
met            |  with, by
als            |  like, such as, when
voor           |  (1) before, in front of, (2) furrow
had            |  had, past tense all persons sing. of 'hebben' (have)
er             |  there
maar           |  but, only
om             |  round, about, for etc
hem            |  him
dan            |  then
zou            |  should/would, past tense all persons sing. of 'zullen'
of             |  or, whether, if
wat            |  what, something, anything
mijn           |  possessive and noun 'mine'
men            |  people, 'one'
dit            |  this
zo             |  so, thus, in this way
door           |  through by
over           |  over, across
ze             |  she, her, they, them
zich           |  oneself
bij            |  (1) a bee, (2) by, near, at
ook            |  also, too
tot            |  till, until
je             |  you
mij            |  me
uit            |  out of, from
der            |  Old Dutch form of 'van der' still found in surnames
daar           |  (1) there, (2) because
haar           |  (1) her, their, them, (2) hair
naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
heb            |  present first person sing. of 'to have'
hoe            |  how, why
heeft          |  present third person sing. of 'to have'
hebben         |  'to have' and various parts thereof
deze           |  this
u              |  you
want           |  (1) for, (2) mitten, (3) rigging
nog            |  yet, still
zal            |  'shall', first and third person sing. of verb 'zullen' (will)
me             |  me
zij            |  she, they
nu             |  now
ge             |  'thou', still used in Belgium and south Netherlands
geen           |  none
omdat          |  because
iets           |  something, somewhat
worden         |  to become, grow, get
toch           |  yet, still
al             |  all, every, each
waren          |  (1) 'were' (2) to wander, (3) wares, (3)
veel           |  much, many
meer           |  (1) more, (2) lake
doen           |  to do, to make
toen           |  then, when
moet           |  noun 'spot/mote' and present form of 'to must'
ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
zonder         |  without
kan            |  noun 'can' and present form of 'to be able'
hun            |  their, them
dus            |  so, consequently
alles          |  all, everything, anything
onder          |  under, beneath
ja             |  yes, of course
eens           |  once, one day
hier           |  here
wie            |  who
werd           |  imperfect third person sing. of 'become'
altijd         |  always
doch           |  yet, but etc
wordt          |  present third person sing. of 'become'
wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
kunnen         |  to be able
ons            |  us/our
zelf           |  self
tegen          |  against, towards, at
na             |  after, near
reeds          |  already
wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
kon            |  could; past tense of 'to be able'
niets          |  nothing
uw             |  your
iemand         |  somebody
geweest        |  been; past participle of 'be'
andere         |  other
''')


class SearchDutch(SearchLanguage):
    """Dutch search-language definition using the Snowball ``dutch`` stemmer.

    The class attributes identify the language, point at the JavaScript
    stemmer file name and supply the module-level Dutch stop-word list.
    """

    lang = 'nl'
    language_name = 'Dutch'
    js_stemmer_rawcode = 'dutch-stemmer.js'
    stopwords = dutch_stopwords

    def init(self, options: Dict) -> None:
        """Create the Dutch stemmer and keep it on the instance.

        :param options: accepted for interface compatibility; this
            implementation does not read it.
        """
        dutch_stemmer = snowballstemmer.stemmer('dutch')
        self.stemmer = dutch_stemmer

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lowercasing it before stemming.

        :param word: the word to reduce to its stem.
        :returns: whatever the stemmer's ``stemWord`` yields for the
            lowercased word.
        """
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)