Welcome to mirror list, hosted at ThFree Co, Russian Federation.

sentencepiece.py « python - github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3320b9781baa8e24efb464633243173711ccce35 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# This file was automatically generated by SWIG (http://www.swig.org).
# Version 3.0.10
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.





# Import the native `_sentencepiece` extension module, choosing an import
# strategy from the running interpreter's version, and bind it to the
# module-level name `_sentencepiece`.
from sys import version_info as _swig_python_version_info
if _swig_python_version_info >= (2, 7, 0):
    def swig_import_helper():
        """Import `_sentencepiece` with importlib, trying the package-qualified name first.

        Falls back to importing a top-level `_sentencepiece` when the
        qualified import raises ImportError.
        """
        import importlib
        # Everything before the last dot of this module's name; '' if there is no dot.
        pkg = __name__.rpartition('.')[0]
        # lstrip('.') removes the leading dot produced when pkg is empty.
        mname = '.'.join((pkg, '_sentencepiece')).lstrip('.')
        try:
            return importlib.import_module(mname)
        except ImportError:
            return importlib.import_module('_sentencepiece')
    _sentencepiece = swig_import_helper()
    # The helper is single-use; drop it from the module namespace.
    del swig_import_helper
elif _swig_python_version_info >= (2, 6, 0):
    def swig_import_helper():
        """Import `_sentencepiece` with the `imp` module, searching this file's directory.

        Falls back to a plain `import _sentencepiece` when `imp.find_module`
        raises ImportError.
        """
        from os.path import dirname
        import imp
        fp = None
        try:
            fp, pathname, description = imp.find_module('_sentencepiece', [dirname(__file__)])
        except ImportError:
            import _sentencepiece
            return _sentencepiece
        # NOTE(review): if find_module succeeds but fp is None, this function
        # falls off the end and returns None, so `_sentencepiece` ends up None.
        if fp is not None:
            try:
                _mod = imp.load_module('_sentencepiece', fp, pathname, description)
            finally:
                # Close the file handle returned by find_module whether or not loading worked.
                fp.close()
            return _mod
    _sentencepiece = swig_import_helper()
    # The helper is single-use; drop it from the module namespace.
    del swig_import_helper
else:
    # Interpreters older than 2.6: plain import only.
    import _sentencepiece
# The version tuple was only needed to pick a branch above.
del _swig_python_version_info
# Alias the builtin `property` as `_swig_property` when it exists; the alias is
# simply left undefined on interpreters where the lookup raises NameError.
try:
    _swig_property = property
except NameError:
    pass  # Python < 2.2 doesn't have 'property'.

# Make the builtins module available under the single name `__builtin__`:
# try the `builtins` module first and fall back to a module literally named
# `__builtin__` when that import fails.
try:
    import builtins as __builtin__
except ImportError:
    import __builtin__

def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
    """Assign ``value`` to attribute ``name`` of a proxy object.

    Resolution order:

    1. ``"thisown"`` is forwarded to ``self.this.own(value)`` and that call's
       result is returned.
    2. ``"this"`` is written straight into ``self.__dict__`` when the value's
       type is named ``SwigPyObject``; otherwise it is treated like any other
       name.
    3. A truthy entry for ``name`` in ``class_type.__swig_setmethods__`` is
       called as ``setter(self, value)`` and its result is returned.
    4. With a truthy ``static`` an ``AttributeError`` is raised; otherwise the
       attribute is stored on the instance.

    Args:
        self: The proxy instance being modified.
        class_type: Class whose ``__swig_setmethods__`` table is consulted.
        name: Attribute name.
        value: Value to assign.
        static: When truthy (the default), names without a setter are rejected.

    Raises:
        AttributeError: ``static`` is truthy and no rule above handled ``name``.
    """
    if name == "thisown":
        return self.this.own(value)

    if name == "this" and type(value).__name__ == 'SwigPyObject':
        # Bypass __setattr__ so storing the handle does not recurse into this helper.
        self.__dict__[name] = value
        return None

    setter = class_type.__swig_setmethods__.get(name)
    if setter:
        return setter(self, value)

    if static:
        raise AttributeError("You cannot add attributes to %s" % self)

    if _newclass:
        object.__setattr__(self, name, value)
    else:
        self.__dict__[name] = value


def _swig_setattr(self, class_type, name, value):
    """Assign ``value`` to ``name`` on a proxy, with ``static`` switched off.

    Thin wrapper that forwards to ``_swig_setattr_nondynamic`` passing ``0``
    as the ``static`` argument, and returns whatever that call returns.
    """
    non_static = 0
    return _swig_setattr_nondynamic(self, class_type, name, value, non_static)


def _swig_getattr(self, class_type, name):
    """Look up attribute ``name`` for a proxy object.

    ``"thisown"`` returns ``self.this.own()``.  Any other name is resolved
    through ``class_type.__swig_getmethods__``: a truthy entry is called as
    ``getter(self)`` and its result returned.

    Args:
        self: The proxy instance being queried.
        class_type: Class whose ``__swig_getmethods__`` table is consulted.
        name: Attribute name.

    Raises:
        AttributeError: No truthy getter is registered for ``name``.
    """
    if name == "thisown":
        return self.this.own()

    getter = class_type.__swig_getmethods__.get(name)
    if not getter:
        message = "'%s' object has no attribute '%s'" % (class_type.__name__, name)
        raise AttributeError(message)
    return getter(self)


def _swig_repr(self):
    """Build the ``repr`` string shared by the proxy classes.

    The result has the form ``<module.ClassName; proxy of REPR >`` where
    ``REPR`` is ``self.this.__repr__()``.  If obtaining that text raises any
    ``Exception``, the ``proxy of ...`` part is replaced by an empty string.
    """
    try:
        handle_text = "proxy of " + self.this.__repr__()
    except __builtin__.Exception:
        handle_text = ""
    cls = self.__class__
    return "<%s.%s; %s >" % (cls.__module__, cls.__name__, handle_text)

# Pick the base class for the proxy classes below and record the choice in
# `_newclass`: 1 when the builtin `object` is usable as the base, 0 when the
# lookup raised and an empty stand-in class `_object` had to be defined.
try:
    _object = object
    _newclass = 1
except __builtin__.Exception:
    class _object:
        pass
    _newclass = 0

class SentencePieceProcessor(_object):
    """Python proxy for the native ``SentencePieceProcessor`` object.

    Every public method forwards its arguments unchanged to the
    ``SentencePieceProcessor_<name>`` function of the ``_sentencepiece``
    extension module, passing this proxy as the first argument, and returns
    whatever that function returns.  Most operations are exposed under both a
    CamelCase and a snake_case name; the two spellings call separately named
    native functions.

    Several parameters are named ``input`` or ``id`` and so shadow builtins
    inside the method body; the names are part of the keyword-call interface
    and are left as generated.
    """
    # Setter table consulted by _swig_setattr; empty for this class.
    __swig_setmethods__ = {}
    # Route every attribute assignment through the SWIG setter helper.
    __setattr__ = lambda self, name, value: _swig_setattr(self, SentencePieceProcessor, name, value)
    # Getter table consulted by _swig_getattr; empty for this class.
    __swig_getmethods__ = {}
    # Fallback attribute lookup, routed through the SWIG getter helper.
    __getattr__ = lambda self, name: _swig_getattr(self, SentencePieceProcessor, name)
    __repr__ = _swig_repr

    def __init__(self):
        """Create the native object and attach its handle as ``self.this``.

        The handle returned by ``new_SentencePieceProcessor()`` is appended to
        an existing ``self.this`` when that works; if the attempt raises any
        ``Exception``, the handle is assigned to ``self.this`` directly.
        """
        this = _sentencepiece.new_SentencePieceProcessor()
        try:
            self.this.append(this)
        except __builtin__.Exception:
            self.this = this
    # Bound to the native delete function; presumably invoked by the SWIG
    # runtime when an owned handle is released -- not visible in this file.
    __swig_destroy__ = _sentencepiece.delete_SentencePieceProcessor
    # Finalizer that does nothing at the Python level.
    __del__ = lambda self: None

    def Load(self, filename):
        """Forward to the native ``SentencePieceProcessor_Load`` with ``filename``."""
        return _sentencepiece.SentencePieceProcessor_Load(self, filename)

    def LoadOrDie(self, filename):
        """Forward to the native ``SentencePieceProcessor_LoadOrDie`` with ``filename``."""
        return _sentencepiece.SentencePieceProcessor_LoadOrDie(self, filename)

    def LoadFromSerializedProto(self, serialized):
        """Forward to the native ``SentencePieceProcessor_LoadFromSerializedProto`` with ``serialized``."""
        return _sentencepiece.SentencePieceProcessor_LoadFromSerializedProto(self, serialized)

    def SetEncodeExtraOptions(self, extra_option):
        """Forward to the native ``SentencePieceProcessor_SetEncodeExtraOptions`` with ``extra_option``."""
        return _sentencepiece.SentencePieceProcessor_SetEncodeExtraOptions(self, extra_option)

    def SetDecodeExtraOptions(self, extra_option):
        """Forward to the native ``SentencePieceProcessor_SetDecodeExtraOptions`` with ``extra_option``."""
        return _sentencepiece.SentencePieceProcessor_SetDecodeExtraOptions(self, extra_option)

    def SetVocabulary(self, valid_vocab):
        """Forward to the native ``SentencePieceProcessor_SetVocabulary`` with ``valid_vocab``."""
        return _sentencepiece.SentencePieceProcessor_SetVocabulary(self, valid_vocab)

    def ResetVocabulary(self):
        """Forward to the native ``SentencePieceProcessor_ResetVocabulary``."""
        return _sentencepiece.SentencePieceProcessor_ResetVocabulary(self)

    def LoadVocabulary(self, filename, threshold):
        """Forward to the native ``SentencePieceProcessor_LoadVocabulary`` with ``filename`` and ``threshold``."""
        return _sentencepiece.SentencePieceProcessor_LoadVocabulary(self, filename, threshold)

    def EncodeAsPieces(self, input):
        """Forward to the native ``SentencePieceProcessor_EncodeAsPieces`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_EncodeAsPieces(self, input)

    def EncodeAsIds(self, input):
        """Forward to the native ``SentencePieceProcessor_EncodeAsIds`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_EncodeAsIds(self, input)

    def NBestEncodeAsPieces(self, input, nbest_size):
        """Forward to the native ``SentencePieceProcessor_NBestEncodeAsPieces`` with ``input`` and ``nbest_size``."""
        return _sentencepiece.SentencePieceProcessor_NBestEncodeAsPieces(self, input, nbest_size)

    def NBestEncodeAsIds(self, input, nbest_size):
        """Forward to the native ``SentencePieceProcessor_NBestEncodeAsIds`` with ``input`` and ``nbest_size``."""
        return _sentencepiece.SentencePieceProcessor_NBestEncodeAsIds(self, input, nbest_size)

    def SampleEncodeAsPieces(self, input, nbest_size, alpha):
        """Forward to the native ``SentencePieceProcessor_SampleEncodeAsPieces`` with ``input``, ``nbest_size`` and ``alpha``."""
        return _sentencepiece.SentencePieceProcessor_SampleEncodeAsPieces(self, input, nbest_size, alpha)

    def SampleEncodeAsIds(self, input, nbest_size, alpha):
        """Forward to the native ``SentencePieceProcessor_SampleEncodeAsIds`` with ``input``, ``nbest_size`` and ``alpha``."""
        return _sentencepiece.SentencePieceProcessor_SampleEncodeAsIds(self, input, nbest_size, alpha)

    def DecodePieces(self, pieces):
        """Forward to the native ``SentencePieceProcessor_DecodePieces`` with ``pieces``."""
        return _sentencepiece.SentencePieceProcessor_DecodePieces(self, pieces)

    def DecodeIds(self, ids):
        """Forward to the native ``SentencePieceProcessor_DecodeIds`` with ``ids``."""
        return _sentencepiece.SentencePieceProcessor_DecodeIds(self, ids)

    def GetPieceSize(self):
        """Forward to the native ``SentencePieceProcessor_GetPieceSize``."""
        return _sentencepiece.SentencePieceProcessor_GetPieceSize(self)

    def PieceToId(self, piece):
        """Forward to the native ``SentencePieceProcessor_PieceToId`` with ``piece``."""
        return _sentencepiece.SentencePieceProcessor_PieceToId(self, piece)

    def IdToPiece(self, id):
        """Forward to the native ``SentencePieceProcessor_IdToPiece`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_IdToPiece(self, id)

    def GetScore(self, id):
        """Forward to the native ``SentencePieceProcessor_GetScore`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_GetScore(self, id)

    def IsUnknown(self, id):
        """Forward to the native ``SentencePieceProcessor_IsUnknown`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_IsUnknown(self, id)

    def IsControl(self, id):
        """Forward to the native ``SentencePieceProcessor_IsControl`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_IsControl(self, id)

    def IsUnused(self, id):
        """Forward to the native ``SentencePieceProcessor_IsUnused`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_IsUnused(self, id)

    def unk_id(self):
        """Forward to the native ``SentencePieceProcessor_unk_id``."""
        return _sentencepiece.SentencePieceProcessor_unk_id(self)

    def bos_id(self):
        """Forward to the native ``SentencePieceProcessor_bos_id``."""
        return _sentencepiece.SentencePieceProcessor_bos_id(self)

    def eos_id(self):
        """Forward to the native ``SentencePieceProcessor_eos_id``."""
        return _sentencepiece.SentencePieceProcessor_eos_id(self)

    def pad_id(self):
        """Forward to the native ``SentencePieceProcessor_pad_id``."""
        return _sentencepiece.SentencePieceProcessor_pad_id(self)

    def load(self, filename):
        """Forward to the native ``SentencePieceProcessor_load`` with ``filename``."""
        return _sentencepiece.SentencePieceProcessor_load(self, filename)

    # NOTE(review): the parameter is called `filename` here but `serialized` in
    # LoadFromSerializedProto -- confirm the intended name in the SWIG interface.
    def load_from_serialized_proto(self, filename):
        """Forward to the native ``SentencePieceProcessor_load_from_serialized_proto`` with ``filename``."""
        return _sentencepiece.SentencePieceProcessor_load_from_serialized_proto(self, filename)

    def set_encode_extra_options(self, extra_option):
        """Forward to the native ``SentencePieceProcessor_set_encode_extra_options`` with ``extra_option``."""
        return _sentencepiece.SentencePieceProcessor_set_encode_extra_options(self, extra_option)

    def set_decode_extra_options(self, extra_option):
        """Forward to the native ``SentencePieceProcessor_set_decode_extra_options`` with ``extra_option``."""
        return _sentencepiece.SentencePieceProcessor_set_decode_extra_options(self, extra_option)

    def set_vocabulary(self, valid_vocab):
        """Forward to the native ``SentencePieceProcessor_set_vocabulary`` with ``valid_vocab``."""
        return _sentencepiece.SentencePieceProcessor_set_vocabulary(self, valid_vocab)

    def reset_vocabulary(self):
        """Forward to the native ``SentencePieceProcessor_reset_vocabulary``."""
        return _sentencepiece.SentencePieceProcessor_reset_vocabulary(self)

    def load_vocabulary(self, filename, threshold):
        """Forward to the native ``SentencePieceProcessor_load_vocabulary`` with ``filename`` and ``threshold``."""
        return _sentencepiece.SentencePieceProcessor_load_vocabulary(self, filename, threshold)

    def encode_as_pieces(self, input):
        """Forward to the native ``SentencePieceProcessor_encode_as_pieces`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_encode_as_pieces(self, input)

    def encode_as_ids(self, input):
        """Forward to the native ``SentencePieceProcessor_encode_as_ids`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_encode_as_ids(self, input)

    def nbest_encode_as_pieces(self, input, nbest_size):
        """Forward to the native ``SentencePieceProcessor_nbest_encode_as_pieces`` with ``input`` and ``nbest_size``."""
        return _sentencepiece.SentencePieceProcessor_nbest_encode_as_pieces(self, input, nbest_size)

    def nbest_encode_as_ids(self, input, nbest_size):
        """Forward to the native ``SentencePieceProcessor_nbest_encode_as_ids`` with ``input`` and ``nbest_size``."""
        return _sentencepiece.SentencePieceProcessor_nbest_encode_as_ids(self, input, nbest_size)

    def sample_encode_as_pieces(self, input, nbest_size, alpha):
        """Forward to the native ``SentencePieceProcessor_sample_encode_as_pieces`` with ``input``, ``nbest_size`` and ``alpha``."""
        return _sentencepiece.SentencePieceProcessor_sample_encode_as_pieces(self, input, nbest_size, alpha)

    def sample_encode_as_ids(self, input, nbest_size, alpha):
        """Forward to the native ``SentencePieceProcessor_sample_encode_as_ids`` with ``input``, ``nbest_size`` and ``alpha``."""
        return _sentencepiece.SentencePieceProcessor_sample_encode_as_ids(self, input, nbest_size, alpha)

    def decode_pieces(self, input):
        """Forward to the native ``SentencePieceProcessor_decode_pieces`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_decode_pieces(self, input)

    def decode_ids(self, input):
        """Forward to the native ``SentencePieceProcessor_decode_ids`` with ``input``."""
        return _sentencepiece.SentencePieceProcessor_decode_ids(self, input)

    def get_piece_size(self):
        """Forward to the native ``SentencePieceProcessor_get_piece_size``."""
        return _sentencepiece.SentencePieceProcessor_get_piece_size(self)

    def piece_to_id(self, piece):
        """Forward to the native ``SentencePieceProcessor_piece_to_id`` with ``piece``."""
        return _sentencepiece.SentencePieceProcessor_piece_to_id(self, piece)

    def id_to_piece(self, id):
        """Forward to the native ``SentencePieceProcessor_id_to_piece`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_id_to_piece(self, id)

    def get_score(self, id):
        """Forward to the native ``SentencePieceProcessor_get_score`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_get_score(self, id)

    def is_unknown(self, id):
        """Forward to the native ``SentencePieceProcessor_is_unknown`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_is_unknown(self, id)

    def is_control(self, id):
        """Forward to the native ``SentencePieceProcessor_is_control`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_is_control(self, id)

    def is_unused(self, id):
        """Forward to the native ``SentencePieceProcessor_is_unused`` with ``id``."""
        return _sentencepiece.SentencePieceProcessor_is_unused(self, id)

    def __len__(self):
        """Implement ``len(proxy)`` by forwarding to the native ``SentencePieceProcessor___len__``."""
        return _sentencepiece.SentencePieceProcessor___len__(self)

    def __getitem__(self, key):
        """Implement ``proxy[key]`` by forwarding to the native ``SentencePieceProcessor___getitem__``."""
        return _sentencepiece.SentencePieceProcessor___getitem__(self, key)
# Hand the finished proxy class to the native module's registration hook.
SentencePieceProcessor_swigregister = _sentencepiece.SentencePieceProcessor_swigregister
SentencePieceProcessor_swigregister(SentencePieceProcessor)

class SentencePieceTrainer(_object):
    """Proxy class exposing the native trainer entry points.

    Calling the class raises ``AttributeError``.  ``Train`` and ``train`` are
    bound to the ``SentencePieceTrainer_Train`` and
    ``SentencePieceTrainer_train`` functions of ``_sentencepiece``.
    """
    # Accessor tables consulted by the SWIG attribute helpers; both empty here.
    __swig_setmethods__ = {}
    __swig_getmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, SentencePieceTrainer, name, value)
    __getattr__ = lambda self, name: _swig_getattr(self, SentencePieceTrainer, name)
    __repr__ = _swig_repr

    def __init__(self, *args, **kwargs):
        """Always raise: no constructor is defined for this class."""
        raise AttributeError("No constructor defined")

    # When `_newclass` is truthy the native functions are wrapped in
    # staticmethod(); otherwise they are stored unwrapped.
    if _newclass:
        Train = staticmethod(_sentencepiece.SentencePieceTrainer_Train)
        train = staticmethod(_sentencepiece.SentencePieceTrainer_train)
    else:
        Train = _sentencepiece.SentencePieceTrainer_Train
        train = _sentencepiece.SentencePieceTrainer_train
# Hand the finished proxy class to the native module's registration hook.
SentencePieceTrainer_swigregister = _sentencepiece.SentencePieceTrainer_swigregister
SentencePieceTrainer_swigregister(SentencePieceTrainer)

def SentencePieceTrainer_Train(args):
    """Call the native ``SentencePieceTrainer_Train`` with ``args`` and return its result."""
    return _sentencepiece.SentencePieceTrainer_Train(args)
# NOTE(review): this assignment immediately rebinds the module-level name to
# the native function itself, so the Python-level def above is shadowed as
# soon as the module finishes importing.
SentencePieceTrainer_Train = _sentencepiece.SentencePieceTrainer_Train

def SentencePieceTrainer_train(args):
    """Call the native ``SentencePieceTrainer_train`` with ``args`` and return its result."""
    return _sentencepiece.SentencePieceTrainer_train(args)
# NOTE(review): this assignment immediately rebinds the module-level name to
# the native function itself, so the Python-level def above is shadowed as
# soon as the module finishes importing.
SentencePieceTrainer_train = _sentencepiece.SentencePieceTrainer_train

# This file is compatible with both classic and new-style classes.