Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'stanza/models/common/pretrain.py')
-rw-r--r-- stanza/models/common/pretrain.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/stanza/models/common/pretrain.py b/stanza/models/common/pretrain.py
index e18accbf..193cc71d 100644
--- a/stanza/models/common/pretrain.py
+++ b/stanza/models/common/pretrain.py
@@ -20,6 +20,12 @@ class PretrainedWordVocab(BaseVocab):
self._id2unit = VOCAB_PREFIX + self.data
self._unit2id = {w:i for i, w in enumerate(self._id2unit)}
+ def normalize_unit(self, unit):  # Normalize a vocab unit, then swap plain spaces for no-break spaces.
+ unit = super().normalize_unit(unit)  # start from the parent class's normalization
+ if unit:  # skip falsy results so .replace() only runs on a non-empty value
+ unit = unit.replace(" ","\xa0")  # U+0020 space -> U+00A0 no-break space; presumably so lookups match entries stored with NBSP -- TODO confirm against the pretrain file format
+ return unit  # a falsy parent result is returned unchanged
+
class Pretrain:
""" A loader and saver for pretrained embeddings. """