diff options
Diffstat (limited to 'stanza/models/common/pretrain.py')
-rw-r--r-- | stanza/models/common/pretrain.py | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/stanza/models/common/pretrain.py b/stanza/models/common/pretrain.py
index e18accbf..193cc71d 100644
--- a/stanza/models/common/pretrain.py
+++ b/stanza/models/common/pretrain.py
@@ -20,6 +20,12 @@ class PretrainedWordVocab(BaseVocab):
         self._id2unit = VOCAB_PREFIX + self.data
         self._unit2id = {w:i for i, w in enumerate(self._id2unit)}
 
+    def normalize_unit(self, unit):
+        unit = super().normalize_unit(unit)
+        if unit:
+            unit = unit.replace(" ","\xa0")
+        return unit
+
 class Pretrain:
     """ A loader and saver for pretrained embeddings. """
 