Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'stanza/models/common/vocab.py')
-rw-r--r-- stanza/models/common/vocab.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/stanza/models/common/vocab.py b/stanza/models/common/vocab.py
index 7150af1d..e3e2c300 100644
--- a/stanza/models/common/vocab.py
+++ b/stanza/models/common/vocab.py
@@ -47,6 +47,9 @@ class BaseVocab:
return new
def normalize_unit(self, unit):
+ if unit is None:
+ return unit
+ unit = unit.replace(" ","\xa0")
if self.lower:
return unit.lower()
return unit
@@ -79,7 +82,7 @@ class BaseVocab:
raise TypeError("Vocab key must be one of str, list, or int")
def __contains__(self, key):
- return key in self._unit2id
+ return self.normalize_unit(key) in self._unit2id
@property
def size(self):