diff options
author | Taku Kudo <taku@google.com> | 2018-06-07 16:44:11 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-06-07 18:06:42 +0300 |
commit | 54ccef78b800625a58cbdbac1245d77c9b744e84 (patch) | |
tree | 977467b98b2517e20bfa6df17c5f79df92f6106b /src/unigram_model.h | |
parent | 7edac0b4ef81e94a1fdf041fab03771f943c9643 (diff) |
Support user defined symbols in Char/BPE
Diffstat (limited to 'src/unigram_model.h')
-rw-r--r-- | src/unigram_model.h | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 0ea054f..50b152a 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -125,6 +125,10 @@ class ModelBase : public ModelInterface {
   // min_score() - 10 is used for the cost of unknown sentence.
   float min_score() const { return min_score_; }
 
+  // Returns the maximum score in sentence pieces.
+  // max_score() is used for the cost of user defined symbols.
+  float max_score() const { return max_score_; }
+
   // Populates all sentence pieces to the |lattice|.
   // After calling this function, lattice.Viterbi() returns the
   // best segmentation.
@@ -137,7 +141,8 @@ class ModelBase : public ModelInterface {
   // Builds a Trie index.
   void BuildTrie(std::vector<std::pair<StringPiece, int>> *pieces);
 
-  float min_score_;
+  float min_score_ = 0.0;
+  float max_score_ = 0.0;
   std::unique_ptr<Darts::DoubleArray> trie_;
 
   // Maximum size of the return value of Trie, which corresponds