diff options
author | Taku Kudo <taku@google.com> | 2020-05-07 19:06:50 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2020-05-07 19:06:50 +0300 |
commit | 329383b455a5795f3d182159eb0985a3f20f0fa2 (patch) | |
tree | b5af4450144f7e61f8af456bff9929bff41ca54f /src/unigram_model.h | |
parent | 662c7549f0e1110dbb9b015ad7a89be49743fc69 (diff) |
Initial release of 0.19. Merged internal sentencepiece.
Diffstat (limited to 'src/unigram_model.h')
-rw-r--r-- | src/unigram_model.h | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/src/unigram_model.h b/src/unigram_model.h index 466a1c2..d67c7c7 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -20,10 +20,10 @@ #include <utility> #include <vector> +#include "builtin_pb/sentencepiece_model.pb.h" #include "common.h" #include "freelist.h" #include "model_interface.h" -#include "sentencepiece_model.pb.h" #include "third_party/darts_clone/darts.h" namespace sentencepiece { @@ -143,10 +143,25 @@ class Model : public ModelInterface { // Returns a vocab id of |piece|. int PieceToId(absl::string_view piece) const override; + // Verifies if two outputs are equivalent by comparing their scores. + bool VerifyOutputsEquivalent(absl::string_view expected, + absl::string_view actual) const override; + protected: // Builds a Trie index. void BuildTrie(std::vector<std::pair<absl::string_view, int>> *pieces); + // The optimized Viterbi encode. + // Main differences from the original function: + // 1. Memorizes the best path at each position so far, + // 2. No need to store the Lattice nodes, + // 3. Works in utf-8 directly, + // 4. Defines a new struct with fewer fields than Lattice, + // 5. Does not depend on `class Lattice` nor call `SetSentence()`, + // `PopulateNodes()`, or `Viterbi()`. It does everything in one function. + // For detailed explanations please see the comments inside the function body. + EncodeResult EncodeOptimized(absl::string_view normalized) const; + float min_score_ = 0.0; float max_score_ = 0.0; std::unique_ptr<Darts::DoubleArray> trie_; |