Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaku Kudo <taku@google.com>2020-05-07 19:06:50 +0300
committerTaku Kudo <taku@google.com>2020-05-07 19:06:50 +0300
commit329383b455a5795f3d182159eb0985a3f20f0fa2 (patch)
treeb5af4450144f7e61f8af456bff9929bff41ca54f /src/unigram_model.h
parent662c7549f0e1110dbb9b015ad7a89be49743fc69 (diff)
Initial release of 0.19. Merged internal sentencepiece.
Diffstat (limited to 'src/unigram_model.h')
-rw-r--r--src/unigram_model.h17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 466a1c2..d67c7c7 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -20,10 +20,10 @@
#include <utility>
#include <vector>
+#include "builtin_pb/sentencepiece_model.pb.h"
#include "common.h"
#include "freelist.h"
#include "model_interface.h"
-#include "sentencepiece_model.pb.h"
#include "third_party/darts_clone/darts.h"
namespace sentencepiece {
@@ -143,10 +143,25 @@ class Model : public ModelInterface {
// Returns a vocab id of |piece|.
int PieceToId(absl::string_view piece) const override;
+ // Verifies if two outputs are equivalent by comparing their scores.
+ bool VerifyOutputsEquivalent(absl::string_view expected,
+ absl::string_view actual) const override;
+
protected:
// Builds a Trie index.
void BuildTrie(std::vector<std::pair<absl::string_view, int>> *pieces);
+ // The optimized Viterbi encode.
+ // Main differences from the original function:
+ // 1. Memorizes the best path at each position so far,
+ // 2. No need to store the Lattice nodes,
+ // 3. Works in utf-8 directly,
+ // 4. Defines a new struct with fewer fields than Lattice,
+ // 5. Does not depend on `class Lattice` nor call `SetSentence()`,
+ // `PopulateNodes()`, or `Viterbi()`. It does everything in one function.
+ // For detailed explanations please see the comments inside the function body.
+ EncodeResult EncodeOptimized(absl::string_view normalized) const;
+
float min_score_ = 0.0;
float max_score_ = 0.0;
std::unique_ptr<Darts::DoubleArray> trie_;