Add Sample/NBestEncode

author: Taku Kudo <taku@google.com> 2018-02-28 07:14:52 +0300
committer: Taku Kudo <taku@google.com> 2018-02-28 07:14:52 +0300
commit: c6a1a196651789ba4c0334dbf41d5885b3334b2f (patch)
tree: a6e2b3a0e7a71b9d4d21e9f8800dc9b4b3cea94b /src/unigram_model.h
parent: ab766cbdaac1332776ae2c457fed9380f500159b (diff)
1 files changed, 11 insertions, 2 deletions
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 43841fc..bc2941f 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -82,6 +82,11 @@ class Lattice {
   // Returns n-best results.
   std::vector<std::vector<Node *>> NBest(size_t nbest_size);
 
+  // Samples one path from the lattice according to its
+  // generation probability (the product of piece probabilities).
+  // `theta` is a smoothing parameter.
+  std::vector<Node *> Sample(float theta);
+
   // Populates marginal probability of every node in this lattice.
   // |freq| is the frequency of the sentence.
   //  for (auto *node : all_nodes_) {
@@ -141,8 +146,12 @@ class Model : public ModelBase {
   explicit Model(const ModelProto &model_proto);
   ~Model() override;
 
-  std::vector<std::pair<StringPiece, int>> Encode(
-      StringPiece normalized) const override;
+  EncodeResult Encode(StringPiece normalized) const override;
+
+  NBestEncodeResult NBestEncode(StringPiece normalized,
+                                int nbest_size) const override;
+
+  EncodeResult SampleEncode(StringPiece normalized, float theta) const override;
 };
 }  // namespace unigram
 }  // namespace sentencepiece
author	Taku Kudo <taku@google.com>	2018-02-28 07:14:52 +0300
committer	Taku Kudo <taku@google.com>	2018-02-28 07:14:52 +0300
commit	c6a1a196651789ba4c0334dbf41d5885b3334b2f (patch)
tree	a6e2b3a0e7a71b9d4d21e9f8800dc9b4b3cea94b /src/unigram_model.h
parent	ab766cbdaac1332776ae2c457fed9380f500159b (diff)