diff options
author | Taku Kudo <taku@google.com> | 2018-02-28 07:14:52 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-02-28 07:14:52 +0300 |
commit | c6a1a196651789ba4c0334dbf41d5885b3334b2f (patch) | |
tree | a6e2b3a0e7a71b9d4d21e9f8800dc9b4b3cea94b /src/unigram_model.h | |
parent | ab766cbdaac1332776ae2c457fed9380f500159b (diff) |
Add Sample/NBestEncode
Diffstat (limited to 'src/unigram_model.h')
-rw-r--r-- | src/unigram_model.h | 13 |
1 files changed, 11 insertions, 2 deletions
```diff
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 43841fc..bc2941f 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -82,6 +82,11 @@ class Lattice {
   // Returns n-best results.
   std::vector<std::vector<Node *>> NBest(size_t nbest_size);
 
+  // Samples one path from the lattice according to the
+  // generation probability (Product of piece probabilities).
+  // `theta` is a smoothing parameter.
+  std::vector<Node *> Sample(float theta);
+
   // Populates marginal probability of every node in this lattice.
   // |freq| is the frequency of the sentence.
   // for (auto *node : all_nodes_) {
@@ -141,8 +146,12 @@ class Model : public ModelBase {
   explicit Model(const ModelProto &model_proto);
   ~Model() override;
 
-  std::vector<std::pair<StringPiece, int>> Encode(
-      StringPiece normalized) const override;
+  EncodeResult Encode(StringPiece normalized) const override;
+
+  NBestEncodeResult NBestEncode(StringPiece normalized,
+                                int nbest_size) const override;
+
+  EncodeResult SampleEncode(StringPiece normalized, float theta) const override;
 };
 } // namespace unigram
 } // namespace sentencepiece
```