Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-02-28 07:14:52 +0300
committer: Taku Kudo <taku@google.com> 2018-02-28 07:14:52 +0300
commit: c6a1a196651789ba4c0334dbf41d5885b3334b2f (patch)
tree: a6e2b3a0e7a71b9d4d21e9f8800dc9b4b3cea94b /src/unigram_model.h
parent: ab766cbdaac1332776ae2c457fed9380f500159b (diff)
Add Sample/NBestEncode
Diffstat (limited to 'src/unigram_model.h')
-rw-r--r-- src/unigram_model.h 13
1 files changed, 11 insertions, 2 deletions
diff --git a/src/unigram_model.h b/src/unigram_model.h
index 43841fc..bc2941f 100644
--- a/src/unigram_model.h
+++ b/src/unigram_model.h
@@ -82,6 +82,11 @@ class Lattice {
// Returns n-best results.
std::vector<std::vector<Node *>> NBest(size_t nbest_size);
+ // Samples one path from the lattice according to the
+ // generation probability (Product of piece probabilities).
+ // `theta` is a smoothing parameter.
+ std::vector<Node *> Sample(float theta);
+
// Populates marginal probability of every node in this lattice.
// |freq| is the frequency of the sentence.
// for (auto *node : all_nodes_) {
@@ -141,8 +146,12 @@ class Model : public ModelBase {
explicit Model(const ModelProto &model_proto);
~Model() override;
- std::vector<std::pair<StringPiece, int>> Encode(
- StringPiece normalized) const override;
+ EncodeResult Encode(StringPiece normalized) const override;
+
+ NBestEncodeResult NBestEncode(StringPiece normalized,
+ int nbest_size) const override;
+
+ EncodeResult SampleEncode(StringPiece normalized, float theta) const override;
};
} // namespace unigram
} // namespace sentencepiece