diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-08-25 16:21:15 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-08-25 16:21:15 +0400 |
commit | 77e60573d4f782a45dcbec82222af092e7fb86b9 (patch) | |
tree | e0e8e68ddcbeaf9884579b56eaa383bb72d388f6 | |
parent | 280f5c4f16231e1391a8cfe0621d01ea52773b85 (diff) | |
parent | c661551a0ab07ab37660e6cf43fb73a2d16ab1d8 (diff) |
Merge branch 'master' of github.com:kpu/kenlm
-rw-r--r-- | lm/builder/initial_probabilities.cc | 2 | ||||
-rw-r--r-- | lm/builder/interpolate.cc | 16 |
2 files changed, 11 insertions, 7 deletions
diff --git a/lm/builder/initial_probabilities.cc b/lm/builder/initial_probabilities.cc index f6ee334..4f7b0a2 100644 --- a/lm/builder/initial_probabilities.cc +++ b/lm/builder/initial_probabilities.cc @@ -183,7 +183,7 @@ class AddRight { entry.gamma += normalizer; entry.gamma /= entry.denominator; - + if(pruning_) { // If pruning is enabled the stream actually contains HashBufferEntry, see InitialProbabilities(...), // so add a hash value that identifies the current ngram. diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc index db85374..83f6755 100644 --- a/lm/builder/interpolate.cc +++ b/lm/builder/interpolate.cc @@ -25,6 +25,11 @@ class Callback { ~Callback() { for (std::size_t i = 0; i < backoffs_.size(); ++i) { + + if(prune_thresholds_[i + 1] > 0) + while(backoffs_[i]) + ++backoffs_[i]; + if (backoffs_[i]) { std::cerr << "Backoffs do not match for order " << (i + 1) << std::endl; abort(); @@ -36,7 +41,7 @@ class Callback { Payload &pay = gram.Value(); pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1]; probs_[order_minus_1 + 1] = pay.complete.prob; - pay.complete.prob = log10(pay.complete.prob); + pay.complete.prob = log10(pay.complete.prob); if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) { // This skips over ngrams if backoffs have been exhausted. @@ -48,13 +53,12 @@ class Callback { if(prune_thresholds_[order_minus_1 + 1] > 0) { //Compute hash value for current context uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex)); - + const HashGamma *hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get()); - while(backoffs_[order_minus_1] && current_hash != hashed_backoff->hash_value) { - hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get()); - ++backoffs_[order_minus_1]; - } + while(current_hash != hashed_backoff->hash_value && ++backoffs_[order_minus_1]) + hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get()); + if(current_hash == hashed_backoff->hash_value) { pay.complete.backoff = log10(hashed_backoff->gamma); ++backoffs_[order_minus_1]; |