diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2014-11-12 01:57:25 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2014-11-12 01:57:25 +0300 |
commit | 5dfaf9244a4bbf16514a822b3d242e4e732b552c (patch) | |
tree | fb7f9631a8780eb78705b092f9b04edb004abfa0 | |
parent | 454c695cfb029ede789fe1edd80f87ff332fe664 (diff) |
some more cleaning
-rw-r--r-- | lm/builder/adjust_counts.cc | 6 | ||||
-rw-r--r-- | lm/builder/initial_probabilities.cc | 3 | ||||
-rw-r--r-- | lm/builder/initial_probabilities.hh | 2 | ||||
-rw-r--r-- | lm/builder/interpolate.cc | 2 | ||||
-rw-r--r-- | lm/builder/pipeline.cc | 3 | ||||
-rw-r--r-- | util/stream/chain.cc | 2 |
6 files changed, 6 insertions, 12 deletions
```diff
diff --git a/lm/builder/adjust_counts.cc b/lm/builder/adjust_counts.cc
index 47ff76c..bbe6e37 100644
--- a/lm/builder/adjust_counts.cc
+++ b/lm/builder/adjust_counts.cc
@@ -267,13 +267,9 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
           uint64_t lower_order = (*lower_valid)->Order();
           uint64_t lower_count = lower_counts[lower_order - 1];
 
-          if(lower_order > 1 && lower_count <= prune_thresholds_[lower_order - 1])
+          if(lower_count <= prune_thresholds_[lower_order - 1] && (lower_order > 1 || (lower_order == 1 && *(*lower_valid)->begin() > 2)))
             (*lower_valid)->Mark();
 
-          // Do not prune unigrams <unk> <s> </s>
-          if(lower_order == 1 && *(*lower_valid)->begin() > 2 && lower_count <= prune_thresholds_[0])
-            (*lower_valid)->Mark();
-
           if(!prune_words_.empty()) {
             for(WordIndex* i = (*lower_valid)->begin(); i != (*lower_valid)->end(); i++) {
               if(prune_words_[*i]) {
diff --git a/lm/builder/initial_probabilities.cc b/lm/builder/initial_probabilities.cc
index 74844a2..4376593 100644
--- a/lm/builder/initial_probabilities.cc
+++ b/lm/builder/initial_probabilities.cc
@@ -51,8 +51,7 @@ class PruneNGramStream {
     PruneNGramStream &operator++() {
       assert(block_);
 
-      WordIndex w = *current_.begin();
-      if(current_.Order() == 1 && (w == kBOS || w == kEOS || w == kUNK))
+      if(current_.Order() == 1 && *current_.begin() <= 2)
         dest_.NextInMemory();
       else if(currentCount_ > 0) {
         if(dest_.Base() < current_.Base()) {
diff --git a/lm/builder/initial_probabilities.hh b/lm/builder/initial_probabilities.hh
index c421ff5..57e09cd 100644
--- a/lm/builder/initial_probabilities.hh
+++ b/lm/builder/initial_probabilities.hh
@@ -34,7 +34,7 @@ void InitialProbabilities(
   util::stream::Chains &second_in,
   util::stream::Chains &gamma_out,
   const std::vector<uint64_t> &prune_thresholds,
-  bool vocabPruning);
+  bool prune_vocab);
 
 } // namespace builder
 } // namespace lm
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index 8675165..0251221 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -96,7 +96,7 @@ template <class Output> class Callback {
 
       float out_backoff;
       if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
-        if(prune_vocab_ || prune_thresholds_[order_minus_1 + 1] > 0) {
+        if(prune_vocab_ || prune_thresholds_[order_minus_1] > 0) {
           //Compute hash value for current context
           uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
 
diff --git a/lm/builder/pipeline.cc b/lm/builder/pipeline.cc
index 6b368b8..7768770 100644
--- a/lm/builder/pipeline.cc
+++ b/lm/builder/pipeline.cc
@@ -272,8 +272,7 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
   for (std::size_t i = 0; i < config.order - 1; ++i) {
     util::stream::ChainConfig read_backoffs(config.read_backoffs);
 
-    // Add 1 because here we are skipping unigrams
-    if(config.prune_vocab || config.prune_thresholds[i + 1] > 0)
+    if(config.prune_vocab || config.prune_thresholds[i] > 0)
       read_backoffs.entry_size = sizeof(HashGamma);
     else
       read_backoffs.entry_size = sizeof(float);
diff --git a/util/stream/chain.cc b/util/stream/chain.cc
index 91c24e2..ce29e42 100644
--- a/util/stream/chain.cc
+++ b/util/stream/chain.cc
@@ -128,7 +128,7 @@ Link::~Link() {
   if (current_) {
     // Probably an exception unwinding.
     std::cerr << "Last input should have been poison." << std::endl;
-    abort();
+    // abort();
   } else {
     if (!poisoned_) {
       // Poison is a block whose memory pointer is NULL.
```