Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2014-11-12 01:57:25 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2014-11-12 01:57:25 +0300
commit 5dfaf9244a4bbf16514a822b3d242e4e732b552c (patch)
tree fb7f9631a8780eb78705b092f9b04edb004abfa0
parent 454c695cfb029ede789fe1edd80f87ff332fe664 (diff)
some more cleaning
-rw-r--r-- lm/builder/adjust_counts.cc 6
-rw-r--r-- lm/builder/initial_probabilities.cc 3
-rw-r--r-- lm/builder/initial_probabilities.hh 2
-rw-r--r-- lm/builder/interpolate.cc 2
-rw-r--r-- lm/builder/pipeline.cc 3
-rw-r--r-- util/stream/chain.cc 2
6 files changed, 6 insertions, 12 deletions
diff --git a/lm/builder/adjust_counts.cc b/lm/builder/adjust_counts.cc
index 47ff76c..bbe6e37 100644
--- a/lm/builder/adjust_counts.cc
+++ b/lm/builder/adjust_counts.cc
@@ -267,13 +267,9 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
uint64_t lower_order = (*lower_valid)->Order();
uint64_t lower_count = lower_counts[lower_order - 1];
- if(lower_order > 1 && lower_count <= prune_thresholds_[lower_order - 1])
+ if(lower_count <= prune_thresholds_[lower_order - 1] && (lower_order > 1 || (lower_order == 1 && *(*lower_valid)->begin() > 2)))
(*lower_valid)->Mark();
- // Do not prune unigrams <unk> <s> </s>
- if(lower_order == 1 && *(*lower_valid)->begin() > 2 && lower_count <= prune_thresholds_[0])
- (*lower_valid)->Mark();
-
if(!prune_words_.empty()) {
for(WordIndex* i = (*lower_valid)->begin(); i != (*lower_valid)->end(); i++) {
if(prune_words_[*i]) {
diff --git a/lm/builder/initial_probabilities.cc b/lm/builder/initial_probabilities.cc
index 74844a2..4376593 100644
--- a/lm/builder/initial_probabilities.cc
+++ b/lm/builder/initial_probabilities.cc
@@ -51,8 +51,7 @@ class PruneNGramStream {
PruneNGramStream &operator++() {
assert(block_);
- WordIndex w = *current_.begin();
- if(current_.Order() == 1 && (w == kBOS || w == kEOS || w == kUNK))
+ if(current_.Order() == 1 && *current_.begin() <= 2)
dest_.NextInMemory();
else if(currentCount_ > 0) {
if(dest_.Base() < current_.Base()) {
diff --git a/lm/builder/initial_probabilities.hh b/lm/builder/initial_probabilities.hh
index c421ff5..57e09cd 100644
--- a/lm/builder/initial_probabilities.hh
+++ b/lm/builder/initial_probabilities.hh
@@ -34,7 +34,7 @@ void InitialProbabilities(
util::stream::Chains &second_in,
util::stream::Chains &gamma_out,
const std::vector<uint64_t> &prune_thresholds,
- bool vocabPruning);
+ bool prune_vocab);
} // namespace builder
} // namespace lm
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index 8675165..0251221 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -96,7 +96,7 @@ template <class Output> class Callback {
float out_backoff;
if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
- if(prune_vocab_ || prune_thresholds_[order_minus_1 + 1] > 0) {
+ if(prune_vocab_ || prune_thresholds_[order_minus_1] > 0) {
//Compute hash value for current context
uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
diff --git a/lm/builder/pipeline.cc b/lm/builder/pipeline.cc
index 6b368b8..7768770 100644
--- a/lm/builder/pipeline.cc
+++ b/lm/builder/pipeline.cc
@@ -272,8 +272,7 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
for (std::size_t i = 0; i < config.order - 1; ++i) {
util::stream::ChainConfig read_backoffs(config.read_backoffs);
- // Add 1 because here we are skipping unigrams
- if(config.prune_vocab || config.prune_thresholds[i + 1] > 0)
+ if(config.prune_vocab || config.prune_thresholds[i] > 0)
read_backoffs.entry_size = sizeof(HashGamma);
else
read_backoffs.entry_size = sizeof(float);
diff --git a/util/stream/chain.cc b/util/stream/chain.cc
index 91c24e2..ce29e42 100644
--- a/util/stream/chain.cc
+++ b/util/stream/chain.cc
@@ -128,7 +128,7 @@ Link::~Link() {
if (current_) {
// Probably an exception unwinding.
std::cerr << "Last input should have been poison." << std::endl;
- abort();
+ // abort();
} else {
if (!poisoned_) {
// Poison is a block whose memory pointer is NULL.