Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2014-04-26 01:01:53 +0400
committer Kenneth Heafield <github@kheafield.com> 2014-04-26 01:01:53 +0400
commit 5d4ad1e4fc1cdfb75f9c2d018579e0e9d700870c (patch)
tree e62a98d0a20e922251b026398fc4cb23610cf8aa
parent 40006ff19dea5d783944e4840cba8250ab23be33 (diff)
Generate q files with ngrams (paul)
-rw-r--r-- lm/builder/interpolate.cc 8
1 files changed, 5 insertions, 3 deletions
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index 6e99250..5074dcb 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -25,10 +25,10 @@ class Callback {
full_backoffs_.push_back(backoffs[i]);
}
util::stream::ChainConfig write_qs;
- write_qs.entry_size = 4;
write_qs.total_memory = 1048576;
write_qs.block_count = 2;
for (std::size_t i = 0; i < backoffs.size() + 1; ++i) {
+ write_qs.entry_size = (i + 1) * sizeof(WordIndex) + 4;
std::string file("q");
file += boost::lexical_cast<std::string>(i + 1);
q_files_.push_back(util::CreateOrThrow(file.c_str()));
@@ -74,8 +74,10 @@ class Callback {
// Not a context.
pay.complete.backoff = ngram::kNoExtensionBackoff;
}
- // Write the q value ot to a stream.
- *static_cast<float*>(q_out_[order_minus_1].Get()) = q_delta_[order_minus_1] * actual_prob;
+ // Write the q value to a stream with n-grams.
+ std::size_t gram_size = sizeof(WordIndex) * gram.Order();
+ memcpy(q_out_[order_minus_1].Get(), gram.begin(), gram_size);
+ *reinterpret_cast<float*>(static_cast<uint8_t*>(q_out_[order_minus_1].Get()) + gram_size) = q_delta_[order_minus_1] * actual_prob;
++q_out_[order_minus_1];
binarize_.Enter(order_minus_1, gram);
}