Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2014-02-18 02:11:13 +0400
committer Kenneth Heafield <github@kheafield.com> 2014-02-18 02:12:45 +0400
commit 8142108fb0f810a135753ab9dc54bc18395df592 (patch)
tree 1001cfdd031adbbac47f5abf15be73bbf9dd0d4a
parent 30215fc5bb228188de260b914f83f075dac9cd5f (diff)
Fix test, build using BoolCharacter::Build
-rw-r--r-- lm/builder/corpus_count.cc 6
-rw-r--r-- lm/builder/corpus_count_test.cc 2
2 files changed, 2 insertions, 6 deletions
diff --git a/lm/builder/corpus_count.cc b/lm/builder/corpus_count.cc
index 7021eaa..b99edd0 100644
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@@ -253,11 +253,7 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
Writer writer(NGram::OrderFromSize(position.GetChain().EntrySize()), position, dedupe_mem_.get(), dedupe_mem_size_);
uint64_t count = 0;
bool delimiters[256];
- memset(delimiters, 0, sizeof(delimiters));
- const char kDelimiterSet[] = "\0\t\n\r ";
- for (const char *i = kDelimiterSet; i < kDelimiterSet + sizeof(kDelimiterSet); ++i) {
- delimiters[static_cast<unsigned char>(*i)] = true;
- }
+ util::BoolCharacter::Build("\0\t\n\r ", delimiters);
try {
while(true) {
StringPiece line(from_.ReadLine());
diff --git a/lm/builder/corpus_count_test.cc b/lm/builder/corpus_count_test.cc
index 6d325ef..26cb634 100644
--- a/lm/builder/corpus_count_test.cc
+++ b/lm/builder/corpus_count_test.cc
@@ -45,7 +45,7 @@ BOOST_AUTO_TEST_CASE(Short) {
NGramStream stream;
uint64_t token_count;
WordIndex type_count = 10;
- CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize());
+ CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize(), SILENT);
chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};