diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-02-18 02:11:13 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-02-18 02:12:45 +0400 |
commit | 8142108fb0f810a135753ab9dc54bc18395df592 (patch) | |
tree | 1001cfdd031adbbac47f5abf15be73bbf9dd0d4a | |
parent | 30215fc5bb228188de260b914f83f075dac9cd5f (diff) |
Fix test, build using BoolCharacter::Build
-rw-r--r-- | lm/builder/corpus_count.cc | 6 |
-rw-r--r-- | lm/builder/corpus_count_test.cc | 2 |
2 files changed, 2 insertions, 6 deletions
```diff
diff --git a/lm/builder/corpus_count.cc b/lm/builder/corpus_count.cc
index 7021eaa..b99edd0 100644
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@@ -253,11 +253,7 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
   Writer writer(NGram::OrderFromSize(position.GetChain().EntrySize()), position, dedupe_mem_.get(), dedupe_mem_size_);
   uint64_t count = 0;
   bool delimiters[256];
-  memset(delimiters, 0, sizeof(delimiters));
-  const char kDelimiterSet[] = "\0\t\n\r ";
-  for (const char *i = kDelimiterSet; i < kDelimiterSet + sizeof(kDelimiterSet); ++i) {
-    delimiters[static_cast<unsigned char>(*i)] = true;
-  }
+  util::BoolCharacter::Build("\0\t\n\r ", delimiters);
   try {
     while(true) {
       StringPiece line(from_.ReadLine());
diff --git a/lm/builder/corpus_count_test.cc b/lm/builder/corpus_count_test.cc
index 6d325ef..26cb634 100644
--- a/lm/builder/corpus_count_test.cc
+++ b/lm/builder/corpus_count_test.cc
@@ -45,7 +45,7 @@ BOOST_AUTO_TEST_CASE(Short) {
   NGramStream stream;
   uint64_t token_count;
   WordIndex type_count = 10;
-  CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize());
+  CorpusCount counter(input_piece, vocab.get(), token_count, type_count, chain.BlockSize() / chain.EntrySize(), SILENT);
   chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
 
   const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};
```