diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-02-27 00:57:32 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-02-27 00:57:32 +0400 |
commit | 40006ff19dea5d783944e4840cba8250ab23be33 (patch) | |
tree | 17260a5df085152ca963f0adaf5a2dc61c2f6619 | |
parent | 2879764b0890cce5a39b88e253b59d7684c0c7d9 (diff) |
Some change to merge lines [bounded-noquant]
-rw-r--r-- | lm/filter/merge_lines_main.cc | 20 |
1 file changed, 13 insertions, 7 deletions
diff --git a/lm/filter/merge_lines_main.cc b/lm/filter/merge_lines_main.cc index a58ee42..9e68a80 100644 --- a/lm/filter/merge_lines_main.cc +++ b/lm/filter/merge_lines_main.cc @@ -28,15 +28,21 @@ int main(int argc, char *argv[]) { try { while (true) { util::AutoProbing<Entry, util::IdentityHash> dedupe; for (boost::ptr_vector<util::FilePiece>::iterator i = files.begin(); i != files.end(); ++i) { - while (i->ReadWordSameLine(word)) { - Entry entry; - entry.key = util::MurmurHashNative(word.data(), word.size()); - util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored; - if (!dedupe.FindOrInsert(entry, ignored)) { - out << word << ' '; + try { + while (i->ReadWordSameLine(word)) { + Entry entry; + entry.key = util::MurmurHashNative(word.data(), word.size()); + util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored; + if (!dedupe.FindOrInsert(entry, ignored)) { + out << word << ' '; + } } + i->ReadLine(); + } catch (const util::EndOfFileException &e) { + if (i == files.begin()) throw; + std::cerr << "File " << i->FileName() << " is shorter than the others." << std::endl; + return 1; } - i->ReadLine(); } out << '\n'; } } catch (const util::EndOfFileException &e) { |