diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-02-24 11:46:16 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-02-24 11:46:16 +0400 |
commit | 5c868f8294729bdf68991a93c37b2544d5624631 (patch) | |
tree | b9618f108a11ca363d165725925fdbd46720ffb0 | |
parent | a43451030b6d352293f74a33cde0ea30b2d0608a (diff) |
Remove debug statements, add progress bar
-rw-r--r-- | lm/filter/merge_lines_main.cc | 14 |
1 file changed, 6 insertions, 8 deletions
diff --git a/lm/filter/merge_lines_main.cc b/lm/filter/merge_lines_main.cc index b764615..a58ee42 100644 --- a/lm/filter/merge_lines_main.cc +++ b/lm/filter/merge_lines_main.cc @@ -14,27 +14,25 @@ struct Entry { }; int main(int argc, char *argv[]) { - boost::ptr_vector<util::FilePiece> files; - for (int i = 1; i < argc; ++i) { - files.push_back(new util::FilePiece(argv[i])); - } - if (files.empty()) { + if (argc < 2) { std::cerr << "Provide a list of vocabulary files to merge on the command line." << std::endl; return 1; } + boost::ptr_vector<util::FilePiece> files; + files.push_back(new util::FilePiece(argv[1], &std::cerr)); + for (int i = 2; i < argc; ++i) { + files.push_back(new util::FilePiece(argv[i])); + } util::FakeOFStream out(1); StringPiece word; try { while (true) { - std::cerr << "New dedupe" << std::endl; util::AutoProbing<Entry, util::IdentityHash> dedupe; for (boost::ptr_vector<util::FilePiece>::iterator i = files.begin(); i != files.end(); ++i) { while (i->ReadWordSameLine(word)) { - std::cerr << "Read " << word << std::endl; Entry entry; entry.key = util::MurmurHashNative(word.data(), word.size()); util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored; if (!dedupe.FindOrInsert(entry, ignored)) { - std::cerr << "Identified word " << word << std::endl; out << word << ' '; } } |