Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2014-02-24 11:46:16 +0400
committer Kenneth Heafield <github@kheafield.com> 2014-02-24 11:46:16 +0400
commit 5c868f8294729bdf68991a93c37b2544d5624631 (patch)
tree b9618f108a11ca363d165725925fdbd46720ffb0
parent a43451030b6d352293f74a33cde0ea30b2d0608a (diff)
Remove debug statements, add progress bar
-rw-r--r-- lm/filter/merge_lines_main.cc 14
1 files changed, 6 insertions, 8 deletions
diff --git a/lm/filter/merge_lines_main.cc b/lm/filter/merge_lines_main.cc
index b764615..a58ee42 100644
--- a/lm/filter/merge_lines_main.cc
+++ b/lm/filter/merge_lines_main.cc
@@ -14,27 +14,25 @@ struct Entry {
};
int main(int argc, char *argv[]) {
- boost::ptr_vector<util::FilePiece> files;
- for (int i = 1; i < argc; ++i) {
- files.push_back(new util::FilePiece(argv[i]));
- }
- if (files.empty()) {
+ if (argc < 2) {
std::cerr << "Provide a list of vocabulary files to merge on the command line." << std::endl;
return 1;
}
+ boost::ptr_vector<util::FilePiece> files;
+ files.push_back(new util::FilePiece(argv[1], &std::cerr));
+ for (int i = 2; i < argc; ++i) {
+ files.push_back(new util::FilePiece(argv[i]));
+ }
util::FakeOFStream out(1);
StringPiece word;
try { while (true) {
- std::cerr << "New dedupe" << std::endl;
util::AutoProbing<Entry, util::IdentityHash> dedupe;
for (boost::ptr_vector<util::FilePiece>::iterator i = files.begin(); i != files.end(); ++i) {
while (i->ReadWordSameLine(word)) {
- std::cerr << "Read " << word << std::endl;
Entry entry;
entry.key = util::MurmurHashNative(word.data(), word.size());
util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored;
if (!dedupe.FindOrInsert(entry, ignored)) {
- std::cerr << "Identified word " << word << std::endl;
out << word << ' ';
}
}