Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com>, 2014-02-27 00:57:32 +0400
committer: Kenneth Heafield <github@kheafield.com>, 2014-02-27 00:57:32 +0400
commit: 40006ff19dea5d783944e4840cba8250ab23be33 (patch)
tree: 17260a5df085152ca963f0adaf5a2dc61c2f6619
parent: 2879764b0890cce5a39b88e253b59d7684c0c7d9 (diff)
Some change to merge lines [branch: bounded-noquant]
-rw-r--r-- lm/filter/merge_lines_main.cc | 20
1 files changed, 13 insertions, 7 deletions
diff --git a/lm/filter/merge_lines_main.cc b/lm/filter/merge_lines_main.cc
index a58ee42..9e68a80 100644
--- a/lm/filter/merge_lines_main.cc
+++ b/lm/filter/merge_lines_main.cc
@@ -28,15 +28,21 @@ int main(int argc, char *argv[]) {
try { while (true) {
util::AutoProbing<Entry, util::IdentityHash> dedupe;
for (boost::ptr_vector<util::FilePiece>::iterator i = files.begin(); i != files.end(); ++i) {
- while (i->ReadWordSameLine(word)) {
- Entry entry;
- entry.key = util::MurmurHashNative(word.data(), word.size());
- util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored;
- if (!dedupe.FindOrInsert(entry, ignored)) {
- out << word << ' ';
+ try {
+ while (i->ReadWordSameLine(word)) {
+ Entry entry;
+ entry.key = util::MurmurHashNative(word.data(), word.size());
+ util::AutoProbing<Entry, util::IdentityHash>::MutableIterator ignored;
+ if (!dedupe.FindOrInsert(entry, ignored)) {
+ out << word << ' ';
+ }
}
+ i->ReadLine();
+ } catch (const util::EndOfFileException &e) {
+ if (i == files.begin()) throw;
+ std::cerr << "File " << i->FileName() << " is shorter than the others." << std::endl;
+ return 1;
}
- i->ReadLine();
}
out << '\n';
} } catch (const util::EndOfFileException &e) {