diff options
-rw-r--r--  Jamroot      2
-rw-r--r--  lm/model.cc  2
-rw-r--r--  lm/vocab.cc  4
-rw-r--r--  lm/vocab.hh  2
4 files changed, 6 insertions, 4 deletions
@@ -59,6 +59,8 @@
 #
 # --max-kenlm-order maximum ngram order that kenlm can process (default 6)
 #
+# --max-factors maximum number of factors (default 4)
+#
 #CONTROLLING THE BUILD
 #-a to build from scratch
 #-j$NCPUS to compile in parallel
diff --git a/lm/model.cc b/lm/model.cc
index 40af8a637..2fd204815 100644
--- a/lm/model.cc
+++ b/lm/model.cc
@@ -87,7 +87,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
     WriteWordsWrapper wrap(config.enumerate_vocab);
     vocab_.ConfigureEnumerate(&wrap, counts[0]);
     search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
-    wrap.Write(backing_.file.get());
+    wrap.Write(backing_.file.get(), backing_.vocab.size() + vocab_.UnkCountChangePadding() + backing_.search.size());
   } else {
     vocab_.ConfigureEnumerate(config.enumerate_vocab, counts[0]);
     search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
diff --git a/lm/vocab.cc b/lm/vocab.cc
index 398475bee..11c27518d 100644
--- a/lm/vocab.cc
+++ b/lm/vocab.cc
@@ -80,8 +80,8 @@ void WriteWordsWrapper::Add(WordIndex index, const StringPiece &str) {
   buffer_.push_back(0);
 }
 
-void WriteWordsWrapper::Write(int fd) {
-  util::SeekEnd(fd);
+void WriteWordsWrapper::Write(int fd, uint64_t start) {
+  util::SeekOrThrow(fd, start);
   util::WriteOrThrow(fd, buffer_.data(), buffer_.size());
 }
 
diff --git a/lm/vocab.hh b/lm/vocab.hh
index 074cd446e..de54eb064 100644
--- a/lm/vocab.hh
+++ b/lm/vocab.hh
@@ -35,7 +35,7 @@ class WriteWordsWrapper : public EnumerateVocab {
 
     void Add(WordIndex index, const StringPiece &str);
 
-    void Write(int fd);
+    void Write(int fd, uint64_t start);
 
   private:
     EnumerateVocab *inner_;