Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2012-11-05 00:36:42 +0400
committer: Kenneth Heafield <github@kheafield.com> 2012-11-05 00:36:42 +0400
commit: 14392acc8f41e3ed9e834573cfab433a54d4b68b (patch)
tree: d698c74d68505c021d6c37a7c75bca4b69e7aac5
parent: 96f7b42eb9944cc6fb5173c320562ca2dc5a2229 (diff)
KenLM 36b746 including -w mmap <unk> offset fix
-rw-r--r-- lm/model.cc 2
-rw-r--r-- lm/vocab.cc 7
-rw-r--r-- lm/vocab.hh 5
-rw-r--r-- util/file.cc 6
-rw-r--r-- util/file.hh 2
5 files changed, 10 insertions, 12 deletions
diff --git a/lm/model.cc b/lm/model.cc
index 2fd204815..d045954e8 100644
--- a/lm/model.cc
+++ b/lm/model.cc
@@ -87,7 +87,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
WriteWordsWrapper wrap(config.enumerate_vocab);
vocab_.ConfigureEnumerate(&wrap, counts[0]);
search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
- wrap.Write(backing_.file.get(), backing_.vocab.size() + vocab_.UnkCountChangePadding() + backing_.search.size());
+ wrap.Write(backing_.file.get(), backing_.vocab.size() + vocab_.UnkCountChangePadding() + Search::Size(counts, config));
} else {
vocab_.ConfigureEnumerate(config.enumerate_vocab, counts[0]);
search_.InitializeFromARPA(file, f, counts, config, vocab_, backing_);
diff --git a/lm/vocab.cc b/lm/vocab.cc
index 11c27518d..fd7f96dc4 100644
--- a/lm/vocab.cc
+++ b/lm/vocab.cc
@@ -116,7 +116,9 @@ WordIndex SortedVocabulary::Insert(const StringPiece &str) {
}
*end_ = hashed;
if (enumerate_) {
- strings_to_enumerate_[end_ - begin_].assign(str.data(), str.size());
+ void *copied = string_backing_.Allocate(str.size());
+ memcpy(copied, str.data(), str.size());
+ strings_to_enumerate_[end_ - begin_] = StringPiece(static_cast<const char*>(copied), str.size());
}
++end_;
// This is 1 + the offset where it was inserted to make room for unk.
@@ -126,7 +128,7 @@ WordIndex SortedVocabulary::Insert(const StringPiece &str) {
void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) {
if (enumerate_) {
if (!strings_to_enumerate_.empty()) {
- util::PairedIterator<ProbBackoff*, std::string*> values(reorder_vocab + 1, &*strings_to_enumerate_.begin());
+ util::PairedIterator<ProbBackoff*, StringPiece*> values(reorder_vocab + 1, &*strings_to_enumerate_.begin());
util::JointSort(begin_, end_, values);
}
for (WordIndex i = 0; i < static_cast<WordIndex>(end_ - begin_); ++i) {
@@ -134,6 +136,7 @@ void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) {
enumerate_->Add(i + 1, strings_to_enumerate_[i]);
}
strings_to_enumerate_.clear();
+ string_backing_.FreeAll();
} else {
util::JointSort(begin_, end_, reorder_vocab + 1);
}
diff --git a/lm/vocab.hh b/lm/vocab.hh
index de54eb064..3902f1174 100644
--- a/lm/vocab.hh
+++ b/lm/vocab.hh
@@ -4,6 +4,7 @@
#include "lm/enumerate_vocab.hh"
#include "lm/lm_exception.hh"
#include "lm/virtual_interface.hh"
+#include "util/pool.hh"
#include "util/probing_hash_table.hh"
#include "util/sorted_uniform.hh"
#include "util/string_piece.hh"
@@ -96,7 +97,9 @@ class SortedVocabulary : public base::Vocabulary {
EnumerateVocab *enumerate_;
// Actual strings. Used only when loading from ARPA and enumerate_ != NULL
- std::vector<std::string> strings_to_enumerate_;
+ util::Pool string_backing_;
+
+ std::vector<StringPiece> strings_to_enumerate_;
};
#pragma pack(push)
diff --git a/util/file.cc b/util/file.cc
index 4f958bb45..45795483e 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -172,12 +172,6 @@ std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
return ret;
}
-std::FILE *FOpenOrThrow(const char *path, const char *mode) {
- std::FILE *ret;
- UTIL_THROW_IF(!(ret = fopen(path, mode)), util::ErrnoException, "Could not fopen " << path << " for " << mode);
- return ret;
-}
-
TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
base_ += "XXXXXX";
}
diff --git a/util/file.hh b/util/file.hh
index c4b95eea9..90255bd5d 100644
--- a/util/file.hh
+++ b/util/file.hh
@@ -93,8 +93,6 @@ void SeekEnd(int fd);
std::FILE *FDOpenOrThrow(scoped_fd &file);
std::FILE *FDOpenReadOrThrow(scoped_fd &file);
-std::FILE *FOpenOrThrow(const char *path, const char *mode);
-
class TempMaker {
public:
explicit TempMaker(const std::string &prefix);