diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-01-28 04:51:35 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-01-28 04:51:35 +0400 |
commit | 14e02978fcbbe9ec6ed3faa232fcb3f30664f40e (patch) | |
tree | 0b0d19a9c1ba1020ae6b3f1230b8b5b9e6a945f9 /lm/search_hashed.cc | |
parent | b68a906fdd97c344ce7d6fa90a2db5a79c5853e6 (diff) |
KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591
Avoid unspecified behavior of mmap when a file is resized reported by Christian Hardmeier
Fixes for Mavericks and a workaround for Boost's broken semaphore
Clean clang compile (of kenlm)
Merged some of 744376b3fbebc41c4a270bf549826d5eb9219ae0 but also undid some of it because it was just masking a fundaemntal problem with pread rather than working around windows limitations
Diffstat (limited to 'lm/search_hashed.cc')
-rw-r--r-- | lm/search_hashed.cc | 33 |
1 files changed, 19 insertions, 14 deletions
diff --git a/lm/search_hashed.cc b/lm/search_hashed.cc index 62275d277..354a56b46 100644 --- a/lm/search_hashed.cc +++ b/lm/search_hashed.cc @@ -204,9 +204,10 @@ template <class Build, class Activate, class Store> void ReadNGrams( namespace detail { template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { - std::size_t allocated = Unigram::Size(counts[0]); - unigram_ = Unigram(start, counts[0], allocated); - start += allocated; + unigram_ = Unigram(start, counts[0]); + start += Unigram::Size(counts[0]); + std::size_t allocated; + middle_.clear(); for (unsigned int n = 2; n < counts.size(); ++n) { allocated = Middle::Size(counts[n - 1], config.probing_multiplier); middle_.push_back(Middle(start, allocated)); @@ -218,9 +219,21 @@ template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, return start; } -template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing) { - // TODO: fix sorted. - SetupMemory(GrowForSearch(config, vocab.UnkCountChangePadding(), Size(counts, config), backing), counts, config); +/*template <class Value> void HashedSearch<Value>::Relocate(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { + unigram_ = Unigram(start, counts[0]); + start += Unigram::Size(counts[0]); + for (unsigned int n = 2; n < counts.size(); ++n) { + middle[n-2].Relocate(start); + start += Middle::Size(counts[n - 1], config.probing_multiplier) + } + longest_.Relocate(start); +}*/ + +template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, BinaryFormat &backing) { + void *vocab_rebase; + void *search_base = backing.GrowForSearch(Size(counts, config), vocab.UnkCountChangePadding(), vocab_rebase); + vocab.Relocate(vocab_rebase); + SetupMemory(reinterpret_cast<uint8_t*>(search_base), counts, config); PositiveProbWarn warn(config.positive_log_probability); Read1Grams(f, counts[0], vocab, unigram_.Raw(), warn); @@ -277,14 +290,6 @@ template <class Value> template <class Build> void HashedSearch<Value>::ApplyBui ReadEnd(f); } -template <class Value> void HashedSearch<Value>::LoadedBinary() { - unigram_.LoadedBinary(); - for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) { - i->LoadedBinary(); - } - longest_.LoadedBinary(); -} - template class HashedSearch<BackoffValue>; template class HashedSearch<RestValue>; |