Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-02-28 22:58:00 +0400
committerKenneth Heafield <github@kheafield.com>2012-02-28 22:58:00 +0400
commite48de47c2381547f78f4dbd89f4fa3e76ba0c6bf (patch)
treecdcbb888209bee7dd9c02a7d678cce4262c35416 /lm/binary_format.cc
parent7927979298644923cf02ad6c757c3d7c209e365a (diff)
KenLM 98814b2 including faster malloc-backed building and portability improvements
Diffstat (limited to 'lm/binary_format.cc')
-rw-r--r--lm/binary_format.cc46
1 files changed, 29 insertions, 17 deletions
diff --git a/lm/binary_format.cc b/lm/binary_format.cc
index ab0166a65..4796f6d1b 100644
--- a/lm/binary_format.cc
+++ b/lm/binary_format.cc
@@ -87,7 +87,7 @@ uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_
strncpy(reinterpret_cast<char*>(backing.vocab.get()), kMagicIncomplete, TotalHeaderSize(order));
return reinterpret_cast<uint8_t*>(backing.vocab.get()) + TotalHeaderSize(order);
} else {
- backing.vocab.reset(util::MapAnonymous(memory_size), memory_size, util::scoped_memory::MMAP_ALLOCATED);
+ util::MapAnonymous(memory_size, backing.vocab);
return reinterpret_cast<uint8_t*>(backing.vocab.get());
}
}
@@ -103,32 +103,44 @@ uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t
throw e;
}
+ if (config.write_method == Config::WRITE_AFTER) {
+ util::MapAnonymous(memory_size, backing.search);
+ return reinterpret_cast<uint8_t*>(backing.search.get());
+ }
+ // mmap it now.
// We're skipping over the header and vocab for the search space mmap. mmap likes page aligned offsets, so some arithmetic to round the offset down.
std::size_t page_size = util::SizePage();
std::size_t alignment_cruft = adjusted_vocab % page_size;
backing.search.reset(util::MapOrThrow(alignment_cruft + memory_size, true, util::kFileFlags, false, backing.file.get(), adjusted_vocab - alignment_cruft), alignment_cruft + memory_size, util::scoped_memory::MMAP_ALLOCATED);
-
return reinterpret_cast<uint8_t*>(backing.search.get()) + alignment_cruft;
} else {
- backing.search.reset(util::MapAnonymous(memory_size), memory_size, util::scoped_memory::MMAP_ALLOCATED);
+ util::MapAnonymous(memory_size, backing.search);
return reinterpret_cast<uint8_t*>(backing.search.get());
}
}
-void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts, Backing &backing) {
- if (config.write_mmap) {
- util::SyncOrThrow(backing.search.get(), backing.search.size());
- util::SyncOrThrow(backing.vocab.get(), backing.vocab.size());
- // header and vocab share the same mmap. The header is written here because we know the counts.
- Parameters params = Parameters();
- params.counts = counts;
- params.fixed.order = counts.size();
- params.fixed.probing_multiplier = config.probing_multiplier;
- params.fixed.model_type = model_type;
- params.fixed.has_vocabulary = config.include_vocab;
- params.fixed.search_version = search_version;
- WriteHeader(backing.vocab.get(), params);
+void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector<uint64_t> &counts, std::size_t vocab_pad, Backing &backing) {
+ if (!config.write_mmap) return;
+ util::SyncOrThrow(backing.vocab.get(), backing.vocab.size());
+ switch (config.write_method) {
+ case Config::WRITE_MMAP:
+ util::SyncOrThrow(backing.search.get(), backing.search.size());
+ break;
+ case Config::WRITE_AFTER:
+ util::SeekOrThrow(backing.file.get(), backing.vocab.size() + vocab_pad);
+ util::WriteOrThrow(backing.file.get(), backing.search.get(), backing.search.size());
+ util::FSyncOrThrow(backing.file.get());
+ break;
}
+ // header and vocab share the same mmap. The header is written here because we know the counts.
+ Parameters params = Parameters();
+ params.counts = counts;
+ params.fixed.order = counts.size();
+ params.fixed.probing_multiplier = config.probing_multiplier;
+ params.fixed.model_type = model_type;
+ params.fixed.has_vocabulary = config.include_vocab;
+ params.fixed.search_version = search_version;
+ WriteHeader(backing.vocab.get(), params);
}
namespace detail {
@@ -172,7 +184,7 @@ void ReadHeader(int fd, Parameters &out) {
UTIL_THROW(FormatLoadException, "Binary format claims to have a probing multiplier of " << out.fixed.probing_multiplier << " which is < 1.0.");
out.counts.resize(static_cast<std::size_t>(out.fixed.order));
- util::ReadOrThrow(fd, &*out.counts.begin(), sizeof(uint64_t) * out.fixed.order);
+ if (out.fixed.order) util::ReadOrThrow(fd, &*out.counts.begin(), sizeof(uint64_t) * out.fixed.order);
}
void MatchCheck(ModelType model_type, unsigned int search_version, const Parameters &params) {