github.com/moses-smt/mosesdecoder.git

path: root/moses/LM
author     Kenneth Heafield <github@kheafield.com>   2016-02-20 03:07:48 +0300
committer  Kenneth Heafield <github@kheafield.com>   2016-02-20 03:07:48 +0300
commit     7a1baeecda90456532ef54a3c4995082213fc6d0 (patch)
tree       893f3f95a4bb4ad755c9d40fe9fd6af7aedd3193 /moses/LM
parent     5f06e3310f52923c48326b78ef181eff61ef22f8 (diff)
load= option to KenLM exposing more load_method options
Diffstat (limited to 'moses/LM')
-rw-r--r--  moses/LM/Backward.cpp |   3
-rw-r--r--  moses/LM/Ken.cpp      | 102
-rw-r--r--  moses/LM/Ken.h        |   7
-rw-r--r--  moses/LM/Reloading.h  |   7
4 files changed, 40 insertions(+), 79 deletions(-)
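The new option is parsed from the KENLM feature line in moses.ini. A minimal sketch of how the key might be used (arguments other than load= are illustrative, not taken from this commit):

  # hypothetical moses.ini feature lines; values accepted by the parser in this diff:
  #   lazy, populate_or_lazy, populate_or_read (alias: populate), read, parallel_read
  KENLM name=LM0 factor=0 order=5 path=/path/to/lm.binary load=lazy
  # the old boolean switch is still parsed but deprecated in favour of load=
  KENLM name=LM0 factor=0 order=5 path=/path/to/lm.binary lazyken=1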
diff --git a/moses/LM/Backward.cpp b/moses/LM/Backward.cpp
index 2fb7451b5..411f559ca 100644
--- a/moses/LM/Backward.cpp
+++ b/moses/LM/Backward.cpp
@@ -40,7 +40,8 @@ namespace Moses
{
/** Constructs a new backward language model. */
-template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy)
+// TODO(lane): load_method instead of lazy bool
+template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType, lazy ? util::LAZY : util::POPULATE_OR_READ)
{
//
// This space intentionally left blank
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index c81f3b859..a27940e72 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -69,63 +69,6 @@ struct KenLMState : public FFState {
};
-///*
-// * An implementation of single factor LM using Ken's code.
-// */
-//template <class Model> class LanguageModelKen : public LanguageModel
-//{
-//public:
-// LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
-//
-// const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
-// KenLMState *ret = new KenLMState();
-// ret->state = m_ngram->BeginSentenceState();
-// return ret;
-// }
-//
-// void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
-//
-// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
-//
-// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
-//
-// void IncrementalCallback(Incremental::Manager &manager) const {
-// manager.LMCallback(*m_ngram, m_lmIdLookup);
-// }
-//
-// bool IsUseable(const FactorMask &mask) const;
-//private:
-// LanguageModelKen(const LanguageModelKen<Model> &copy_from);
-//
-// lm::WordIndex TranslateID(const Word &word) const {
-// std::size_t factor = word.GetFactor(m_factorType)->GetId();
-// return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
-// }
-//
-// // Convert last words of hypothesis into vocab ids, returning an end pointer.
-// lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
-// lm::WordIndex *index = indices;
-// lm::WordIndex *end = indices + m_ngram->Order() - 1;
-// int position = hypo.GetCurrTargetWordsRange().GetEndPos();
-// for (; ; ++index, --position) {
-// if (index == end) return index;
-// if (position == -1) {
-// *index = m_ngram->GetVocabulary().BeginSentence();
-// return index + 1;
-// }
-// *index = TranslateID(hypo.GetWord(position));
-// }
-// }
-//
-// boost::shared_ptr<Model> m_ngram;
-//
-// std::vector<lm::WordIndex> m_lmIdLookup;
-//
-// FactorType m_factorType;
-//
-// const Factor *m_beginSentenceFactor;
-//};
-
class MappingBuilder : public lm::EnumerateVocab
{
public:
@@ -148,7 +91,7 @@ private:
} // namespace
-template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, bool lazy)
+template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, util::LoadMethod load_method)
{
lm::ngram::Config config;
if(this->m_verbosity >= 1) {
@@ -159,19 +102,19 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
FactorCollection &collection = FactorCollection::Instance();
MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
- config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
+ config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
m_beginSentenceFactor = collection.AddFactor(BOS_);
}
-template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
:LanguageModel(line)
,m_factorType(factorType)
{
ReadParameters();
- LoadModel(file, lazy);
+ LoadModel(file, load_method);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
@@ -479,7 +422,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
{
FactorType factorType = 0;
string filePath;
- bool lazy = false;
+ util::LoadMethod load_method = util::POPULATE_OR_READ;
util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
@@ -500,38 +443,53 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
} else if (name == "path") {
filePath.assign(value.data(), value.size());
} else if (name == "lazyken") {
- lazy = boost::lexical_cast<bool>(value);
+ // deprecated: use load instead.
+ load_method = boost::lexical_cast<bool>(value) ? util::LAZY : util::POPULATE_OR_READ;
+ } else if (name == "load") {
+ if (value == "lazy") {
+ load_method = util::LAZY;
+ } else if (value == "populate_or_lazy") {
+ load_method = util::POPULATE_OR_LAZY;
+ } else if (value == "populate_or_read" || value == "populate") {
+ load_method = util::POPULATE_OR_READ;
+ } else if (value == "read") {
+ load_method = util::READ;
+ } else if (value == "parallel_read") {
+ load_method = util::PARALLEL_READ;
+ } else {
+ UTIL_THROW2("Unknown KenLM load method " << value);
+ }
} else {
// pass to base class to interpret
line << " " << name << "=" << value;
}
}
- return ConstructKenLM(line.str(), filePath, factorType, lazy);
+ return ConstructKenLM(line.str(), filePath, factorType, load_method);
}
-LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
- return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
case lm::ngram::REST_PROBING:
- return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, load_method);
case lm::ngram::TRIE:
- return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_TRIE:
- return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, load_method);
case lm::ngram::ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, load_method);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
- return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
}
}
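For code that builds the feature directly, the second ConstructKenLM overload now takes util::LoadMethod instead of a bool. A minimal C++ sketch under that assumption (namespace, path, and order are placeholders; only the signature comes from this commit):

  // Sketch only: construct a KenLM feature with an explicit load method,
  // using the overload declared in Ken.h (diff below). Placeholders throughout.
  #include <string>
  #include "moses/LM/Ken.h"
  #include "util/mmap.hh"

  Moses::LanguageModel *BuildLM(const std::string &path)
  {
    const std::string line = "KENLM factor=0 order=5 path=" + path;
    return Moses::ConstructKenLM(line, path, /*factorType=*/0, util::PARALLEL_READ);
  }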
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 3a94e4c0b..4934228c2 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/shared_ptr.hpp>
#include "lm/word_index.hh"
+#include "util/mmap.hh"
#include "moses/LM/Base.h"
#include "moses/Hypothesis.h"
@@ -41,7 +42,7 @@ class FFState;
LanguageModel *ConstructKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
-LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
/*
* An implementation of single factor LM using Kenneth's code.
@@ -49,7 +50,7 @@ LanguageModel *ConstructKenLM(const std::string &line, const std::string &file,
template <class Model> class LanguageModelKen : public LanguageModel
{
public:
- LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+ LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
@@ -73,7 +74,7 @@ protected:
FactorType m_factorType;
- void LoadModel(const std::string &file, bool lazy);
+ void LoadModel(const std::string &file, util::LoadMethod load_method);
lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word.GetFactor(m_factorType)->GetId();
diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h
index 3993fe9d7..d5ae83d17 100644
--- a/moses/LM/Reloading.h
+++ b/moses/LM/Reloading.h
@@ -64,8 +64,8 @@ private:
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:
-
- ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy), m_file(file), m_lazy(lazy) {
+ // TODO(Lane) copy less code, update to load_method
+ ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
// std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
@@ -74,7 +74,8 @@ public:
virtual void InitializeForInput(ttasksptr const& ttask) {
std::cerr << "ReloadingLM InitializeForInput" << std::endl;
- LanguageModelKen<Model>::LoadModel(m_file, m_lazy);
+ // TODO(lane): load_method
+ LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
/*
lm::ngram::Config config;
if(this->m_verbosity >= 1) {