github.com/moses-smt/mosesdecoder.git
author     Lane Schwartz <Lane Schwartz>   2016-03-23 18:26:07 +0300
committer  Lane Schwartz <Lane Schwartz>   2016-03-23 18:26:07 +0300
commit     167def1d528221291ef97a9cee25d5f751e8c530 (patch)
tree       221657a15bb261d9ce55d8ffbea6da7978d8c4dc /moses/LM
parent     8893524339f15f99e3833785f893885e3c416e66 (diff)
parent     4c07496eb2c5d21ca238abdcad11fad7b93657d4 (diff)
Merge branch 'master' of github.com:moses-smt/mosesdecoder
Diffstat (limited to 'moses/LM')
-rw-r--r--  moses/LM/Backward.cpp   |   3
-rw-r--r--  moses/LM/Ken.cpp        | 102
-rw-r--r--  moses/LM/Ken.h          |   7
-rw-r--r--  moses/LM/Reloading.cpp  |   2
-rw-r--r--  moses/LM/Reloading.h    |  22
5 files changed, 48 insertions, 88 deletions
diff --git a/moses/LM/Backward.cpp b/moses/LM/Backward.cpp
index 2fb7451b5..411f559ca 100644
--- a/moses/LM/Backward.cpp
+++ b/moses/LM/Backward.cpp
@@ -40,7 +40,8 @@ namespace Moses
{
/** Constructs a new backward language model. */
-template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy)
+// TODO(lane): load_method instead of lazy bool
+template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType, lazy ? util::LAZY : util::POPULATE_OR_READ)
{
//
// This space intentionally left blank
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index ef2b03131..7972cc726 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -69,63 +69,6 @@ struct KenLMState : public FFState {
};
-///*
-// * An implementation of single factor LM using Ken's code.
-// */
-//template <class Model> class LanguageModelKen : public LanguageModel
-//{
-//public:
-// LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
-//
-// const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
-// KenLMState *ret = new KenLMState();
-// ret->state = m_ngram->BeginSentenceState();
-// return ret;
-// }
-//
-// void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
-//
-// FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
-//
-// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
-//
-// void IncrementalCallback(Incremental::Manager &manager) const {
-// manager.LMCallback(*m_ngram, m_lmIdLookup);
-// }
-//
-// bool IsUseable(const FactorMask &mask) const;
-//private:
-// LanguageModelKen(const LanguageModelKen<Model> &copy_from);
-//
-// lm::WordIndex TranslateID(const Word &word) const {
-// std::size_t factor = word.GetFactor(m_factorType)->GetId();
-// return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
-// }
-//
-// // Convert last words of hypothesis into vocab ids, returning an end pointer.
-// lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
-// lm::WordIndex *index = indices;
-// lm::WordIndex *end = indices + m_ngram->Order() - 1;
-// int position = hypo.GetCurrTargetWordsRange().GetEndPos();
-// for (; ; ++index, --position) {
-// if (index == end) return index;
-// if (position == -1) {
-// *index = m_ngram->GetVocabulary().BeginSentence();
-// return index + 1;
-// }
-// *index = TranslateID(hypo.GetWord(position));
-// }
-// }
-//
-// boost::shared_ptr<Model> m_ngram;
-//
-// std::vector<lm::WordIndex> m_lmIdLookup;
-//
-// FactorType m_factorType;
-//
-// const Factor *m_beginSentenceFactor;
-//};
-
class MappingBuilder : public lm::EnumerateVocab
{
public:
@@ -148,7 +91,7 @@ private:
} // namespace
-template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, bool lazy)
+template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, util::LoadMethod load_method)
{
m_lmIdLookup.clear();
@@ -161,18 +104,18 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
FactorCollection &collection = FactorCollection::Instance();
MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
- config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
+ config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
}
-template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
:LanguageModel(line)
,m_factorType(factorType)
,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
{
ReadParameters();
- LoadModel(file, lazy);
+ LoadModel(file, load_method);
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
@@ -480,7 +423,7 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
{
FactorType factorType = 0;
string filePath;
- bool lazy = false;
+ util::LoadMethod load_method = util::POPULATE_OR_READ;
util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
@@ -501,38 +444,53 @@ LanguageModel *ConstructKenLM(const std::string &lineOrig)
} else if (name == "path") {
filePath.assign(value.data(), value.size());
} else if (name == "lazyken") {
- lazy = boost::lexical_cast<bool>(value);
+ // deprecated: use load instead.
+ load_method = boost::lexical_cast<bool>(value) ? util::LAZY : util::POPULATE_OR_READ;
+ } else if (name == "load") {
+ if (value == "lazy") {
+ load_method = util::LAZY;
+ } else if (value == "populate_or_lazy") {
+ load_method = util::POPULATE_OR_LAZY;
+ } else if (value == "populate_or_read" || value == "populate") {
+ load_method = util::POPULATE_OR_READ;
+ } else if (value == "read") {
+ load_method = util::READ;
+ } else if (value == "parallel_read") {
+ load_method = util::PARALLEL_READ;
+ } else {
+ UTIL_THROW2("Unknown KenLM load method " << value);
+ }
} else {
// pass to base class to interpret
line << " " << name << "=" << value;
}
}
- return ConstructKenLM(line.str(), filePath, factorType, lazy);
+ return ConstructKenLM(line.str(), filePath, factorType, load_method);
}
-LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
- return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
case lm::ngram::REST_PROBING:
- return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, load_method);
case lm::ngram::TRIE:
- return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_TRIE:
- return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, load_method);
case lm::ngram::ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, load_method);
case lm::ngram::QUANT_ARRAY_TRIE:
- return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, load_method);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
- return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, load_method);
}
}
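
For reference, a minimal usage sketch of the new option through the one-argument ConstructKenLM shown above, assuming the usual Moses name=value feature-line syntax; the factor, order, and path values are placeholders and are not taken from this commit.

#include "moses/LM/Ken.h"

// Hypothetical feature line: "load" accepts lazy, populate_or_lazy,
// populate_or_read (or populate), read and parallel_read; any other value
// raises UTIL_THROW2. The old lazyken=0/1 form is still parsed but deprecated.
Moses::LanguageModel *BuildExampleLm()
{
  return Moses::ConstructKenLM(
    "KENLM factor=0 order=5 path=/path/to/lm.bin load=populate_or_read");
}
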
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 3a94e4c0b..4934228c2 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/shared_ptr.hpp>
#include "lm/word_index.hh"
+#include "util/mmap.hh"
#include "moses/LM/Base.h"
#include "moses/Hypothesis.h"
@@ -41,7 +42,7 @@ class FFState;
LanguageModel *ConstructKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
-LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
/*
* An implementation of single factor LM using Kenneth's code.
@@ -49,7 +50,7 @@ LanguageModel *ConstructKenLM(const std::string &line, const std::string &file,
template <class Model> class LanguageModelKen : public LanguageModel
{
public:
- LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
+ LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
@@ -73,7 +74,7 @@ protected:
FactorType m_factorType;
- void LoadModel(const std::string &file, bool lazy);
+ void LoadModel(const std::string &file, util::LoadMethod load_method);
lm::WordIndex TranslateID(const Word &word) const {
std::size_t factor = word.GetFactor(m_factorType)->GetId();
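
With util/mmap.hh included, callers of this header pass a util::LoadMethod instead of a bool. A minimal sketch of the four-argument ConstructKenLM overload declared above; the file path and factor type are placeholders, and the comments summarize KenLM's load methods rather than quoting this diff.

#include <string>
#include "util/mmap.hh"
#include "moses/LM/Ken.h"

Moses::LanguageModel *BuildLazyLm(const std::string &line)
{
  // util::LAZY mmaps the binary model and lets the OS page it in on demand;
  // util::POPULATE_OR_READ (the old lazy=false behaviour) loads it up front,
  // while util::READ / util::PARALLEL_READ read it into memory directly.
  return Moses::ConstructKenLM(line, "/path/to/lm.binary",
                               /*factorType=*/0, util::LAZY);
}
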
diff --git a/moses/LM/Reloading.cpp b/moses/LM/Reloading.cpp
index dc4eaadb6..0f9d80a70 100644
--- a/moses/LM/Reloading.cpp
+++ b/moses/LM/Reloading.cpp
@@ -73,7 +73,7 @@ template <class Model> FFState *ReloadingLanguageModel<Model>::EvaluateWhenAppli
std::auto_ptr<FFState> kenlmState(LanguageModelKen<Model>::EvaluateWhenApplied(hypo, ps, out));
const lm::ngram::State &out_state = static_cast<const ReloadingLMState&>(*kenlmState).state;
-
+
std::auto_ptr<ReloadingLMState> ret(new ReloadingLMState());
ret->state = out_state;
diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h
index 315b1bee2..d5ae83d17 100644
--- a/moses/LM/Reloading.h
+++ b/moses/LM/Reloading.h
@@ -64,18 +64,18 @@ private:
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:
-
- ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy), m_file(file), m_lazy(lazy)
- {
+ // TODO(Lane) copy less code, update to load_method
+ ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
// std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
-
+
}
- virtual void InitializeForInput(ttasksptr const& ttask) {
+ virtual void InitializeForInput(ttasksptr const& ttask) {
std::cerr << "ReloadingLM InitializeForInput" << std::endl;
- LanguageModelKen<Model>::LoadModel(m_file, m_lazy);
+ // TODO(lane): load_method
+ LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
/*
lm::ngram::Config config;
if(this->m_verbosity >= 1) {
@@ -87,15 +87,15 @@ public:
MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
-
+
m_ngram.reset(new Model(m_file.c_str(), config));
-
+
m_beginSentenceFactor = collection.AddFactor(BOS_);
*/
};
/*
- ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
+ ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) {
std::cerr << "ReloadingLM constructor" << std::endl;
std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl;
}
@@ -138,12 +138,12 @@ public:
}
-private:
+ private:
LanguageModel *m_lm;
*/
- protected:
+protected:
using LanguageModelKen<Model>::m_ngram;
using LanguageModelKen<Model>::m_lmIdLookup;
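
The TODO(lane) notes above leave ReloadingLanguageModel on the old bool and translate it to a util::LoadMethod at each call site. One possible shape for that follow-up, shown only as a sketch (it is not part of this commit) and reusing the same includes as Reloading.h:

// Hypothetical: store the chosen util::LoadMethod once and reuse it on reload.
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:
  ReloadingLanguageModel(const std::string &line, const std::string &file,
                         FactorType factorType, util::LoadMethod load_method)
    : LanguageModelKen<Model>(line, file, factorType, load_method)
    , m_file(file)
    , m_load_method(load_method) {
    std::cerr << "ReloadingLM constructor: " << m_file << std::endl;
  }

  virtual void InitializeForInput(ttasksptr const& ttask) {
    std::cerr << "ReloadingLM InitializeForInput" << std::endl;
    LanguageModelKen<Model>::LoadModel(m_file, m_load_method);
  }

protected:
  std::string m_file;
  util::LoadMethod m_load_method;
};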