Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorLane Schwartz <dowobeha@gmail.com>2017-01-02 21:57:52 +0300
committerLane Schwartz <dowobeha@gmail.com>2017-01-02 21:57:52 +0300
commit578e65298f365b7844665d5f2a0f9e298c832ae7 (patch)
treefc0ccd66f7a39308eba6ce7fc8d1afe71cc6d1c8 /moses
parent999d6b6371437862e9309c5bcfe5ccf78a9782ab (diff)
Add InMemoryPerSentenceOnDemandLM
Diffstat (limited to 'moses')
-rw-r--r--moses/FF/Factory.cpp2
-rw-r--r--moses/LM/Implementation.cpp2
-rw-r--r--moses/LM/InMemoryPerSentenceOnDemandLM.cpp91
-rw-r--r--moses/LM/InMemoryPerSentenceOnDemandLM.h135
-rw-r--r--moses/LM/Jamfile2
-rw-r--r--moses/LM/Ken.cpp10
-rw-r--r--moses/LM/Ken.h6
7 files changed, 246 insertions, 2 deletions
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index a048410d0..9ae145504 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -68,6 +68,7 @@
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
+#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
#include "moses/TranslationModel/SkeletonPT.h"
@@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
+ MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index eb67100ca..c0a69994d 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
{
if (contextFactor.size() < GetNGramOrder()) {
contextFactor.push_back(&word);
- } else {
+ } else if (GetNGramOrder() > 0) {
// shift
for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
new file mode 100644
index 000000000..12ef78f4e
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@@ -0,0 +1,91 @@
+#include <boost/foreach.hpp>
+#include "InMemoryPerSentenceOnDemandLM.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationTask.h"
+#include "moses/ContextScope.h"
+#include "moses/LM/Ken.h"
+#include "lm/model.hh"
+#include "util/mmap.hh"
+
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+namespace Moses
+{
// Construct the feature from its moses.ini line. Only registers the
// feature (via ReadParameters); no model is loaded here — the actual
// per-sentence LM is built later in InitializeForInput(), once the
// translation task has supplied the LM data in its ContextScope.
InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
{
  ReadParameters();
}
+
// Nothing to release explicitly: the per-thread KenLM instances are owned
// by m_perThreadLM (boost::thread_specific_ptr), which presumably deletes
// each thread's instance on thread exit — see Boost.Thread TSS docs.
InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
{
}
+
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+
+ // The context scope object for this translation task
+ // contains a map of translation task-specific data
+ boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();
+
+ // The key to the map is this object
+ void const* key = static_cast<void const*>(this);
+
+ // The value stored in the map is a string representing a phrase table
+ boost::shared_ptr<string> value = contextScope->get<string>(key);
+
+ // Create a stream to read the phrase table data
+ stringstream strme(*(value.get()));
+
+ char * nullpointer = (char *) 0;
+ const char * filename = std::tmpnam(nullpointer);
+ ofstream tmp;
+ tmp.open(filename);
+
+ // Read the phrase table data, one line at a time
+ string line;
+ while (getline(strme, line)) {
+
+ tmp << line << "\n";
+
+ }
+
+ tmp.close();
+
+ LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
+ lm.LoadModel("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm", util::POPULATE_OR_READ);
+
+ initialized = true;
+
+ VERBOSE(1, filename);
+ if (initialized) {
+ VERBOSE(1, "\tLM initialized\n");
+ }
+
+ // std::remove(filename);
+
+}
+
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+
+ LanguageModelKen<lm::ngram::ProbingModel> *lm;
+ lm = m_perThreadLM.get();
+ if (lm == NULL) {
+ lm = new LanguageModelKen<lm::ngram::ProbingModel>();
+ m_perThreadLM.reset(lm);
+ }
+ assert(lm);
+ return *lm;
+
+}
+
+
+
+}
+
+
+
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h
new file mode 100644
index 000000000..f0c1effa7
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@@ -0,0 +1,135 @@
+// $Id$
+#pragma once
+
+#include <vector>
+#include "SingleFactor.h"
+#include <boost/thread/tss.hpp>
+#include "lm/model.hh"
+#include "moses/LM/Ken.h"
+#include "moses/FF/FFState.h"
+
+namespace Moses
+{
+
+struct InMemoryPerSentenceOnDemandLMState : public FFState {
+ lm::ngram::State state;
+ virtual size_t hash() const {
+ size_t ret = hash_value(state);
+ return ret;
+ }
+ virtual bool operator==(const FFState& o) const {
+ const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
+ bool ret = state == other.state;
+ return ret;
+ }
+
+};
+
// A language model whose contents are supplied per sentence, at translation
// time, via the translation task's ContextScope. Every virtual LM method is
// delegated to a thread-local LanguageModelKen<ProbingModel> instance
// (m_perThreadLM); before InitializeForInput has run, scoring methods either
// no-op, return an empty state, or throw.
//
// NOTE(review): `initialized` is a single bool shared by ALL threads, while
// the model itself is per-thread. One thread finishing InitializeForInput
// flips the flag for every thread, even those whose per-thread LM has not
// been loaded yet — TODO confirm whether this flag needs to be thread-local.
class InMemoryPerSentenceOnDemandLM : public LanguageModel
{
public:
  InMemoryPerSentenceOnDemandLM(const std::string &line);
  ~InMemoryPerSentenceOnDemandLM();

  // Builds this task's LM from the string stored in the task's ContextScope
  // (keyed by this object) and sets `initialized`.
  void InitializeForInput(ttasksptr const& ttask);

  // NOTE(review): called regardless of `initialized`, so this constructs an
  // empty per-thread KenLM on first use just to store the parameter.
  virtual void SetParameter(const std::string& key, const std::string& value) {
    GetPerThreadLM().SetParameter(key, value);
  }

  // Before initialization, return a fresh (empty) state rather than
  // consulting a model that does not exist yet.
  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
    if (initialized) {
      return GetPerThreadLM().EmptyHypothesisState(input);
    } else {
      return new InMemoryPerSentenceOnDemandLMState();
    }
  }

  // Phrase-based scoring: delegated; scoring without a loaded model is a
  // hard error.
  virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Chart (hierarchical) scoring: same contract as above.
  virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Syntax-based scoring: same contract as above.
  virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Precomputation hooks below silently no-op when uninitialized (unlike
  // the EvaluateWhenApplied overloads, which throw).
  virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
    if (initialized) {
      GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
    }
  }

  virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
    if (initialized) {
      GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
    }
  }

  // NOTE(review): unlike most methods here, the next two are not guarded by
  // `initialized` — confirm that is intentional.
  virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
    GetPerThreadLM().IssueRequestsFor(hypo, input_state);
  }

  virtual void sync() {
    GetPerThreadLM().sync();
  }

  virtual void SetFFStateIdx(int state_idx) {
    if (initialized) {
      GetPerThreadLM().SetFFStateIdx(state_idx);
    }
  }

  virtual void IncrementalCallback(Incremental::Manager &manager) const {
    if (initialized) {
      GetPerThreadLM().IncrementalCallback(manager);
    }
  }

  virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
    if (initialized) {
      GetPerThreadLM().ReportHistoryOrder(out, phrase);
    }
  }

  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedScores) const {
    if (initialized) {
      GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
    }
  }

  // NOTE(review): unguarded — constructs a per-thread LM even before
  // initialization, just to answer the factor-mask query.
  bool IsUseable(const FactorMask &mask) const {
    return GetPerThreadLM().IsUseable(mask);
  }


protected:
  // Lazily creates (on first call per thread) and returns the calling
  // thread's KenLM instance.
  LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;

  // One KenLM instance per decoding thread.
  mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;

  // Set once InitializeForInput has loaded a model (shared across threads —
  // see class-level NOTE above).
  bool initialized;

};
+
+
+}
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 75b66603c..4eafbd632 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -138,7 +138,7 @@ if $(with-dalm) {
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index c7ac663cc..e42e60274 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
+ VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
@@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
LoadModel(file, load_method);
}
// Private default constructor: builds an empty KenLM feature shell with no
// moses.ini line and no model. Used by the friend class
// InMemoryPerSentenceOnDemandLM, which constructs the shell per thread and
// supplies the actual model later via LoadModel().
// NOTE(review): m_ngram stays unset until LoadModel() runs — confirm no
// member function that dereferences m_ngram can be called before then.
template <class Model> LanguageModelKen<Model>::LanguageModelKen()
  :LanguageModel("KENLM")
  ,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
  ,m_factorType(0)
{
  ReadParameters();
}
+
+
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
:LanguageModel(copy_from.GetArgLine()),
m_ngram(copy_from.m_ngram),
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 4934228c2..33590d659 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/TypeDef.h"
#include "moses/Word.h"
+
+
namespace Moses
{
//class LanguageModel;
class FFState;
+class InMemoryPerSentenceOnDemandLM;
LanguageModel *ConstructKenLM(const std::string &line);
@@ -67,6 +70,8 @@ public:
virtual bool IsUseable(const FactorMask &mask) const;
+ friend class InMemoryPerSentenceOnDemandLM;
+
protected:
boost::shared_ptr<Model> m_ngram;
@@ -84,6 +89,7 @@ protected:
std::vector<lm::WordIndex> m_lmIdLookup;
private:
+ LanguageModelKen();
LanguageModelKen(const LanguageModelKen<Model> &copy_from);
// Convert last words of hypothesis into vocab ids, returning an end pointer.