Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorheafield <heafield@1f5c12ca-751b-0410-a591-d2e778427230>2011-10-13 16:33:05 +0400
committerheafield <heafield@1f5c12ca-751b-0410-a591-d2e778427230>2011-10-13 16:33:05 +0400
commitf08424840530575aaf349fc01397a07678b8cbf5 (patch)
tree92e05ed9bb3473d542b4a798738dc0179c3d25f8 /moses
parent7d9bc523a6a5f3151254d8bf95d99e3307394173 (diff)
Cut the middle men out of the language model interface.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4348 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r--moses/src/Hypothesis.h1
-rw-r--r--moses/src/LanguageModel.cpp56
-rw-r--r--moses/src/LanguageModel.h90
-rw-r--r--moses/src/LanguageModelFactory.cpp23
-rw-r--r--moses/src/LanguageModelImplementation.h71
-rw-r--r--moses/src/LanguageModelKen.cpp307
-rw-r--r--moses/src/LanguageModelKen.h30
-rw-r--r--moses/src/StaticData.cpp2
8 files changed, 242 insertions, 338 deletions
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 66a7fe86a..d05a17b8d 100644
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -32,7 +32,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h"
#include "PhraseDictionaryMemory.h"
#include "GenerationDictionary.h"
-#include "LanguageModelSingleFactor.h"
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "ObjectPool.h"
diff --git a/moses/src/LanguageModel.cpp b/moses/src/LanguageModel.cpp
index 336d214fc..d3b031268 100644
--- a/moses/src/LanguageModel.cpp
+++ b/moses/src/LanguageModel.cpp
@@ -19,15 +19,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-#include <cassert>
-#include <limits>
-#include <iostream>
-#include <memory>
-#include <sstream>
-
-#include "FFState.h"
#include "LanguageModel.h"
-#include "LanguageModelImplementation.h"
#include "TypeDef.h"
#include "Util.h"
#include "Manager.h"
@@ -38,41 +30,20 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
-namespace Moses
-{
-LanguageModel::LanguageModel(ScoreIndexManager &scoreIndexManager, LanguageModelImplementation *implementation) :
- m_implementation(implementation)
-{
+namespace Moses {
+
+LanguageModel::LanguageModel() {
m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
- scoreIndexManager.AddScoreProducer(this);
-#ifndef WITH_THREADS
- // ref counting handled by boost otherwise
- m_implementation->IncrementReferenceCount();
-#endif
}
-LanguageModel::LanguageModel(ScoreIndexManager &scoreIndexManager, LanguageModel *loadedLM) :
- m_implementation(loadedLM->m_implementation)
-{
- m_enableOOVFeature = StaticData::Instance().GetLMEnableOOVFeature();
+void LanguageModel::Init(ScoreIndexManager &scoreIndexManager) {
scoreIndexManager.AddScoreProducer(this);
-#ifndef WITH_THREADS
- // ref counting handled by boost otherwise
- m_implementation->IncrementReferenceCount();
-#endif
}
-LanguageModel::~LanguageModel()
-{
-#ifndef WITH_THREADS
- if(m_implementation->DecrementReferenceCount() == 0)
- delete m_implementation;
-#endif
-}
+LanguageModel::~LanguageModel() {}
// don't inline virtual funcs...
-size_t LanguageModel::GetNumScoreComponents() const
-{
+size_t LanguageModel::GetNumScoreComponents() const {
if (m_enableOOVFeature) {
return 2;
} else {
@@ -80,26 +51,17 @@ size_t LanguageModel::GetNumScoreComponents() const
}
}
-float LanguageModel::GetWeight() const
-{
+float LanguageModel::GetWeight() const {
size_t lmIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[lmIndex];
}
-float LanguageModel::GetOOVWeight() const
-{
+float LanguageModel::GetOOVWeight() const {
if (!m_enableOOVFeature) return 0;
size_t lmIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[lmIndex+1];
-
-}
-
-const FFState* LanguageModel::EmptyHypothesisState(const InputType &/*input*/) const
-{
- // This is actually correct. The empty _hypothesis_ has <s> in it. Phrases use m_emptyContextState.
- return m_implementation->NewState(m_implementation->GetBeginSentenceState());
}
-}
+} // namespace Moses
diff --git a/moses/src/LanguageModel.h b/moses/src/LanguageModel.h
index db42d1896..41472a3b8 100644
--- a/moses/src/LanguageModel.h
+++ b/moses/src/LanguageModel.h
@@ -29,11 +29,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "FeatureFunction.h"
#include "Word.h"
-#include "LanguageModelImplementation.h"
-
-#ifdef WITH_THREADS
-#include <boost/shared_ptr.hpp>
-#endif
namespace Moses
{
@@ -43,56 +38,24 @@ class Factor;
class Phrase;
//! Abstract base class which represent a language model on a contiguous phrase
-class LanguageModel : public StatefulFeatureFunction
-{
+class LanguageModel : public StatefulFeatureFunction {
protected:
-#ifdef WITH_THREADS
- // if we have threads, we also have boost and can let it handle thread-safe reference counting
- boost::shared_ptr<LanguageModelImplementation> m_implementation;
-#else
- LanguageModelImplementation *m_implementation;
-#endif
+ LanguageModel();
+
+  // This can't be in the constructor for virtual function dispatch reasons
+ void Init(ScoreIndexManager &scoreIndexManager);
+
bool m_enableOOVFeature;
public:
- /**
- * Create a new language model
- */
- LanguageModel(ScoreIndexManager &scoreIndexManager, LanguageModelImplementation *implementation);
-
- /**
- * Create a new language model reusing an already loaded implementation
- */
- LanguageModel(ScoreIndexManager &scoreIndexManager, LanguageModel *implementation);
-
virtual ~LanguageModel();
+ // Make another feature without copying the underlying model data.
+ virtual LanguageModel *Duplicate(ScoreIndexManager &scoreIndexManager) const = 0;
+
//! see ScoreProducer.h
size_t GetNumScoreComponents() const;
- /* whether this LM can be used on a particular phrase.
- * Should return false if phrase size = 0 or factor types required don't exists
- */
- bool Useable(const Phrase &phrase) const {
- return m_implementation->Useable(phrase);
- }
-
- /* calc total unweighted LM score of this phrase and return score via arguments.
- * Return scores should always be in natural log, regardless of representation with LM implementation.
- * Uses GetValue() of inherited class.
- * Useable() should be called beforehand on the phrase
- * \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase
- * \param ngramScore score of only n-gram of order m_nGramOrder
- * \param oovCount number of LM OOVs
- */
- void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
- return m_implementation->CalcScore(phrase, fullScore, ngramScore, oovCount);
- }
-
- virtual std::string GetScoreProducerDescription(unsigned idx=0) const {
- return m_implementation->GetScoreProducerDescription(idx);
- }
-
bool OOVFeatureEnabled() const {
return m_enableOOVFeature;
}
@@ -104,29 +67,26 @@ public:
return "lm";
}
- void InitializeBeforeSentenceProcessing() {
- m_implementation->InitializeBeforeSentenceProcessing();
- }
+ virtual void InitializeBeforeSentenceProcessing() {}
- void CleanUpAfterSentenceProcessing() {
- m_implementation->CleanUpAfterSentenceProcessing();
- }
+ virtual void CleanUpAfterSentenceProcessing() {}
- virtual const FFState* EmptyHypothesisState(const InputType &input) const;
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
- FFState* Evaluate(
- const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const {
- return m_implementation->Evaluate(cur_hypo, prev_state, accumulator, this);
- }
+ /* whether this LM can be used on a particular phrase.
+   * Should return false if phrase size = 0 or factor types required don't exist
+ */
+ virtual bool Useable(const Phrase &phrase) const = 0;
- FFState* EvaluateChart(
- const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const {
- return m_implementation->EvaluateChart(cur_hypo, featureID, accumulator, this);
- }
+ /* calc total unweighted LM score of this phrase and return score via arguments.
+ * Return scores should always be in natural log, regardless of representation with LM implementation.
+ * Uses GetValue() of inherited class.
+ * Useable() should be called beforehand on the phrase
+ * \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase
+ * \param ngramScore score of only n-gram of order m_nGramOrder
+ * \param oovCount number of LM OOVs
+ */
+ virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const = 0;
};
}
diff --git a/moses/src/LanguageModelFactory.cpp b/moses/src/LanguageModelFactory.cpp
index 92d5597e8..0087ac3a8 100644
--- a/moses/src/LanguageModelFactory.cpp
+++ b/moses/src/LanguageModelFactory.cpp
@@ -70,6 +70,14 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
, ScoreIndexManager &scoreIndexManager
, int dub )
{
+ if (lmImplementation == Ken || lmImplementation == LazyKen) {
+#ifdef LM_KEN
+ return ConstructKenLM(languageModelFile, scoreIndexManager, factorTypes[0], lmImplementation == LazyKen);
+#else
+ UserMessage::Add("KenLM isn't compiled in but your config asked for it");
+ return NULL;
+#endif
+ }
LanguageModelImplementation *lm = NULL;
switch (lmImplementation) {
case RandLM:
@@ -105,16 +113,6 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
lm = new LanguageModelSkip(new LanguageModelInternal());
#endif
break;
- case Ken:
-#ifdef LM_KEN
- lm = ConstructKenLM(languageModelFile, false);
-#endif
- break;
- case LazyKen:
-#ifdef LM_KEN
- lm = ConstructKenLM(languageModelFile, true);
-#endif
- break;
case Joint:
#ifdef LM_SRI
lm = new LanguageModelJoint(new LanguageModelSRI());
@@ -137,10 +135,13 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
lm = new LanguageModelDMapLM();
#endif
break;
+ default:
+ break;
}
if (lm == NULL) {
UserMessage::Add("Language model type unknown. Probably not compiled into library");
+ return NULL;
} else {
switch (lm->GetLMType()) {
case SingleFactor:
@@ -160,7 +161,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
}
}
- return new LanguageModel(scoreIndexManager, lm);
+ return new LMRefCount(scoreIndexManager, lm);
}
}
diff --git a/moses/src/LanguageModelImplementation.h b/moses/src/LanguageModelImplementation.h
index bd1ea41fd..8af54cc5d 100644
--- a/moses/src/LanguageModelImplementation.h
+++ b/moses/src/LanguageModelImplementation.h
@@ -29,6 +29,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "FeatureFunction.h"
#include "Word.h"
+#include "LanguageModel.h"
+
+#ifdef WITH_THREADS
+#include <boost/shared_ptr.hpp>
+#endif
namespace Moses
{
@@ -145,6 +150,72 @@ public:
#endif
};
+class LMRefCount : public LanguageModel {
+ public:
+ LMRefCount(ScoreIndexManager &scoreIndexManager, LanguageModelImplementation *impl) : m_impl(impl) {
+#ifndef WITH_THREADS
+ impl->IncrementReferenceCount();
+#endif
+ Init(scoreIndexManager);
+ }
+
+ ~LMRefCount() {
+#ifndef WITH_THREADS
+ if (!m_impl->DecrementReferenceCount()) delete m_impl;
+#endif
+ }
+
+ LanguageModel *Duplicate(ScoreIndexManager &scoreIndexManager) const {
+ return new LMRefCount(scoreIndexManager, *this);
+ }
+
+ void InitializeBeforeSentenceProcessing() {
+ m_impl->InitializeBeforeSentenceProcessing();
+ }
+
+ void CleanUpAfterSentenceProcessing() {
+ m_impl->CleanUpAfterSentenceProcessing();
+ }
+
+ const FFState* EmptyHypothesisState(const InputType &/*input*/) const {
+ return m_impl->NewState(m_impl->GetBeginSentenceState());
+ }
+
+ bool Useable(const Phrase &phrase) const {
+ return m_impl->Useable(phrase);
+ }
+
+ void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
+ return m_impl->CalcScore(phrase, fullScore, ngramScore, oovCount);
+ }
+
+ FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const {
+ return m_impl->Evaluate(cur_hypo, prev_state, accumulator, this);
+ }
+
+ FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const {
+ return m_impl->EvaluateChart(cur_hypo, featureID, accumulator, this);
+ }
+
+ std::string GetScoreProducerDescription(unsigned int param) const {
+ return m_impl->GetScoreProducerDescription(param);
+ }
+
+ private:
+ LMRefCount(ScoreIndexManager &scoreIndexManager, const LMRefCount &copy_from) : m_impl(copy_from.m_impl) {
+#ifndef WITH_THREADS
+ m_impl->IncrementReferenceCount();
+#endif
+ Init(scoreIndexManager);
+ }
+
+#ifdef WITH_THREADS
+ boost::shared_ptr<LanguageModelImplementation> m_impl;
+#else
+ LanguageModelImplementation *m_impl;
+#endif
+};
+
}
#endif
diff --git a/moses/src/LanguageModelKen.cpp b/moses/src/LanguageModelKen.cpp
index d3548df15..596378f17 100644
--- a/moses/src/LanguageModelKen.cpp
+++ b/moses/src/LanguageModelKen.cpp
@@ -26,9 +26,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <stdlib.h>
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
+#include "lm/left.hh"
#include "lm/model.hh"
#include "LanguageModelKen.h"
+#include "LanguageModel.h"
#include "FFState.h"
#include "TypeDef.h"
#include "Util.h"
@@ -38,12 +40,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "ChartHypothesis.h"
+#ifdef WITH_THREADS
+#include <boost/scoped_ptr.hpp>
+#endif
+
using namespace std;
namespace Moses {
-
-LanguageModelKenBase::~LanguageModelKenBase() {}
-
namespace {
struct KenLMState : public FFState {
@@ -59,56 +62,51 @@ struct KenLMState : public FFState {
/*
* An implementation of single factor LM using Ken's code.
*/
-template <class Model> class LanguageModelKen : public LanguageModelKenBase {
+template <class Model> class LanguageModelKen : public LanguageModel {
public:
- LanguageModelKen(bool lazy);
- ~LanguageModelKen();
-
- bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
+ LanguageModelKen(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy);
- LMResult GetValueGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const {
- return GetKenFullScoreGivenState(contextFactor, state);
+ ~LanguageModelKen() {
+#ifndef WITH_THREADS
+ if (!--*m_refcount) {
+ delete m_ngram;
+ delete m_refcount;
+ }
+#endif
}
- LMKenResult GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const;
- LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const {
- return GetKenFullScoreForgotState(contextFactor, outState);
- }
- LMKenResult GetKenFullScoreForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
+ LanguageModel *Duplicate(ScoreIndexManager &scoreIndexManager) const;
- void GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
+ bool Useable(const Phrase &phrase) const {
+ return (phrase.GetSize()>0 && phrase.GetFactor(0, m_factorType) != NULL);
+ }
- const FFState *GetNullContextState() const;
- const FFState *GetBeginSentenceState() const;
- FFState *NewState(const FFState *from = NULL) const;
+ std::string GetScoreProducerDescription(unsigned) const {
+ std::ostringstream oss;
+ oss << "LM_" << m_ngram->Order() << "gram";
+ return oss.str();
+ }
- void CleanUpAfterSentenceProcessing() {}
- void InitializeBeforeSentenceProcessing() {}
+ const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
+ KenLMState *ret = new KenLMState();
+ ret->state = m_ngram->BeginSentenceState();
+ return ret;
+ }
- FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out, const LanguageModel *feature) const;
+ void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
- FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator, const LanguageModel *feature) const;
+ FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
- void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
+ FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
private:
- Model *m_ngram;
- std::vector<lm::WordIndex> m_lmIdLookup;
- bool m_lazy;
- KenLMState m_nullContextState;
- KenLMState m_beginSentenceState;
+ LanguageModelKen(ScoreIndexManager &manager, const LanguageModelKen<Model> &copy_from);
lm::WordIndex TranslateID(const Word &word) const {
- std::size_t factor = word.GetFactor(GetFactorType())->GetId();
+ std::size_t factor = word.GetFactor(m_factorType)->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
- void TranslateIDs(const std::vector<const Word*> &contextFactor, lm::WordIndex *indices) const {
- for (size_t i = 0 ; i < contextFactor.size(); i++) {
- indices[contextFactor.size() - 1 - i] = TranslateID(*contextFactor[i]);
- }
- }
-
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
lm::WordIndex *index = indices;
@@ -123,17 +121,19 @@ template <class Model> class LanguageModelKen : public LanguageModelKenBase {
*index = TranslateID(hypo.GetWord(position));
}
}
-};
-template <class Model> LanguageModelKen<Model>::LanguageModelKen(bool lazy)
- :m_ngram(NULL), m_lazy(lazy)
-{
-}
+#ifdef WITH_THREADS
+ boost::shared_ptr<Model> m_ngram;
+#else
+ Model *m_ngram;
+ mutable unsigned int *m_refcount;
+#endif
+ std::vector<lm::WordIndex> m_lmIdLookup;
-template <class Model> LanguageModelKen<Model>::~LanguageModelKen()
-{
- delete m_ngram;
-}
+ FactorType m_factorType;
+
+ const Factor *m_beginSentenceFactor;
+};
class MappingBuilder : public lm::EnumerateVocab {
public:
@@ -157,119 +157,88 @@ private:
std::string str_;
};
-template <class Model> bool LanguageModelKen<Model>::Load(const std::string &filePath,
- FactorType factorType,
- size_t /*nGramOrder*/)
-{
- m_factorType = factorType;
- m_filePath = filePath;
-
- FactorCollection &factorCollection = FactorCollection::Instance();
- m_sentenceStart = factorCollection.AddFactor(BOS_);
- m_sentenceStartArray[m_factorType] = m_sentenceStart;
- m_sentenceEnd = factorCollection.AddFactor(EOS_);
- m_sentenceEndArray[m_factorType] = m_sentenceEnd;
-
- MappingBuilder builder(factorCollection, m_lmIdLookup);
+template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy) : m_factorType(factorType) {
lm::ngram::Config config;
-
IFVERBOSE(1) {
config.messages = &std::cerr;
- }
- else {
+ } else {
config.messages = NULL;
}
-
+ FactorCollection &collection = FactorCollection::Instance();
+ MappingBuilder builder(collection, m_lmIdLookup);
config.enumerate_vocab = &builder;
- config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ;
+ config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
try {
- m_ngram = new Model(filePath.c_str(), config);
+#ifdef WITH_THREADS
+ m_ngram.reset(new Model(file.c_str(), config));
+#else
+ m_ngram = new Model(file.c_str(), config);
+ m_refcount = new unsigned int();
+ *m_refcount = 1;
+#endif
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
abort();
}
- m_nGramOrder = m_ngram->Order();
- m_nullContextState.state = m_ngram->NullContextState();
- m_beginSentenceState.state = m_ngram->BeginSentenceState();
- return true;
+ m_beginSentenceFactor = collection.AddFactor(BOS_);
+ Init(manager);
}
-template <class Model> LMKenResult LanguageModelKen<Model>::GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const
-{
- LMKenResult result;
- if (contextFactor.empty()) {
- result.score = 0.0;
- result.unknown = false;
- result.ngram_length = 0;
- return result;
- }
- lm::ngram::State &realState = static_cast<KenLMState&>(state).state;
- std::size_t factor = contextFactor.back()->GetFactor(GetFactorType())->GetId();
- lm::WordIndex new_word = (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
- lm::ngram::State copied(realState);
- lm::FullScoreReturn ret(m_ngram->FullScore(copied, new_word, realState));
-
- result.score = TransformLMScore(ret.prob);
- result.unknown = (new_word == 0);
- result.ngram_length = ret.ngram_length;
- return result;
+template <class Model> LanguageModel *LanguageModelKen<Model>::Duplicate(ScoreIndexManager &manager) const {
+ return new LanguageModelKen<Model>(manager, *this);
}
-template <class Model> LMKenResult LanguageModelKen<Model>::GetKenFullScoreForgotState(const vector<const Word*> &contextFactor, FFState &outState) const
-{
- LMKenResult result;
- if (contextFactor.empty()) {
- static_cast<KenLMState&>(outState).state = m_ngram->NullContextState();
- result.score = 0.0;
- result.unknown = false;
- result.ngram_length = 0;
- return result;
- }
-
- lm::WordIndex indices[contextFactor.size()];
- TranslateIDs(contextFactor, indices);
-
- lm::FullScoreReturn ret(m_ngram->FullScoreForgotState(indices + 1, indices + contextFactor.size(), indices[0], static_cast<KenLMState&>(outState).state));
-
- result.score = TransformLMScore(ret.prob);
- result.unknown = (indices[0] == 0);
- result.ngram_length = ret.ngram_length;
- return result;
+template <class Model> LanguageModelKen<Model>::LanguageModelKen(ScoreIndexManager &manager, const LanguageModelKen<Model> &copy_from) :
+ m_ngram(copy_from.m_ngram),
+ // TODO: don't copy this.
+ m_lmIdLookup(copy_from.m_lmIdLookup),
+ m_factorType(copy_from.m_factorType),
+ m_beginSentenceFactor(copy_from.m_beginSentenceFactor) {
+#ifndef WITH_THREADS
+ m_refcount = copy_from.m_refcount;
+ ++*m_refcount;
+#endif
+ Init(manager);
}
-template <class Model> void LanguageModelKen<Model>::GetState(const std::vector<const Word*> &contextFactor, FFState &outState) const
-{
- if (contextFactor.empty()) {
- static_cast<KenLMState&>(outState).state = m_ngram->NullContextState();
- return;
- }
- lm::WordIndex indices[contextFactor.size()];
- TranslateIDs(contextFactor, indices);
- m_ngram->GetState(indices, indices + contextFactor.size(), static_cast<KenLMState&>(outState).state);
-}
+template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
+ fullScore = 0;
+ ngramScore = 0;
+ oovCount = 0;
-template <class Model> const FFState *LanguageModelKen<Model>::GetNullContextState() const
-{
- return &m_nullContextState;
-}
+ if (!phrase.GetSize()) return;
-template <class Model> const FFState *LanguageModelKen<Model>::GetBeginSentenceState() const
-{
- return &m_beginSentenceState;
-}
+ typename Model::State state_backing[2];
+ typename Model::State *state0 = &state_backing[0], *state1 = &state_backing[1];
+ size_t position;
+ if (m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType)) {
+ *state0 = m_ngram->BeginSentenceState();
+ position = 1;
+ } else {
+ *state0 = m_ngram->NullContextState();
+ position = 0;
+ }
+
+ size_t ngramBoundary = m_ngram->Order() - 1;
-template <class Model> FFState *LanguageModelKen<Model>::NewState(const FFState *from) const
-{
- KenLMState *ret = new KenLMState;
- if (from) {
- ret->state = static_cast<const KenLMState&>(*from).state;
+ for (; position < phrase.GetSize(); ++position) {
+ const Word &word = phrase.GetWord(position);
+ if (word.IsNonTerminal()) {
+ *state0 = m_ngram->NullContextState();
+ } else {
+ lm::WordIndex index = TranslateID(word);
+ float score = TransformLMScore(m_ngram->Score(*state0, index, *state1));
+ std::swap(state0, state1);
+ if (position >= ngramBoundary) ngramScore += score;
+ fullScore += score;
+ if (!index) ++oovCount;
+ }
}
- return ret;
}
-template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out, const LanguageModel *feature) const {
+template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;
std::auto_ptr<KenLMState> ret(new KenLMState());
@@ -312,13 +281,13 @@ template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothes
score = TransformLMScore(score);
- if (feature->OOVFeatureEnabled()) {
+ if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
scores[1] = 0.0;
- out->PlusEquals(feature, scores);
+ out->PlusEquals(this, scores);
} else {
- out->PlusEquals(feature, score);
+ out->PlusEquals(this, score);
}
return ret.release();
@@ -342,11 +311,7 @@ class LanguageModelChartStateKenLM : public FFState {
lm::ngram::ChartState m_state;
};
-template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(
- const ChartHypothesis& hypo,
- int featureID,
- ScoreComponentCollection *accumulator,
- const LanguageModel *feature) const {
+template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const {
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
@@ -356,7 +321,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(
// Special cases for first word.
if (size) {
const Word &word = hypo.GetCurrTargetPhrase().GetWord(0);
- if (word == GetSentenceStartArray()) {
+ if (word.GetFactor(m_factorType) == m_beginSentenceFactor) {
// Begin of sentence
ruleScore.BeginSentence();
phrasePos++;
@@ -364,7 +329,7 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(
// Non-terminal is first so we can copy instead of rescoring.
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
- ruleScore.BeginNonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(feature)[0]);
+ ruleScore.BeginNonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
phrasePos++;
}
}
@@ -374,76 +339,38 @@ template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(
if (word.IsNonTerminal()) {
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
- ruleScore.NonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(feature)[0]);
+ ruleScore.NonTerminal(prevState, prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
} else {
ruleScore.Terminal(TranslateID(word));
}
}
- accumulator->Assign(feature, ruleScore.Finish());
+ accumulator->Assign(this, ruleScore.Finish());
return newState;
}
-template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const {
- fullScore = 0;
- ngramScore = 0;
- oovCount = 0;
-
- if (!phrase.GetSize()) return;
-
- typename Model::State state_backing[2];
- typename Model::State *state0 = &state_backing[0], *state1 = &state_backing[1];
- size_t position;
- if (phrase.GetWord(0) == GetSentenceStartArray()) {
- *state0 = m_ngram->BeginSentenceState();
- position = 1;
- } else {
- *state0 = m_ngram->NullContextState();
- position = 0;
- }
-
- FactorType factorType = GetFactorType();
- size_t ngramBoundary = m_ngram->Order() - 1;
-
- for (; position < phrase.GetSize(); ++position) {
- const Word &word = phrase.GetWord(position);
- if (word.IsNonTerminal()) {
- *state0 = m_ngram->NullContextState();
- } else {
- std::size_t factor = word.GetFactor(factorType)->GetId();
- lm::WordIndex index = factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor];
- float score = TransformLMScore(m_ngram->Score(*state0, index, *state1));
- std::swap(state0, state1);
- if (position >= ngramBoundary) ngramScore += score;
- fullScore += score;
- if (!index) ++oovCount;
- }
- }
-}
-
} // namespace
-LanguageModelSingleFactor *ConstructKenLM(const std::string &file, bool lazy)
-{
+LanguageModel *ConstructKenLM(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy) {
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
case lm::ngram::HASH_PROBING:
- return new LanguageModelKen<lm::ngram::ProbingModel>(lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(file, manager, factorType, lazy);
case lm::ngram::TRIE_SORTED:
- return new LanguageModelKen<lm::ngram::TrieModel>(lazy);
+ return new LanguageModelKen<lm::ngram::TrieModel>(file, manager, factorType, lazy);
case lm::ngram::QUANT_TRIE_SORTED:
- return new LanguageModelKen<lm::ngram::QuantTrieModel>(lazy);
+ return new LanguageModelKen<lm::ngram::QuantTrieModel>(file, manager, factorType, lazy);
case lm::ngram::ARRAY_TRIE_SORTED:
- return new LanguageModelKen<lm::ngram::ArrayTrieModel>(lazy);
+ return new LanguageModelKen<lm::ngram::ArrayTrieModel>(file, manager, factorType, lazy);
case lm::ngram::QUANT_ARRAY_TRIE_SORTED:
- return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(lazy);
+ return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(file, manager, factorType, lazy);
default:
std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
abort();
}
} else {
- return new LanguageModelKen<lm::ngram::ProbingModel>(lazy);
+ return new LanguageModelKen<lm::ngram::ProbingModel>(file, manager, factorType, lazy);
}
}
diff --git a/moses/src/LanguageModelKen.h b/moses/src/LanguageModelKen.h
index 1b3d8def1..0a4076111 100644
--- a/moses/src/LanguageModelKen.h
+++ b/moses/src/LanguageModelKen.h
@@ -24,32 +24,16 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
-#include "LanguageModelSingleFactor.h"
-#include "kenlm/lm/left.hh"
-
-namespace Moses
-{
-
- // kenlm specific score value
- struct LMKenResult : public LMResult {
- unsigned char ngram_length;
- };
-
- // base-class for the actual LanguageModelKen; only here to provide a specific behaviour without exposing the implementation
- class LanguageModelKenBase : public LanguageModelSingleFactor {
- public:
- virtual ~LanguageModelKenBase();
- // scoring functions which provide more info than the common interface of LanguageModel
- virtual LMKenResult GetKenFullScoreGivenState(const std::vector<const Word*> &contextFactor, FFState &state) const = 0;
- virtual LMKenResult GetKenFullScoreForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const = 0;
-};
+#include "TypeDef.h"
-class ScoreIndexManager;
+namespace Moses {
-// Doesn't actually load; moses wants the Load method for that. It needs the file to autodetect binary format.
-LanguageModelSingleFactor *ConstructKenLM(const std::string &file, bool lazy);
+class ScoreIndexManager;
+class LanguageModel;
-}
+// This will also load.
+LanguageModel *ConstructKenLM(const std::string &file, ScoreIndexManager &manager, FactorType factorType, bool lazy);
+} // namespace Moses
#endif
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index a9a521cb8..52682641c 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -812,7 +812,7 @@ bool StaticData::LoadLanguageModels()
for(size_t i=0; i<lmVector.size(); i++) {
LanguageModel* lm = NULL;
if (languageModelsLoaded.find(lmVector[i]) != languageModelsLoaded.end()) {
- lm = new LanguageModel(m_scoreIndexManager, languageModelsLoaded[lmVector[i]]);
+ lm = languageModelsLoaded[lmVector[i]]->Duplicate(m_scoreIndexManager);
} else {
vector<string> token = Tokenize(lmVector[i]);
if (token.size() != 4 && token.size() != 5 ) {