Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorLane Schwartz <dowobeha@gmail.com>2017-01-02 21:57:52 +0300
committerLane Schwartz <dowobeha@gmail.com>2017-01-02 21:57:52 +0300
commit578e65298f365b7844665d5f2a0f9e298c832ae7 (patch)
treefc0ccd66f7a39308eba6ce7fc8d1afe71cc6d1c8 /moses
parent999d6b6371437862e9309c5bcfe5ccf78a9782ab (diff)
Add InMemoryPerSentenceOnDemandLM
Diffstat (limited to 'moses')
-rw-r--r--moses/FF/Factory.cpp2
-rw-r--r--moses/LM/Implementation.cpp2
-rw-r--r--moses/LM/InMemoryPerSentenceOnDemandLM.cpp91
-rw-r--r--moses/LM/InMemoryPerSentenceOnDemandLM.h135
-rw-r--r--moses/LM/Jamfile2
-rw-r--r--moses/LM/Ken.cpp10
-rw-r--r--moses/LM/Ken.h6
7 files changed, 246 insertions, 2 deletions
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index a048410d0..9ae145504 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -68,6 +68,7 @@
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
+#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
#include "moses/TranslationModel/SkeletonPT.h"
@@ -299,6 +300,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
+ MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index eb67100ca..c0a69994d 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -61,7 +61,7 @@ void LanguageModelImplementation::ShiftOrPush(std::vector<const Word*> &contextF
{
if (contextFactor.size() < GetNGramOrder()) {
contextFactor.push_back(&word);
- } else {
+ } else if (GetNGramOrder() > 0) {
// shift
for (size_t currNGramOrder = 0 ; currNGramOrder < GetNGramOrder() - 1 ; currNGramOrder++) {
contextFactor[currNGramOrder] = contextFactor[currNGramOrder + 1];
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.cpp b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
new file mode 100644
index 000000000..12ef78f4e
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.cpp
@@ -0,0 +1,91 @@
+#include <boost/foreach.hpp>
+#include "InMemoryPerSentenceOnDemandLM.h"
+#include "moses/FactorCollection.h"
+#include "moses/Util.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationTask.h"
+#include "moses/ContextScope.h"
+#include "moses/LM/Ken.h"
+#include "lm/model.hh"
+#include "util/mmap.hh"
+
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+namespace Moses
+{
// Construct the feature from its moses.ini line. Only registers the
// feature (via ReadParameters); no model is loaded here — the actual
// per-sentence LM is built later in InitializeForInput(), once the
// translation task has supplied the LM data in its ContextScope.
InMemoryPerSentenceOnDemandLM::InMemoryPerSentenceOnDemandLM(const std::string &line) : LanguageModel(line), initialized(false)
{
  ReadParameters();
}
+
// Nothing to release explicitly: the per-thread KenLM instances are owned
// by m_perThreadLM (boost::thread_specific_ptr), which presumably deletes
// each thread's instance on thread exit — see Boost.Thread TSS docs.
InMemoryPerSentenceOnDemandLM::~InMemoryPerSentenceOnDemandLM()
{
}
+
+void InMemoryPerSentenceOnDemandLM::InitializeForInput(ttasksptr const& ttask) {
+
+ // The context scope object for this translation task
+ // contains a map of translation task-specific data
+ boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();
+
+ // The key to the map is this object
+ void const* key = static_cast<void const*>(this);
+
+ // The value stored in the map is a string representing a phrase table
+ boost::shared_ptr<string> value = contextScope->get<string>(key);
+
+ // Create a stream to read the phrase table data
+ stringstream strme(*(value.get()));
+
+ char * nullpointer = (char *) 0;
+ const char * filename = std::tmpnam(nullpointer);
+ ofstream tmp;
+ tmp.open(filename);
+
+ // Read the phrase table data, one line at a time
+ string line;
+ while (getline(strme, line)) {
+
+ tmp << line << "\n";
+
+ }
+
+ tmp.close();
+
+ LanguageModelKen<lm::ngram::ProbingModel> & lm = GetPerThreadLM();
+ lm.LoadModel("/home/lanes/mosesdecoder/tiny.with_per_sentence/europarl.en.srilm", util::POPULATE_OR_READ);
+
+ initialized = true;
+
+ VERBOSE(1, filename);
+ if (initialized) {
+ VERBOSE(1, "\tLM initialized\n");
+ }
+
+ // std::remove(filename);
+
+}
+
+LanguageModelKen<lm::ngram::ProbingModel>& InMemoryPerSentenceOnDemandLM::GetPerThreadLM() const {
+
+ LanguageModelKen<lm::ngram::ProbingModel> *lm;
+ lm = m_perThreadLM.get();
+ if (lm == NULL) {
+ lm = new LanguageModelKen<lm::ngram::ProbingModel>();
+ m_perThreadLM.reset(lm);
+ }
+ assert(lm);
+ return *lm;
+
+}
+
+
+
+}
+
+
+
diff --git a/moses/LM/InMemoryPerSentenceOnDemandLM.h b/moses/LM/InMemoryPerSentenceOnDemandLM.h
new file mode 100644
index 000000000..f0c1effa7
--- /dev/null
+++ b/moses/LM/InMemoryPerSentenceOnDemandLM.h
@@ -0,0 +1,135 @@
+// $Id$
+#pragma once
+
+#include <vector>
+#include "SingleFactor.h"
+#include <boost/thread/tss.hpp>
+#include "lm/model.hh"
+#include "moses/LM/Ken.h"
+#include "moses/FF/FFState.h"
+
+namespace Moses
+{
+
+struct InMemoryPerSentenceOnDemandLMState : public FFState {
+ lm::ngram::State state;
+ virtual size_t hash() const {
+ size_t ret = hash_value(state);
+ return ret;
+ }
+ virtual bool operator==(const FFState& o) const {
+ const InMemoryPerSentenceOnDemandLMState &other = static_cast<const InMemoryPerSentenceOnDemandLMState &>(o);
+ bool ret = state == other.state;
+ return ret;
+ }
+
+};
+
// A language model whose contents are supplied per sentence, at translation
// time, via the translation task's ContextScope. Every virtual LM method is
// delegated to a thread-local LanguageModelKen<ProbingModel> instance
// (m_perThreadLM); before InitializeForInput has run, scoring methods either
// no-op, return an empty state, or throw.
//
// NOTE(review): `initialized` is a single bool shared by ALL threads, while
// the model itself is per-thread. One thread finishing InitializeForInput
// flips the flag for every thread, even those whose per-thread LM has not
// been loaded yet — TODO confirm whether this flag needs to be thread-local.
class InMemoryPerSentenceOnDemandLM : public LanguageModel
{
public:
  InMemoryPerSentenceOnDemandLM(const std::string &line);
  ~InMemoryPerSentenceOnDemandLM();

  // Builds this task's LM from the string stored in the task's ContextScope
  // (keyed by this object) and sets `initialized`.
  void InitializeForInput(ttasksptr const& ttask);

  // NOTE(review): called regardless of `initialized`, so this constructs an
  // empty per-thread KenLM on first use just to store the parameter.
  virtual void SetParameter(const std::string& key, const std::string& value) {
    GetPerThreadLM().SetParameter(key, value);
  }

  // Before initialization, return a fresh (empty) state rather than
  // consulting a model that does not exist yet.
  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
    if (initialized) {
      return GetPerThreadLM().EmptyHypothesisState(input);
    } else {
      return new InMemoryPerSentenceOnDemandLMState();
    }
  }

  // Phrase-based scoring: delegated; scoring without a loaded model is a
  // hard error.
  virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(hypo, ps, out);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Chart (hierarchical) scoring: same contract as above.
  virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(cur_hypo, featureID, accumulator);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Syntax-based scoring: same contract as above.
  virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const {
    if (initialized) {
      return GetPerThreadLM().EvaluateWhenApplied(hyperedge, featureID, accumulator);
    } else {
      UTIL_THROW(util::Exception, "Can't evaluate an uninitialized LM\n");
    }
  }

  // Precomputation hooks below silently no-op when uninitialized (unlike
  // the EvaluateWhenApplied overloads, which throw).
  virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
    if (initialized) {
      GetPerThreadLM().CalcScore(phrase, fullScore, ngramScore, oovCount);
    }
  }

  virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
    if (initialized) {
      GetPerThreadLM().CalcScoreFromCache(phrase, fullScore, ngramScore, oovCount);
    }
  }

  // NOTE(review): unlike most methods here, the next two are not guarded by
  // `initialized` — confirm that is intentional.
  virtual void IssueRequestsFor(Hypothesis& hypo, const FFState* input_state) {
    GetPerThreadLM().IssueRequestsFor(hypo, input_state);
  }

  virtual void sync() {
    GetPerThreadLM().sync();
  }

  virtual void SetFFStateIdx(int state_idx) {
    if (initialized) {
      GetPerThreadLM().SetFFStateIdx(state_idx);
    }
  }

  virtual void IncrementalCallback(Incremental::Manager &manager) const {
    if (initialized) {
      GetPerThreadLM().IncrementalCallback(manager);
    }
  }

  virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const {
    if (initialized) {
      GetPerThreadLM().ReportHistoryOrder(out, phrase);
    }
  }

  virtual void EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedScores) const {
    if (initialized) {
      GetPerThreadLM().EvaluateInIsolation(source, targetPhrase, scoreBreakdown, estimatedScores);
    }
  }

  // NOTE(review): unguarded — constructs a per-thread LM even before
  // initialization, just to answer the factor-mask query.
  bool IsUseable(const FactorMask &mask) const {
    return GetPerThreadLM().IsUseable(mask);
  }


protected:
  // Lazily creates (on first call per thread) and returns the calling
  // thread's KenLM instance.
  LanguageModelKen<lm::ngram::ProbingModel> & GetPerThreadLM() const;

  // One KenLM instance per decoding thread.
  mutable boost::thread_specific_ptr<LanguageModelKen<lm::ngram::ProbingModel> > m_perThreadLM;

  // Set once InitializeForInput has loaded a model (shared across threads —
  // see class-level NOTE above).
  bool initialized;

};
+
+
+}
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 75b66603c..4eafbd632 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -138,7 +138,7 @@ if $(with-dalm) {
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index c7ac663cc..e42e60274 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -105,6 +105,7 @@ template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string
config.load_method = load_method;
m_ngram.reset(new Model(file.c_str(), config));
+ VERBOSE(2, "LanguageModelKen " << m_description << " reset to " << file << "\n");
}
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method)
@@ -116,6 +117,15 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
LoadModel(file, load_method);
}
// Private default constructor: builds an empty KenLM feature shell with no
// moses.ini line and no model. Used by the friend class
// InMemoryPerSentenceOnDemandLM, which constructs the shell per thread and
// supplies the actual model later via LoadModel().
// NOTE(review): m_ngram stays unset until LoadModel() runs — confirm no
// member function that dereferences m_ngram can be called before then.
template <class Model> LanguageModelKen<Model>::LanguageModelKen()
  :LanguageModel("KENLM")
  ,m_beginSentenceFactor(FactorCollection::Instance().AddFactor(BOS_))
  ,m_factorType(0)
{
  ReadParameters();
}
+
+
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> &copy_from)
:LanguageModel(copy_from.GetArgLine()),
m_ngram(copy_from.m_ngram),
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 4934228c2..33590d659 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -33,11 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/TypeDef.h"
#include "moses/Word.h"
+
+
namespace Moses
{
//class LanguageModel;
class FFState;
+class InMemoryPerSentenceOnDemandLM;
LanguageModel *ConstructKenLM(const std::string &line);
@@ -67,6 +70,8 @@ public:
virtual bool IsUseable(const FactorMask &mask) const;
+ friend class InMemoryPerSentenceOnDemandLM;
+
protected:
boost::shared_ptr<Model> m_ngram;
@@ -84,6 +89,7 @@ protected:
std::vector<lm::WordIndex> m_lmIdLookup;
private:
+ LanguageModelKen();
LanguageModelKen(const LanguageModelKen<Model> &copy_from);
// Convert last words of hypothesis into vocab ids, returning an end pointer.