Merge branch 'master' of github.com:moses-smt/mosesdecoder

author: Hieu Hoang <hieuhoang@gmail.com> 2016-10-03 14:04:01 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2016-10-03 14:04:01 +0300
commit: 88e13e3b833c4221e7417ce29c973af8c867c82b (patch)
tree: da211936f591bffa8e6f4ffad68a404a6b50804f
parent: 968c72538f6976f6558f0799e7d0b1293b7e3d96 (diff)
parent: fc0d9900ffe2bf6dfb5243c642d8686f450c2af9 (diff)
31 files changed, 1455 insertions, 17 deletions
diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt
index a41582bfa..7b9bc3a8a 100644
--- a/BUILD-INSTRUCTIONS.txt
+++ b/BUILD-INSTRUCTIONS.txt
@@ -7,4 +7,3 @@ into the source tree from elsewhere:
 
  * "bjam-files" is taken from Boost.
  * "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm
-
diff --git a/contrib/moses2/legacy/FFState.cpp b/contrib/moses2/FF/FFState.cpp
index e69de29bb..e69de29bb 100644
--- a/contrib/moses2/legacy/FFState.cpp
+++ b/contrib/moses2/FF/FFState.cpp
diff --git a/contrib/moses2/legacy/FFState.h b/contrib/moses2/FF/FFState.h
index 33ef5d1f6..33ef5d1f6 100644
--- a/contrib/moses2/legacy/FFState.h
+++ b/contrib/moses2/FF/FFState.h
diff --git a/contrib/moses2/FF/FeatureRegistry.cpp b/contrib/moses2/FF/FeatureRegistry.cpp
index 28aa4258d..b040eb8a1 100644
--- a/contrib/moses2/FF/FeatureRegistry.cpp
+++ b/contrib/moses2/FF/FeatureRegistry.cpp
@@ -3,6 +3,7 @@
 #include "../TranslationModel/Memory/PhraseTableMemory.h"
 #include "../TranslationModel/ProbingPT.h"
 #include "../TranslationModel/UnknownWordPenalty.h"
+#include "../TranslationModel/Transliteration.h"
 
 #include "../LM/KENLM.h"
 #include "../LM/KENLMBatch.h"
@@ -13,6 +14,7 @@
 #include "LexicalReordering/LexicalReordering.h"
 #include "PhrasePenalty.h"
 #include "WordPenalty.h"
+#include "OSM/OpSequenceModel.h"
 
 #include "SkeletonStatefulFF.h"
 #include "SkeletonStatelessFF.h"
@@ -51,6 +53,7 @@ FeatureRegistry::FeatureRegistry()
 
   MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
   MOSES_FNAME(ProbingPT);
+  MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
   MOSES_FNAME(UnknownWordPenalty);
 
   Add("KENLM", new KenFactory());
@@ -64,6 +67,8 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(LexicalReordering);
   MOSES_FNAME(PhrasePenalty);
   MOSES_FNAME(WordPenalty);
+  MOSES_FNAME(OpSequenceModel);
+
   MOSES_FNAME(SkeletonStatefulFF);
   MOSES_FNAME(SkeletonStatelessFF);
 }
diff --git a/contrib/moses2/FF/LexicalReordering/LRState.h b/contrib/moses2/FF/LexicalReordering/LRState.h
index 846acb092..0e906d09a 100644
--- a/contrib/moses2/FF/LexicalReordering/LRState.h
+++ b/contrib/moses2/FF/LexicalReordering/LRState.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "../../legacy/FFState.h"
+#include "../FFState.h"
 #include "LRModel.h"
 
 namespace Moses2
diff --git a/contrib/moses2/FF/OSM/KenOSM.cpp b/contrib/moses2/FF/OSM/KenOSM.cpp
new file mode 100644
index 000000000..6b410fc9e
--- /dev/null
+++ b/contrib/moses2/FF/OSM/KenOSM.cpp
@@ -0,0 +1,33 @@
+#include "KenOSM.h"
+
+namespace Moses2
+{
+
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
+{
+  // Pick the KenOSM wrapper matching the format RecognizeBinary reports.
+  lm::ngram::Config config;
+  config.load_method = load_method;
+  lm::ngram::ModelType model_type;
+  if (!lm::ngram::RecognizeBinary(file, model_type)) {
+    return new KenOSM<lm::ngram::ProbingModel>(file, config);
+  }
+  switch(model_type) {
+  case lm::ngram::PROBING:
+    return new KenOSM<lm::ngram::ProbingModel>(file, config);
+  case lm::ngram::REST_PROBING:
+    return new KenOSM<lm::ngram::RestProbingModel>(file, config);
+  case lm::ngram::TRIE:
+    return new KenOSM<lm::ngram::TrieModel>(file, config);
+  case lm::ngram::QUANT_TRIE:
+    return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
+  case lm::ngram::ARRAY_TRIE:
+    return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
+  case lm::ngram::QUANT_ARRAY_TRIE:
+    return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
+  default:
+    UTIL_THROW2("Unrecognized kenlm model type " << model_type);
+  }
+}
+
+} // namespace
diff --git a/contrib/moses2/FF/OSM/KenOSM.h b/contrib/moses2/FF/OSM/KenOSM.h
new file mode 100644
index 000000000..f1275232f
--- /dev/null
+++ b/contrib/moses2/FF/OSM/KenOSM.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <string>
+#include "lm/model.hh"
+
+namespace Moses2
+{
+
+class KenOSMBase // Model-type-independent interface implemented by KenOSM<>.
+{
+public:
+  virtual ~KenOSMBase() {}
+  // Scores a word given an input state; the third argument receives the new state.
+  virtual float Score(const lm::ngram::State&, StringPiece,
+                      lm::ngram::State&) const = 0;
+
+  virtual const lm::ngram::State &BeginSentenceState() const = 0;
+
+  virtual const lm::ngram::State &NullContextState() const = 0;
+};
+
+template <class KenModel>
+class KenOSM : public KenOSMBase
+{
+public:
+  KenOSM(const char *file, const lm::ngram::Config &config)
+    : m_kenlm(file, config) {}
+
+  // Maps `word` to its vocabulary index and scores it after in_state.
+  float Score(const lm::ngram::State &in_state, StringPiece word,
+              lm::ngram::State &out_state) const {
+    const lm::WordIndex wordId = m_kenlm.GetVocabulary().Index(word);
+    return m_kenlm.Score(in_state, wordId, out_state);
+  }
+
+  const lm::ngram::State &BeginSentenceState() const {
+    return m_kenlm.BeginSentenceState();
+  }
+
+  const lm::ngram::State &NullContextState() const {
+    return m_kenlm.NullContextState();
+  }
+
+private:
+  KenModel m_kenlm; // the wrapped KenLM model
+};
+
+typedef KenOSMBase OSMLM;
+
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
+
+
+} // namespace
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.cpp b/contrib/moses2/FF/OSM/OpSequenceModel.cpp
new file mode 100644
index 000000000..572065813
--- /dev/null
+++ b/contrib/moses2/FF/OSM/OpSequenceModel.cpp
@@ -0,0 +1,248 @@
+#include <sstream>
+#include "OpSequenceModel.h"
+#include "osmHyp.h"
+#include "lm/state.hh"
+#include "../../PhraseBased/Manager.h"
+#include "../../PhraseBased/Hypothesis.h"
+#include "../../PhraseBased/TargetPhraseImpl.h"
+#include "../../PhraseBased/Sentence.h"
+#include "../../TranslationModel/UnknownWordPenalty.h"
+#include "../../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
+    StatefulFeatureFunction(startInd, line), OSM(NULL), unkOpProb(0)
+{
+  // Defaults; ReadParameters() is expected to apply overrides from `line`.
+  sFactor = 0;
+  tFactor = 0;
+  numFeatures = 5;
+  load_method = util::READ;
+  ReadParameters();
+}
+
+OpSequenceModel::~OpSequenceModel()
+{
+  delete OSM; // allocated in readLanguageModel(); still NULL if never loaded
+}
+
+void OpSequenceModel::Load(System &system)
+{
+  readLanguageModel(m_lmPath.c_str()); // builds OSM and caches unkOpProb
+}
+
+FFState* OpSequenceModel::BlankState(MemPool &pool, const System &sys) const
+{
+  return new (pool.Allocate<osmState>()) osmState(); // placement-new into memory obtained from pool
+}
+
+void OpSequenceModel::EmptyHypothesisState(FFState &state,
+    const ManagerBase &mgr, const InputType &input,
+    const Hypothesis &hypo) const
+{
+  // Seed the state with the OSM begin-of-sentence context; osmState::setState
+  // also resets its j and E positions to 0.
+  const lm::ngram::State sentenceStart = OSM->BeginSentenceState();
+  static_cast<osmState&>(state).setState(sentenceStart);
+}
+
+void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
+    const System &system, const Phrase<Moses2::Word> &source,
+    const TargetPhraseImpl &targetPhrase, Scores &scores,
+    SCORE &estimatedScore) const
+{
+  osmHypothesis obj;
+  obj.setState(OSM->NullContextState());
+  // Bitmap sized to the source phrase, initialised from an empty vector (presumably nothing covered).
+  Bitmap myBitmap (pool, source.GetSize());
+  myBitmap.Init(std::vector<bool>());
+
+  vector <string> mySourcePhrase;
+  vector <string> myTargetPhrase;
+  vector<float> scoresVec;
+  vector <int> alignments;
+  int startIndex = 0;
+  int endIndex = source.GetSize();
+
+  const AlignmentInfo &align = targetPhrase.GetAlignTerm();
+  AlignmentInfo::const_iterator iter;
+  // Flatten the alignment pairs into [first0, second0, first1, second1, ...].
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }
+  // Phrases from the unknown-word-penalty table become "_TRANS_SLF_" (factor 0 only).
+  for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
+    if (&targetPhrase.pt == system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(targetPhrase[i][tFactor]->GetString().as_string());
+  }
+
+  for (size_t i = 0; i < source.GetSize(); i++) {
+    mySourcePhrase.push_back(source[i][sFactor]->GetString().as_string());
+  }
+
+  obj.setPhrases(mySourcePhrase , myTargetPhrase);
+  obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
+  obj.computeOSMFeature(startIndex,myBitmap);
+  obj.calculateOSMProb(*OSM);
+  obj.populateScores(scoresVec,numFeatures);
+  // Only estimatedScore is updated here; `scores` is not modified.
+  SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
+      scoresVec.data());
+  estimatedScore += weightedScore;
+
+}
+
+void OpSequenceModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
+    const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+    SCORE &estimatedScore) const
+{
+  UTIL_THROW2("Not implemented"); // the SCFG overload always throws
+}
+
+void OpSequenceModel::EvaluateWhenApplied(const ManagerBase &mgr,
+    const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+    FFState &state) const
+{
+  const TargetPhrase<Moses2::Word> &target = hypo.GetTargetPhrase();
+  const Bitmap &bitmap = hypo.GetBitmap();
+  Bitmap myBitmap(bitmap);
+  const ManagerBase &manager = hypo.GetManager();
+  const InputType &source = manager.GetInput();
+  const Sentence &sourceSentence = static_cast<const Sentence&>(source);
+  // myBitmap is a private copy; the positions of this phrase are cleared in it below.
+  osmHypothesis obj;
+  vector <string> mySourcePhrase;
+  vector <string> myTargetPhrase;
+  vector<float> scoresVec;
+
+  // (The commented-out statements below are leftover debugging output.)
+  //target.GetWord(0)
+
+  //cerr << target <<" --- "<<target.GetSourcePhrase()<< endl;  // English ...
+
+  //cerr << align << endl;   // Alignments ...
+  //cerr << cur_hypo.GetCurrSourceWordsRange() << endl;
+
+  //cerr << source <<endl;
+
+// int a = sourceRange.GetStartPos();
+// cerr << source.GetWord(a);
+  //cerr <<a<<endl;
+
+  //const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
+
+  // Source span and word alignment of the phrase being applied.
+  const Range & sourceRange = hypo.GetInputPath().range;
+  int startIndex  = sourceRange.GetStartPos();
+  int endIndex = sourceRange.GetEndPos();
+  const AlignmentInfo &align = hypo.GetTargetPhrase().GetAlignTerm();
+  // osmState * statePtr;
+
+  vector <int> alignments;
+
+
+  // Flatten alignment pairs into [first, second, first, second, ...].
+  AlignmentInfo::const_iterator iter;
+
+  for (iter = align.begin(); iter != align.end(); ++iter) {
+    //cerr << iter->first << "----" << iter->second << " ";
+    alignments.push_back(iter->first);
+    alignments.push_back(iter->second);
+  }
+
+
+  //cerr<<bitmap<<endl;
+  //cerr<<startIndex<<" "<<endIndex<<endl;
+
+  // Collect the source words of the span (both ends inclusive).
+  for (int i = startIndex; i <= endIndex; i++) {
+    myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
+    mySourcePhrase.push_back(sourceSentence[i][sFactor]->GetString().as_string());
+    // cerr<<mySourcePhrase[i]<<endl;
+  }
+  // Phrases from the unknown-word-penalty table become "_TRANS_SLF_" (factor 0 only).
+  for (size_t i = 0; i < target.GetSize(); i++) {
+    if (&target.pt == mgr.system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
+      myTargetPhrase.push_back("_TRANS_SLF_");
+    else
+      myTargetPhrase.push_back(target[i][tFactor]->GetString().as_string());
+
+  }
+
+  // Continue from the OSM state of the previous hypothesis.
+  //cerr<<myBitmap<<endl;
+
+  obj.setState(&prevState);
+  obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
+  obj.setPhrases(mySourcePhrase , myTargetPhrase);
+  obj.computeOSMFeature(startIndex,myBitmap);
+  obj.calculateOSMProb(*OSM);
+  obj.populateScores(scoresVec,numFeatures);
+  //obj.print();
+  // Add the feature values to the scores, then store the resulting state.
+  scores.PlusEquals(mgr.system, *this, scoresVec);
+
+  osmState &stateCast = static_cast<osmState&>(state);
+  obj.saveState(stateCast);
+}
+
+void OpSequenceModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
+    const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+    FFState &state) const
+{
+  UTIL_THROW2("Not implemented"); // the SCFG overload always throws
+}
+
+void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
+{
+  // Keys handled here: "path", "support-features", "input-factor",
+  // "output-factor" and "load"; every other key goes to the base class.
+  if (key == "path") {
+    m_lmPath = value;
+  } else if (key == "support-features") {
+    // "no" selects a single feature; any other value selects five.
+    numFeatures = (value == "no") ? 1 : 5;
+  } else if (key == "input-factor") {
+    sFactor = Scan<int>(value);
+  } else if (key == "output-factor") {
+    tFactor = Scan<int>(value);
+  } else if (key != "load") {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
+  // From here on key == "load": translate the value into a util::LoadMethod.
+  else if (value == "lazy") {
+    load_method = util::LAZY;
+  } else if (value == "populate_or_lazy") {
+    load_method = util::POPULATE_OR_LAZY;
+  } else if (value == "populate_or_read" || value == "populate") {
+    load_method = util::POPULATE_OR_READ;
+  } else if (value == "read") {
+    load_method = util::READ;
+  } else if (value == "parallel_read") {
+    load_method = util::PARALLEL_READ;
+  } else {
+    // Unrecognised load method.
+    UTIL_THROW2("Unknown KenLM load method " << value);
+  }
+}
+
+void OpSequenceModel :: readLanguageModel(const char *lmFile)
+{
+  // Loads the model from lmFile using load_method, then caches the score of
+  // the "_TRANS_SLF_" operation from the null context in unkOpProb.
+  OSM = ConstructOSMLM(lmFile, load_method);
+  lm::ngram::State startState = OSM->NullContextState();
+  lm::ngram::State endState;
+  unkOpProb = OSM->Score(startState,"_TRANS_SLF_",endState);
+}
+
+}
diff --git a/contrib/moses2/FF/OSM/OpSequenceModel.h b/contrib/moses2/FF/OSM/OpSequenceModel.h
new file mode 100644
index 000000000..d46cc82fb
--- /dev/null
+++ b/contrib/moses2/FF/OSM/OpSequenceModel.h
@@ -0,0 +1,57 @@
+#pragma once
+#include "../StatefulFeatureFunction.h"
+#include "util/mmap.hh"
+#include "KenOSM.h"
+
+namespace Moses2
+{
+
+// Stateful feature function that scores hypotheses with an operation sequence model.
+class OpSequenceModel : public StatefulFeatureFunction
+{
+public:
+  OSMLM* OSM;        // model built by readLanguageModel()
+  float unkOpProb;   // score of "_TRANS_SLF_" from the null context
+  int numFeatures;   // Number of features used ...
+  int sFactor;  // Source Factor ...
+  int tFactor;  // Target Factor ...
+  util::LoadMethod load_method; // method to load model
+
+  OpSequenceModel(size_t startInd, const std::string &line);
+  virtual ~OpSequenceModel();
+
+  virtual void Load(System &system);
+
+  virtual FFState* BlankState(MemPool &pool, const System &sys) const;
+  virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
+      const InputType &input, const Hypothesis &hypo) const;
+  // Phrase-based overload: adds the weighted score to estimatedScore.
+  virtual void
+  EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
+      const TargetPhraseImpl &targetPhrase, Scores &scores,
+      SCORE &estimatedScore) const;
+  // SCFG overload: not implemented, always throws.
+  virtual void
+  EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
+      const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
+      SCORE &estimatedScore) const;
+  // Phrase-based overload: extends prevState by the applied phrase.
+  virtual void EvaluateWhenApplied(const ManagerBase &mgr,
+      const Hypothesis &hypo, const FFState &prevState, Scores &scores,
+      FFState &state) const;
+  // SCFG overload: not implemented, always throws.
+  virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
+      const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
+      FFState &state) const;
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+  std::string m_lmPath; // set from the "path" parameter
+
+  void readLanguageModel(const char *);
+
+};
+
+}
+
diff --git a/contrib/moses2/FF/OSM/osmHyp.cpp b/contrib/moses2/FF/OSM/osmHyp.cpp
new file mode 100644
index 000000000..ede841a80
--- /dev/null
+++ b/contrib/moses2/FF/OSM/osmHyp.cpp
@@ -0,0 +1,601 @@
+#include "osmHyp.h"
+#include <sstream>
+
+using namespace std;
+using namespace lm::ngram;
+
+namespace Moses2
+{
+void osmState::setState(const lm::ngram::State & val)
+{
+  j = 0; // note: besides storing the LM state, this resets both positions
+  E = 0;
+  lmState = val;
+}
+
+void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
+{
+  gap.clear(); // followed by a full copy-assignment from gapVal
+  gap = gapVal;
+  j = jVal;
+  E = eVal;
+}
+
+size_t osmState::hash() const
+{
+  size_t ret = j;
+  // Mix in E, the gap map and only the LM state's length (not its words).
+  boost::hash_combine(ret, E);
+  boost::hash_combine(ret, gap);
+  boost::hash_combine(ret, lmState.length);
+  // NOTE(review): LM contexts of equal length hash alike - confirm this is intended.
+  return ret;
+}
+
+bool osmState::operator==(const FFState& otherBase) const
+{
+  // Equal when both positions, the gap history and the LM state length
+  // match; the LM context words are not compared.
+  const osmState &rhs = static_cast<const osmState&>(otherBase);
+
+  const bool samePositions = (j == rhs.j) && (E == rhs.E);
+  if (!samePositions)
+    return false;
+
+  const bool sameGaps = (gap == rhs.gap);
+
+  return sameGaps && lmState.length == rhs.lmState.length;
+}
+
+std::string osmState :: getName() const
+{
+  // Always returns the fixed label "done".
+  return "done";
+}
+
+//////////////////////////////////////////////////
+
+osmHypothesis :: osmHypothesis()
+  : j(0)
+  , E(0)
+  , gapCount(0)
+  , deletionCount(0)
+  , openGapCount(0)
+  , gapWidth(0)
+  , opProb(0)
+{
+  // operations, gap and the other containers start out empty by default;
+  // lmState is not initialised here.
+}
+
+void osmHypothesis :: setState(const FFState* prev_state)
+{
+  // A NULL previous state leaves this hypothesis untouched.
+  if(prev_state == NULL)
+    return;
+  const osmState *prev = static_cast <const osmState *> (prev_state);
+  j = prev->getJ();
+  E = prev->getE();
+  gap = prev->getGap();
+  lmState = prev->getLMState();
+}
+
+void osmHypothesis :: saveState(osmState &state)
+{
+  state.setState(lmState); // zeroes state's j and E, so it must run first
+  state.saveState(j,E,gap);
+}
+
+int osmHypothesis :: isTranslationOperation(int x)
+{
+  // Returns 0 when operations[x] contains any reordering marker, else 1.
+  static const char *const kReorderingMarkers[] = {
+    "_JMP_BCK_", "_JMP_FWD_", "_CONT_CEPT_", "_INS_GAP_"
+  };
+  const size_t markerCount =
+    sizeof(kReorderingMarkers) / sizeof(kReorderingMarkers[0]);
+
+  const std::string &op = operations[x];
+  for (size_t m = 0; m < markerCount; ++m) {
+    if (op.find(kReorderingMarkers[m]) != std::string::npos)
+      return 0;
+  }
+
+  return 1;
+}
+
+void osmHypothesis :: removeReorderingOperations()
+{
+  // Zero the reordering counters, then drop every operation that
+  // isTranslationOperation() rejects, preserving the order of the rest.
+  gapCount = 0;
+  deletionCount = 0;
+  openGapCount = 0;
+  gapWidth = 0;
+
+  const int total = operations.size();
+  std::vector <std::string> kept;
+
+  for (int idx = 0; idx < total; ++idx) {
+    if(isTranslationOperation(idx) != 1)
+      continue;
+
+    kept.push_back(operations[idx]);
+  }
+
+  operations.swap(kept);
+}
+
+void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
+{
+  // Chain the operations through the model starting from lmState, summing
+  // the scores into opProb and keeping the final state in lmState.
+  opProb = 0;
+  State context = lmState;
+
+  typedef std::vector <std::string> :: const_iterator OpIter;
+
+  for (OpIter op = operations.begin(); op != operations.end(); ++op) {
+    const State previous = context;
+    opProb += ptrOp.Score(previous, *op, context);
+  }
+
+  lmState = context;
+}
+
+
+int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
+{
+  // Returns the index of the first 0 entry in coverageVector, or -1 when
+  // every entry is non-zero (or the vector is empty).
+  const int total = coverageVector.size();
+  int pos = 0;
+
+  while (pos < total && coverageVector[pos] != 0)
+    ++pos;
+
+  if (pos == total)
+    return -1;
+
+  return pos;
+}
+
+string osmHypothesis :: intToString(int num)
+{
+  // SPrint is not defined in this file; presumably a project string helper.
+  return SPrint(num);
+}
+
+void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
+{
+  // contFlag: 0 = "_TRANS_" operation, 2 = "_INS_" operation, anything else = "_CONT_CEPT_".
+  int gFlag = 0;
+  int gp = 0;
+  int ans;
+
+  // Step 1: move the current position j to j1, emitting gap/jump operations on the way.
+  if ( j < j1) { // j1 is the index of the source word we are about to generate ...
+    //if(coverageVector[j]==0) // if source word at j is not generated yet ...
+    if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
+      operations.push_back("_INS_GAP_");
+      gFlag++;
+      gap[j]="Unfilled";
+    }
+    if (j == E) {
+      j = j1;
+    } else {
+      operations.push_back("_JMP_FWD_");
+      j=E;
+    }
+  }
+
+  if (j1 < j) {
+    // if(j < E && coverageVector[j]==0)
+    if(j < E && coverageVector.GetValue(j)==0) {
+      operations.push_back("_INS_GAP_");
+      gFlag++;
+      gap[j]="Unfilled";
+    }
+
+    j=closestGap(gap,j1,gp);
+    operations.push_back("_JMP_BCK_"+ intToString(gp));
+
+    //cout<<"I am j "<<j<<endl;
+    //cout<<"I am j1 "<<j1<<endl;
+
+    if(j==j1)
+      gap[j]="Filled";
+  }
+  // Still short of j1 after the jumps above: open one more gap and land on j1.
+  if (j < j1) {
+    operations.push_back("_INS_GAP_");
+    gap[j] = "Unfilled";
+    gFlag++;
+    j=j1;
+  }
+  // Step 2: emit the operation selected by contFlag.
+  if(contFlag == 0) { // First words of the multi-word cept ...
+
+    if(english == "_TRANS_SLF_") { // Unknown word ...
+      operations.push_back("_TRANS_SLF_");
+    } else {
+      operations.push_back("_TRANS_" + english + "_TO_" + german);
+    }
+
+    //ans = firstOpenGap(coverageVector);
+    ans = coverageVector.GetFirstGapPos();
+
+    if (ans != -1)
+      gapWidth += j - ans;
+
+  } else if (contFlag == 2) {
+
+    operations.push_back("_INS_" + german);
+    ans = coverageVector.GetFirstGapPos();
+
+    if (ans != -1)
+      gapWidth += j - ans;
+    deletionCount++;
+  } else {
+    operations.push_back("_CONT_CEPT_");
+  }
+  // Step 3: mark j as covered, advance, and update E and the counters.
+  //coverageVector[j]=1;
+  coverageVector.SetValue(j,1);
+  j+=1;
+
+  if(E<j)
+    E=j;
+
+  if (gFlag > 0)
+    gapCount++;
+
+  openGapCount += getOpenGaps();
+  // Step 4: if the next source word is uncovered and listed in targetNullWords, recurse with contFlag 2.
+  //if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
+  if (j < coverageVector.GetSize()) {
+    if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
+      j1 = j;
+      german = currF[j1-startIndex];
+      english = "_INS_";
+      generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
+    }
+  }
+
+}
+
+void osmHypothesis :: print()
+{
+  for (int i = 0; i< operations.size(); i++) {
+    cerr<<operations[i]<<" ";
+
+  }
+  // Debug dump to stderr: the operation list above, then the feature values.
+  cerr<<endl<<endl;
+
+  cerr<<"Operation Probability "<<opProb<<endl;
+  cerr<<"Gap Count "<<gapCount<<endl;
+  cerr<<"Open Gap Count "<<openGapCount<<endl;
+  cerr<<"Gap Width "<<gapWidth<<endl;
+  cerr<<"Deletion Count "<<deletionCount<<endl;
+
+  cerr<<"_______________"<<endl;
+}
+
+int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
+{
+  // Scans the gap history from the highest source position downwards and
+  // counts the "Unfilled" entries seen so far (opGap).
+  //  - If position j1 itself is unfilled, returns j1 immediately.
+  //  - Otherwise returns the unfilled position closest below j1, or -1 when
+  //    there is none.
+  // gp receives the right-to-left ordinal of the returned gap (0 if none).
+  //
+  // An empty history is handled: the reverse-iterator loop simply does not
+  // run, whereas decrementing gap.end() on an empty map is undefined.
+  int dist=1172; // gaps at this distance or further are never selected
+  int value=-1;
+  int opGap=0;
+  gp=0;
+
+  map <int,string> :: reverse_iterator iter;
+
+  for (iter = gap.rbegin(); iter != gap.rend(); ++iter) {
+    if(iter->second != "Unfilled")
+      continue;
+
+    opGap++;
+
+    if(iter->first==j1) {
+      gp = opGap;
+      return j1;
+    }
+
+    // Only gaps strictly to the left of j1 are candidates.
+    if(iter->first < j1) {
+      const int temp = j1 - iter->first;
+
+      if(dist>temp) {
+        dist=temp;
+        value=iter->first;
+        gp=opGap;
+      }
+    }
+  }
+
+  return value;
+}
+
+
+
+int osmHypothesis :: getOpenGaps()
+{
+  // Counts the entries of the gap history still marked "Unfilled".
+  int openGaps = 0;
+
+  map <int,string> :: const_iterator entry = gap.begin();
+  while (entry != gap.end()) {
+    if(entry->second == "Unfilled")
+      ++openGaps;
+    ++entry;
+  }
+  return openGaps;
+}
+
+void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
+{
+  // Emits "_DEL_<english>", then recurses while the next pending target index is in sourceNullWords.
+  operations.push_back("_DEL_" + english);
+  currTargetIndex++;
+  // Skip target indexes already recorded in doneTargetIndexes.
+  while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
+    currTargetIndex++;
+  }
+
+  if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
+    english = currE[currTargetIndex];
+    generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
+  }
+
+}
+
+void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
+{
+  // Turns the cepts and null-word sets built by constructCepts() into the operation sequence.
+  set <int> doneTargetIndexes;
+  set <int> eSide;
+  set <int> fSide;
+  set <int> :: iterator iter;
+  string english;
+  string source;
+  int j1;
+  int targetIndex = 0;
+  doneTargetIndexes.clear();
+
+
+  if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
+    iter = targetNullWords.begin();
+
+    if (*iter == startIndex) {
+
+      j1 = startIndex;
+      source = currF[j1-startIndex];
+      english = "_INS_";
+      generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
+    }
+  }
+
+  if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
+    english = currE[targetIndex];
+    generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+  }
+
+  // One pass per cept: join its words with "^_^" and generate the operations.
+  for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
+    source = "";
+    english = "";
+
+    fSide = ceptsInPhrase[i].first;
+    eSide = ceptsInPhrase[i].second;
+
+    iter = eSide.begin();
+    targetIndex = *iter;
+    english += currE[*iter];
+    iter++;
+    // Target indexes that do not directly follow the previous one are recorded as done.
+    for (; iter != eSide.end(); iter++) {
+      if(*iter == targetIndex+1)
+        targetIndex++;
+      else
+        doneTargetIndexes.insert(*iter);
+
+      english += "^_^";
+      english += currE[*iter];
+    }
+
+    iter = fSide.begin();
+    source += currF[*iter];
+    iter++;
+
+    for (; iter != fSide.end(); iter++) {
+      source += "^_^";
+      source += currF[*iter];
+    }
+    // fSide indexes are phrase-relative; adding startIndex gives the position passed as j1.
+    iter = fSide.begin();
+    j1 = *iter + startIndex;
+    iter++;
+
+    generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
+
+
+    for (; iter != fSide.end(); iter++) {
+      j1 = *iter + startIndex;
+      generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
+    }
+
+    targetIndex++; // Check whether the next target word is unaligned ...
+
+    while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
+      targetIndex++;
+    }
+
+    if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
+      english = currE[targetIndex];
+      generateDeleteOperations(english,targetIndex, doneTargetIndexes);
+    }
+  }
+
+  //removeReorderingOperations();
+
+  //print();
+
+}
+
+void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
+{
+  set <int> :: iterator iter;
+  // Grow eSide/fSide through the alignment maps; recurse while eSide keeps growing.
+  int sz = eSide.size();
+  vector <int> t;
+
+  for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+    t = tS[*iter];
+
+    for (size_t i = 0; i < t.size(); i++) {
+      fSide.insert(t[i]);
+    }
+
+  }
+
+  for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+
+    t = sT[*iter];
+
+    for (size_t i = 0 ; i<t.size(); i++) {
+      eSide.insert(t[i]);
+    }
+
+  }
+  // sz is eSide's size on entry, so this detects newly added target indexes.
+  if (eSide.size () > sz) {
+    getMeCepts(eSide,fSide,tS,sT);
+  }
+
+}
+
+void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
+{
+  // align is a flat list of (source, target) index pairs; fills ceptsInPhrase and both null-word sets.
+  std::map <int , vector <int> > sT;
+  std::map <int , vector <int> > tS;
+  std::set <int> eSide;
+  std::set <int> fSide;
+  std::set <int> :: iterator iter;
+  std :: map <int , vector <int> > :: iterator iter2;
+  std :: pair < set <int> , set <int> > cept;
+  int src;
+  int tgt;
+
+  // Build both directions of the alignment: tS maps target -> sources, sT maps source -> targets.
+  for (size_t i = 0;  i < align.size(); i+=2) {
+    src = align[i];
+    tgt = align[i+1];
+    tS[tgt].push_back(src);
+    sT[src].push_back(tgt);
+  }
+
+  for (int i = startIndex; i<= endIndex; i++) { // What are unaligned source words in this phrase ...
+    if (sT.find(i-startIndex) == sT.end()) {
+      targetNullWords.insert(i);
+    }
+  }
+
+  for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
+    if (tS.find(i) == tS.end()) {
+      sourceNullWords.insert(i);
+    }
+  }
+
+  // Repeatedly take the first remaining target index, expand it into a cept and erase its members.
+  while (tS.size() != 0 && sT.size() != 0) {
+
+    iter2 = tS.begin();
+
+    eSide.clear();
+    fSide.clear();
+    eSide.insert (iter2->first);
+
+    getMeCepts(eSide, fSide, tS , sT);
+
+    for (iter = eSide.begin(); iter != eSide.end(); iter++) {
+      iter2 = tS.find(*iter);
+      tS.erase(iter2);
+    }
+
+    for (iter = fSide.begin(); iter != fSide.end(); iter++) {
+      iter2 = sT.find(*iter);
+      sT.erase(iter2);
+    }
+
+    cept = make_pair (fSide , eSide);
+    ceptsInPhrase.push_back(cept);
+  }
+
+
+  // The block comment below is disabled debugging output.
+  /*
+
+      cerr<<"Extracted Cepts "<<endl;
+      for (int i = 0; i < ceptsInPhrase.size(); i++)
+        {
+
+          fSide = ceptsInPhrase[i].first;
+          eSide = ceptsInPhrase[i].second;
+
+          for (iter = eSide.begin(); iter != eSide.end(); iter++)
+          {
+              cerr<<*iter<<" ";
+          }
+              cerr<<"<---> ";
+
+          for (iter = fSide.begin(); iter != fSide.end(); iter++)
+          {
+            cerr<<*iter<<" ";
+          }
+
+          cerr<<endl;
+        }
+        cerr<<endl;
+
+      cerr<<"Unaligned Target Words"<<endl;
+
+      for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
+        cerr<<*iter<<"<--->"<<endl;
+
+      cerr<<"Unaligned Source Words"<<endl;
+
+      for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
+        cerr<<*iter<<"<--->"<<endl;
+
+  */
+
+}
+
+void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
+{
+  // scores[0] is always opProb; the four counters follow unless exactly one
+  // feature was requested.
+  scores.clear();
+  scores.push_back(opProb);
+  if (numFeatures != 1) {
+    scores.push_back(gapWidth);
+    scores.push_back(gapCount);
+    scores.push_back(openGapCount);
+    scores.push_back(deletionCount);
+  }
+}
+
+
+} // namespace
+
diff --git a/contrib/moses2/FF/OSM/osmHyp.h b/contrib/moses2/FF/OSM/osmHyp.h
new file mode 100644
index 000000000..c2893d366
--- /dev/null
+++ b/contrib/moses2/FF/OSM/osmHyp.h
@@ -0,0 +1,111 @@
+#pragma once
+
+# include <set>
+# include <map>
+# include <string>
+# include <vector>
+#include "KenOSM.h"
+# include "../FFState.h"
+# include "../../legacy/Bitmap.h"
+
+namespace Moses2
+{
+
+class osmState : public FFState // OSM feature state: positions j/E, gap history, LM state
+{
+public:
+  osmState() : j(0), E(0) // positions start at 0 rather than indeterminate
+  {}
+  // Stores the LM state and resets j and E to 0.
+  void setState(const lm::ngram::State & val);
+  // hash() and operator==() consider j, E, gap and lmState.length only.
+  virtual size_t hash() const;
+  virtual bool operator==(const FFState& other) const;
+
+  virtual std::string ToString() const
+  { return "osmState"; }
+  // Stores the positions and a copy of the gap history; lmState is untouched.
+  void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
+  int getJ()const {
+    return j;
+  }
+  int getE()const {
+    return E;
+  }
+  std::map <int , std::string> getGap() const {
+    return gap;
+  }
+
+  lm::ngram::State getLMState() const {
+    return lmState;
+  }
+
+  void print() const;
+  std::string getName() const;
+
+protected:
+  int j, E;
+  std::map <int,std::string> gap;
+  lm::ngram::State lmState;
+};
+
+class osmHypothesis
+{
+  // Builds the operation sequence for one phrase application and scores it with the OSM model.
+private:
+
+
+  std::vector <std::string> operations; // List of operations required to generate this hyp ...
+  std::map <int,std::string> gap; // Maintains gap history ...
+  int j;  // Position after the last source word generated ...
+  int E; // Position after the right most source word so far generated ...
+  lm::ngram::State lmState; // KenLM's Model State ...
+
+  int gapCount; // Number of gaps inserted ...
+  int deletionCount; // Number of "_INS_" operations emitted by generateOperations()
+  int openGapCount; // Running sum of getOpenGaps() after each generated operation
+  int gapWidth; // Sum of (j - first gap position) over "_TRANS_"/"_INS_" operations
+  double opProb; // Summed model score of `operations`
+
+  std::vector <std::string> currE; // second argument of setPhrases()
+  std::vector <std::string> currF; // first argument of setPhrases()
+  std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase; // (fSide, eSide) pairs
+  std::set <int> targetNullWords; // source positions without an alignment point
+  std::set <int> sourceNullWords; // target positions without an alignment point
+
+  int closestGap(std::map <int,std::string> gap,int j1, int & gp);
+  int firstOpenGap(std::vector <int> & coverageVector);
+  std::string intToString(int);
+  int  getOpenGaps();
+  int isTranslationOperation(int j);
+  void removeReorderingOperations();
+
+  void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
+
+public:
+
+  osmHypothesis();
+  ~osmHypothesis() {};
+  void generateOperations(int & startIndex, int j1 , int contFlag , Bitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
+  void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
+  void calculateOSMProb(OSMLM& ptrOp);
+  void computeOSMFeature(int startIndex , Bitmap & coverageVector);
+  void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
+  void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
+    currF = val1;
+    currE = val2;
+  }
+  void setState(const FFState* prev_state);
+  void saveState(osmState &state);
+  void print();
+  void populateScores(std::vector <float> & scores , const int numFeatures);
+  void setState(const lm::ngram::State & val) {
+    lmState = val;
+  }
+
+};
+
+} // namespace
+
+
+
diff --git a/contrib/moses2/legacy/PointerState.cpp b/contrib/moses2/FF/PointerState.cpp
index e69de29bb..e69de29bb 100644
--- a/contrib/moses2/legacy/PointerState.cpp
+++ b/contrib/moses2/FF/PointerState.cpp
diff --git a/contrib/moses2/legacy/PointerState.h b/contrib/moses2/FF/PointerState.h
index 41e6edf9f..41e6edf9f 100644
--- a/contrib/moses2/legacy/PointerState.h
+++ b/contrib/moses2/FF/PointerState.h
diff --git a/contrib/moses2/FF/StatefulFeatureFunction.h b/contrib/moses2/FF/StatefulFeatureFunction.h
index 70be3ad39..fffb1eea7 100644
--- a/contrib/moses2/FF/StatefulFeatureFunction.h
+++ b/contrib/moses2/FF/StatefulFeatureFunction.h
@@ -9,7 +9,7 @@
 #define STATEFULFEATUREFUNCTION_H_
 
 #include "FeatureFunction.h"
-#include "../legacy/FFState.h"
+#include "FFState.h"
 #include "../MemPool.h"
 
 namespace Moses2
diff --git a/contrib/moses2/HypothesisBase.h b/contrib/moses2/HypothesisBase.h
index 23f5c6474..6ef4d3891 100644
--- a/contrib/moses2/HypothesisBase.h
+++ b/contrib/moses2/HypothesisBase.h
@@ -8,7 +8,7 @@
 
 #include <iostream>
 #include <cstddef>
-#include "legacy/FFState.h"
+#include "FF/FFState.h"
 #include "Scores.h"
 
 namespace Moses2
diff --git a/contrib/moses2/Jamfile b/contrib/moses2/Jamfile
index 600dd0513..ed74865ee 100644
--- a/contrib/moses2/Jamfile
+++ b/contrib/moses2/Jamfile
@@ -44,6 +44,10 @@ alias deps :  ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose
     FF/LexicalReordering/PhraseBasedReorderingState.cpp
     FF/LexicalReordering/ReorderingStack.cpp
 
+    FF/OSM/OpSequenceModel.cpp
+    FF/OSM/KenOSM.cpp
+    FF/OSM/osmHyp.cpp
+    
  #   LM/LanguageModelDALM.cpp
     LM/LanguageModel.cpp
     LM/KENLM.cpp
@@ -52,6 +56,7 @@ alias deps :  ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../mose
     
  	 	TranslationModel/PhraseTable.cpp 
  	 	TranslationModel/ProbingPT.cpp 
+ 	 	TranslationModel/Transliteration.cpp 
  	 	TranslationModel/UnknownWordPenalty.cpp 
     TranslationModel/Memory/PhraseTableMemory.cpp 
    	
diff --git a/contrib/moses2/LM/LanguageModel.cpp b/contrib/moses2/LM/LanguageModel.cpp
index b27b84c77..8a6fe3b39 100644
--- a/contrib/moses2/LM/LanguageModel.cpp
+++ b/contrib/moses2/LM/LanguageModel.cpp
@@ -11,9 +11,9 @@
 #include "../PhraseBased/Manager.h"
 #include "../PhraseBased/Hypothesis.h"
 #include "../PhraseBased/TargetPhraseImpl.h"
+#include "../FF/PointerState.h"
 #include "../legacy/Util2.h"
 #include "../legacy/InputFileStream.h"
-#include "../legacy/PointerState.h"
 #include "../legacy/Bitmap.h"
 #include "../legacy/Util2.h"
 
diff --git a/contrib/moses2/PhraseBased/Hypothesis.h b/contrib/moses2/PhraseBased/Hypothesis.h
index 3afb17df4..7859c1d14 100644
--- a/contrib/moses2/PhraseBased/Hypothesis.h
+++ b/contrib/moses2/PhraseBased/Hypothesis.h
@@ -8,14 +8,14 @@
 
 #include <iostream>
 #include <cstddef>
-#include "../legacy/FFState.h"
+#include "../FF/FFState.h"
 #include "../legacy/Bitmap.h"
+#include "../legacy/Range.h"
 #include "../Scores.h"
 #include "../Phrase.h"
 #include "../TargetPhrase.h"
 #include "../InputPathBase.h"
 #include "../HypothesisBase.h"
-#include "../legacy/Range.h"
 
 namespace Moses2
 {
diff --git a/contrib/moses2/PhraseBased/Sentence.cpp b/contrib/moses2/PhraseBased/Sentence.cpp
index 2021da7d7..d0c728530 100644
--- a/contrib/moses2/PhraseBased/Sentence.cpp
+++ b/contrib/moses2/PhraseBased/Sentence.cpp
@@ -51,7 +51,7 @@ Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
 
     string str2 = "<xml>" + str + "</xml>";
     pugi::xml_parse_result result = doc.load(str2.c_str(),
-                                    pugi::parse_default | pugi::parse_comments);
+                                    pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
     pugi::xml_node topNode = doc.child("xml");
 
     std::vector<std::string> toks;
diff --git a/contrib/moses2/SCFG/Sentence.cpp b/contrib/moses2/SCFG/Sentence.cpp
index b900e6fbe..5e69a7e23 100644
--- a/contrib/moses2/SCFG/Sentence.cpp
+++ b/contrib/moses2/SCFG/Sentence.cpp
@@ -48,7 +48,7 @@ Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
 
     string str2 = "<xml>" + str + "</xml>";
     pugi::xml_parse_result result = doc.load(str2.c_str(),
-                                    pugi::parse_default | pugi::parse_comments);
+                                      pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
     pugi::xml_node topNode = doc.child("xml");
 
     std::vector<std::string> toks;
diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/contrib/moses2/TranslationModel/Transliteration.cpp
new file mode 100644
index 000000000..f92348ee9
--- /dev/null
+++ b/contrib/moses2/TranslationModel/Transliteration.cpp
@@ -0,0 +1,229 @@
+/*
+ * Transliteration.cpp
+ *
+ *  Created on: 28 Oct 2015
+ *      Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Transliteration.h"
+#include "../System.h"
+#include "../Scores.h"
+#include "../InputType.h"
+#include "../PhraseBased/Manager.h"
+#include "../PhraseBased/TargetPhraseImpl.h"
+#include "../PhraseBased/InputPath.h"
+#include "../PhraseBased/TargetPhrases.h"
+#include "../PhraseBased/Sentence.h"
+#include "../SCFG/InputPath.h"
+#include "../SCFG/TargetPhraseImpl.h"
+#include "../SCFG/Manager.h"
+#include "../SCFG/Sentence.h"
+#include "../SCFG/ActiveChart.h"
+#include "util/tempfile.hh"
+#include "../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+Transliteration::Transliteration(size_t startInd, const std::string &line) :
+    PhraseTable(startInd, line)
+{
+  ReadParameters(); // presumably feeds the key=value pairs of `line` to SetParameter() - TODO confirm
+  UTIL_THROW_IF2(m_mosesDir.empty() ||
+                 m_scriptDir.empty() ||
+                 m_externalDir.empty() ||
+                 m_inputLang.empty() ||
+                 m_outputLang.empty(), "Must specify all arguments"); // all five settings are mandatory
+}
+
+Transliteration::~Transliteration()
+{
+  // Intentionally empty: nothing is released in this destructor body.
+}
+
+void
+Transliteration::
+SetParameter(const std::string& key, const std::string& value)
+{
+  // Settings owned by this class are stored here and consumed; every other
+  // key (including "path") is still forwarded to PhraseTable as before.
+  if (key == "moses-dir") { m_mosesDir = value; return; }
+  if (key == "script-dir") { m_scriptDir = value; return; }
+  if (key == "external-dir") { m_externalDir = value; return; }
+  if (key == "input-lang") { m_inputLang = value; return; }
+  if (key == "output-lang") { m_outputLang = value; return; }
+
+  // m_filePath was never assigned anywhere, so the script always received an empty
+  // --transliteration-model-dir. NOTE(review): assumes the directory is configured
+  // under the usual "path" key - confirm against the config files in use.
+  if (key == "path") m_filePath = value;
+  PhraseTable::SetParameter(key, value);
+}
+
+// For every input path that passes SatisfyBackoff(), transliterate its source
+// phrase and attach the resulting target phrases to that path.
+void Transliteration::Lookup(const Manager &mgr,
+    InputPathsBase &inputPaths) const
+{
+  BOOST_FOREACH(InputPathBase *pathBase, inputPaths) {
+    InputPath *path = static_cast<InputPath*>(pathBase);
+    if (!SatisfyBackoff(mgr, *path)) {
+      continue;
+    }
+
+    TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
+    path->AddTargetPhrases(*this, tps);
+  }
+}
+
+// Transliterates inputPath's source phrase via the external Perl script and returns
+// the pool-allocated result; throws if system() reports a non-zero status.
+TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
+    InputPath &inputPath) const
+{
+  const SubPhrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
+
+  // Temporary input file and output directory handed to the script.
+  const util::temp_file inFile;
+  const util::temp_dir outDir;
+
+  ofstream inStream(inFile.path().c_str());
+  inStream << sourcePhrase.Debug(mgr.system) << endl;
+  inStream.close();
+
+  string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+               " --transliteration-model-dir " + m_filePath +
+               " --moses-src-dir " + m_mosesDir +
+               " --external-bin-dir " + m_externalDir +
+               " --input-extension " + m_inputLang +
+               " --output-extension " + m_outputLang +
+               " --oov-file " + inFile.path() +
+               " --out-dir " + outDir.path();
+
+  int ret = system(cmd.c_str());
+  UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
+  TargetPhrases *tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
+  vector<TargetPhraseImpl*> targetPhrases
+  = CreateTargetPhrases(mgr, pool, sourcePhrase, outDir.path());
+
+  vector<TargetPhraseImpl*>::const_iterator iter;
+  for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+    tps->AddTargetPhrase(**iter);
+  }
+  mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
+
+  // Hand the phrases back rather than falling off the end of a non-void function
+  // (undefined behaviour); Lookup(mgr, inputPaths) attaches them to the path.
+  return tps;
+}
+
+// Reads <outDir>/out.txt, expecting one word<TAB>score pair per line, and
+// returns one pool-allocated target phrase for each line read.
+std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
+    const Manager &mgr,
+    MemPool &pool,
+    const SubPhrase<Moses2::Word> &sourcePhrase,
+    const std::string &outDir) const
+{
+  std::vector<TargetPhraseImpl*> phrases;
+  const string scriptOutput = outDir + "/out.txt";
+  ifstream outStream(scriptOutput.c_str());
+
+  for (string line; getline(outStream, line); ) {
+    vector<string> toks = Moses2::Tokenize(line, "\t");
+    UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+
+    // construct the phrase in pool memory; its word at index 0 comes from the first token
+    TargetPhraseImpl *tp =
+        new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
+    (*tp)[0].CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]);
+
+    // add the parsed score to the phrase's scores under this feature function
+    const float score = Scan<float>(toks[1]);
+    tp->GetScores().PlusEquals(mgr.system, *this, score);
+
+    // let the other feature functions score the rule as it is being loaded
+    mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *tp);
+
+    phrases.push_back(tp);
+  }
+
+  outStream.close();
+
+  return phrases;
+}
+
+
+void Transliteration::EvaluateInIsolation(const System &system,
+    const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+    SCORE &estimatedScore) const
+{
+  UTIL_THROW2("Not implemented"); // unsupported here: every call throws, no argument is used
+}
+
+// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
+void Transliteration::InitActiveChart(
+    MemPool &pool,
+    const SCFG::Manager &mgr,
+    SCFG::InputPath &path) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG entry point is a stub: every call throws
+}
+
+void Transliteration::Lookup(MemPool &pool,
+    const SCFG::Manager &mgr,
+    size_t maxChartSpan,
+    const SCFG::Stacks &stacks,
+    SCFG::InputPath &path) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG overload of Lookup is a stub: every call throws
+}
+
+void Transliteration::LookupUnary(MemPool &pool,
+    const SCFG::Manager &mgr,
+    const SCFG::Stacks &stacks,
+    SCFG::InputPath &path) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG entry point is a stub: every call throws
+}
+
+void Transliteration::LookupNT(
+    MemPool &pool,
+    const SCFG::Manager &mgr,
+    const Moses2::Range &subPhraseRange,
+    const SCFG::InputPath &prevPath,
+    const SCFG::Stacks &stacks,
+    SCFG::InputPath &outPath) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG entry point is a stub: every call throws
+}
+
+void Transliteration::LookupGivenWord(
+    MemPool &pool,
+    const SCFG::Manager &mgr,
+    const SCFG::InputPath &prevPath,
+    const SCFG::Word &wordSought,
+    const Moses2::Hypotheses *hypos,
+    const Moses2::Range &subPhraseRange,
+    SCFG::InputPath &outPath) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG entry point is a stub: every call throws
+}
+
+void Transliteration::LookupGivenNode(
+    MemPool &pool,
+    const SCFG::Manager &mgr,
+    const SCFG::ActiveChartEntry &prevEntry,
+    const SCFG::Word &wordSought,
+    const Moses2::Hypotheses *hypos,
+    const Moses2::Range &subPhraseRange,
+    SCFG::InputPath &outPath) const
+{
+  UTIL_THROW2("Not implemented"); // SCFG entry point is a stub: every call throws
+}
+
+}
+
diff --git a/contrib/moses2/TranslationModel/Transliteration.h b/contrib/moses2/TranslationModel/Transliteration.h
new file mode 100644
index 000000000..15f262ac8
--- /dev/null
+++ b/contrib/moses2/TranslationModel/Transliteration.h
@@ -0,0 +1,91 @@
+/*
+ * Transliteration.h
+ *
+ *  Created on: 28 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+
+#include "PhraseTable.h"
+
+namespace Moses2
+{
+class Sentence;
+class InputPaths;
+class Range;
+
+class Transliteration: public PhraseTable // phrase table whose entries come from an external transliteration script
+{
+public:
+  Transliteration(size_t startInd, const std::string &line); // throws unless all five m_* settings on the shared declaration below are set
+  virtual ~Transliteration();
+  // Transliterate every input path accepted by SatisfyBackoff() (first overload) or one single path (second).
+  void Lookup(const Manager &mgr, InputPathsBase &inputPaths) const;
+  virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
+      InputPath &inputPath) const;
+  // From here down to LookupGivenNode(): defined in Transliteration.cpp as stubs that throw "Not implemented".
+  virtual void
+  EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
+      const TargetPhraseImpl &targetPhrase, Scores &scores,
+      SCORE &estimatedScore) const;
+
+  virtual void InitActiveChart(
+      MemPool &pool,
+      const SCFG::Manager &mgr,
+      SCFG::InputPath &path) const;
+
+  void Lookup(MemPool &pool,
+      const SCFG::Manager &mgr,
+      size_t maxChartSpan,
+      const SCFG::Stacks &stacks,
+      SCFG::InputPath &path) const;
+
+  void LookupUnary(MemPool &pool,
+      const SCFG::Manager &mgr,
+      const SCFG::Stacks &stacks,
+      SCFG::InputPath &path) const;
+
+protected:
+  virtual void LookupNT(
+      MemPool &pool,
+      const SCFG::Manager &mgr,
+      const Moses2::Range &subPhraseRange,
+      const SCFG::InputPath &prevPath,
+      const SCFG::Stacks &stacks,
+      SCFG::InputPath &outPath) const;
+
+  virtual void LookupGivenWord(
+      MemPool &pool,
+      const SCFG::Manager &mgr,
+      const SCFG::InputPath &prevPath,
+      const SCFG::Word &wordSought,
+      const Moses2::Hypotheses *hypos,
+      const Moses2::Range &subPhraseRange,
+      SCFG::InputPath &outPath) const;
+
+  virtual void LookupGivenNode(
+      MemPool &pool,
+      const SCFG::Manager &mgr,
+      const SCFG::ActiveChartEntry &prevEntry,
+      const SCFG::Word &wordSought,
+      const Moses2::Hypotheses *hypos,
+      const Moses2::Range &subPhraseRange,
+      SCFG::InputPath &outPath) const;
+
+  void SetParameter(const std::string& key, const std::string& value); // stores the keys listed below, forwards the rest to PhraseTable
+
+protected:
+  std::string m_filePath; // value passed to the script as --transliteration-model-dir
+  std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang; // keys: moses-dir, script-dir, external-dir, input-lang, output-lang
+  // Parses <outDir>/out.txt (word<TAB>score per line) into pool-allocated target phrases.
+  std::vector<TargetPhraseImpl*> CreateTargetPhrases(
+      const Manager &mgr,
+      MemPool &pool,
+      const SubPhrase<Moses2::Word> &sourcePhrase,
+      const std::string &outDir) const;
+
+};
+
+}
+
diff --git a/misc/1-1-Extraction.cpp b/misc/1-1-Extraction.cpp
index cf3817abf..cea1f3cb7 100644
--- a/misc/1-1-Extraction.cpp
+++ b/misc/1-1-Extraction.cpp
@@ -216,7 +216,9 @@ int main(int argc, char * argv[])
     getWords(f[i],currF);
     getWords(a[i],currA);
 
-    cerr<<"Processing "<<i<<endl;
+    if (i % 100000 == 0) {
+      cerr<<"Processing "<<i<<endl;
+    }
     constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size());
     getOneToOne(ceptsInPhrase , currF , currE, one);
 
diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile
index 68b9ebd39..17e399e43 100644
--- a/regression-testing/Jamfile
+++ b/regression-testing/Jamfile
@@ -37,9 +37,11 @@ if $(with-regtest) {
   if $(skip-compact) {
     reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM $(test-dir)/*compactptable ] : ../moses-cmd//moses : @reg_test_decode ;
     reg_test chart  : [ glob $(test-dir)/chart.*  : $(test-dir)/*withDALM $(test-dir)/*compactptable ] : ../moses-cmd//moses : @reg_test_decode ;
+    reg_test moses2  : [ glob $(test-dir)/moses2.*  : $(test-dir)/*withDALM $(test-dir)/*compactptable ] : ../contrib/moses2//moses2 : @reg_test_decode ; # same moses2 target as the else branch
   } else {
     reg_test phrase : [ glob $(test-dir)/phrase.* : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
     reg_test chart  : [ glob $(test-dir)/chart.*  : $(test-dir)/*withDALM ] : ../moses-cmd//moses : @reg_test_decode ;
+    reg_test moses2  : [ glob $(test-dir)/moses2.*  : $(test-dir)/*withDALM ] : ../contrib/moses2//moses2 : @reg_test_decode ;
   }
 
   if [ option.get "with-dalm" : : "yes" ] {
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index 7e1004db6..6344c9714 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -414,7 +414,7 @@ alignment-symmetrization-method = grow-diag-final-and
 # 
 #operation-sequence-model = "yes"
 #operation-sequence-model-order = 5
-#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40%'"
 #
 # OR if you want to use with SRILM
 #
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index 3d00ffd79..88c36c430 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -397,7 +397,7 @@ alignment-symmetrization-method = grow-diag-final-and
 # 
 #operation-sequence-model = "yes"
 #operation-sequence-model-order = 5
-#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40%'"
 #
 # if OSM training should be skipped, point to OSM Model 
 #osm-model =
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index bdbd2b4e0..8b20df1e2 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -401,7 +401,7 @@ alignment-symmetrization-method = grow-diag-final-and
 # 
 #operation-sequence-model = "yes"
 #operation-sequence-model-order = 5
-#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40%'"
 #
 # if OSM training should be skipped, point to OSM Model 
 #osm-model =
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index 6667a9744..748fd0cd0 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -378,7 +378,7 @@ alignment-symmetrization-method = grow-diag-final-and
 # 
 #operation-sequence-model = "yes"
 #operation-sequence-model-order = 5
-#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40%'"
 #
 # OR if you want to use with SRILM
 #
diff --git a/scripts/ems/example/config.toy.bilinguallm b/scripts/ems/example/config.toy.bilinguallm
index 9bf94613f..3e64947fc 100644
--- a/scripts/ems/example/config.toy.bilinguallm
+++ b/scripts/ems/example/config.toy.bilinguallm
@@ -394,7 +394,7 @@ alignment-symmetrization-method = grow-diag-final-and
 # 
 #operation-sequence-model = "yes"
 #operation-sequence-model-order = 5
-#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
+#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40%'"
 #
 # OR if you want to use with SRILM
 #
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index e52c82319..23e771e8b 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2315,7 +2315,7 @@ sub define_training_build_transliteration_model {
     my $sym_method = &check_and_get("TRAINING:alignment-symmetrization-method");
     my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
     my $external_bin_dir = &check_and_get("GENERAL:external-bin-dir");
-    my $srilm_dir = &check_and_get("TRAINING:srilm-dir");
+    my $srilm_dir = &check_backoff_and_get("TRAINING:srilm-dir");
     my $decoder = &get("TRAINING:transliteration-decoder");
 
     my $cmd = "$moses_script_dir/Transliteration/train-transliteration-module.pl";
diff --git a/scripts/generic/binarize4moses2.perl b/scripts/generic/binarize4moses2.perl
index 0865b9f66..5b9f08e50 100755
--- a/scripts/generic/binarize4moses2.perl
+++ b/scripts/generic/binarize4moses2.perl
@@ -12,12 +12,14 @@ my $mosesDir = "$RealBin/../..";
 my $ptPath;
 my $lexRoPath;
 my $outPath;
+my $numScores = 4;
 my $numLexScores;
 my $pruneNum = 0;
 
 GetOptions("phrase-table=s"  => \$ptPath,
            "lex-ro=s"   => \$lexRoPath,
            "output-dir=s" => \$outPath,
+           "num-scores=i" => \$numScores, # integer, like the other numeric options
            "num-lex-scores=i" => \$numLexScores,
            "prune=i" => \$pruneNum
 	   ) or exit 1;
@@ -41,7 +43,7 @@ systemCheck($cmd);
 $cmd = "$mosesDir/bin/addLexROtoPT $tempPath/pt.gz $tempPath/lex-ro.minlexr  | gzip -c > $tempPath/pt.withLexRO.gz";
 systemCheck($cmd);
 
-$cmd = "$mosesDir/bin/CreateProbingPT2 --num-lex-scores $numLexScores --log-prob --input-pt $tempPath/pt.withLexRO.gz --output-dir $outPath";
+$cmd = "$mosesDir/bin/CreateProbingPT2 --num-scores $numScores --num-lex-scores $numLexScores --log-prob --input-pt $tempPath/pt.withLexRO.gz --output-dir $outPath";
 systemCheck($cmd);
 
 exit(0);
author	Hieu Hoang <hieuhoang@gmail.com>	2016-10-03 14:04:01 +0300
committer	Hieu Hoang <hieuhoang@gmail.com>	2016-10-03 14:04:01 +0300
commit	88e13e3b833c4221e7417ce29c973af8c867c82b (patch)
tree	da211936f591bffa8e6f4ffad68a404a6b50804f
parent	968c72538f6976f6558f0799e7d0b1293b7e3d96 (diff)
parent	fc0d9900ffe2bf6dfb5243c642d8686f450c2af9 (diff)