Merge branch 'master' into sampled-lexicalized-reordering

author: Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-02-13 05:22:37 +0300
committer: Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-02-13 05:22:37 +0300
commit: fa3f82a04db309dddda1c3e89fc57e4339b19fce (patch)
tree: 991e8c6269a40a5c1b8cc6c139ea158278d142aa /moses
parent: 31da9e8a01693b2afca1048159b2d0be4cca8353 (diff)
parent: 3fb8f58b4e88e7db9dae26e5c1768a70a0845b8f (diff)
13 files changed, 49 insertions, 37 deletions
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index ce0d5e1c0..f6d7b4168 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -67,7 +67,7 @@ ConfusionNet()
   stats.createOne();
 
   const StaticData& staticData = StaticData::Instance();
-  if (staticData.IsChart()) {
+  if (staticData.IsSyntax()) {
     m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
   }
   UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 5be3b0b6b..24887c373 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -27,7 +27,7 @@ int BleuScoreState::Compare(const FFState& o) const
   if (&o == this)
     return 0;
 
-  if (StaticData::Instance().IsChart())
+  if (StaticData::Instance().IsSyntax())
     return 0;
 
   const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 39535f58f..10e5347e4 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -52,15 +52,15 @@ void InputFeature::EvaluateWithSourceContext(const InputType &input
     , ScoreComponentCollection *estimatedFutureScore) const
 {
   if (m_legacy) {
-    //binary phrase-table does input feature itself
-    return;
+	//binary phrase-table does input feature itself
+	return;
   }
-  /*
-  const ScorePair *scores = inputPath.GetInputScore();
-  if (scores) {
-  	  scoreBreakdown.PlusEquals(this, *scores);
+  else if (input.GetType() == WordLatticeInput){
+	const ScorePair *scores = inputPath.GetInputScore();
+	if (scores) {
+	  scoreBreakdown.PlusEquals(this, *scores);
+	}
   }
-  */
 }
 
 } // namespace
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
index a50589edc..03deead07 100644
--- a/moses/FF/OSM-Feature/KenOSM.h
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -10,6 +10,8 @@ namespace Moses
 class KenOSMBase
 {
 public:
+  virtual ~KenOSMBase() {}
+
   virtual float Score(const lm::ngram::State&, const std::string&,
                       lm::ngram::State&) const = 0;
 
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 71f300481..e7267f66c 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -146,6 +146,8 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
   :LanguageModel(line)
   ,m_factorType(factorType)
 {
+  ReadParameters();
+
   lm::ngram::Config config;
   IFVERBOSE(1) {
     config.messages = &std::cerr;
@@ -441,15 +443,18 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
   return ret;
 }
 
-LanguageModel *ConstructKenLM(const std::string &line)
+LanguageModel *ConstructKenLM(const std::string &lineOrig)
 {
   FactorType factorType = 0;
   string filePath;
   bool lazy = false;
 
-  util::TokenIter<util::SingleCharacter, true> argument(line, ' ');
+  util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
   ++argument; // KENLM 
 
+  stringstream line;
+  line << "KENLM";
+
   for (; argument; ++argument) {
     const char *equals = std::find(argument->data(), argument->data() + argument->size(), '=');
     UTIL_THROW_IF2(equals == argument->data() + argument->size(),
@@ -465,12 +470,12 @@ LanguageModel *ConstructKenLM(const std::string &line)
     } else if (name == "lazyken") {
       lazy = boost::lexical_cast<bool>(value);
     } else {
-      // that's ok. do nothing, passes onto LM constructor
-      //UTIL_THROW2("Unknown KenLM argument " << name);
+      // pass to base class to interpret
+      line << " " << name << "=" << value;
     }
   }
 
-  return ConstructKenLM(line, filePath, factorType, lazy);
+  return ConstructKenLM(line.str(), filePath, factorType, lazy);
 }
 
 LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index c5677b73b..6052624cc 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -978,7 +978,7 @@ void Parameter::WeightOverwrite()
   for (size_t i = 0; i < toks.size(); ++i) {
     const string &tok = toks[i];
 
-    if (starts_with(tok, "=")) {
+    if (ends_with(tok, "=")) {
       // start of new feature
 
       if (name != "") {
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 58d650aa3..a937f21e3 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -43,7 +43,7 @@ Sentence::Sentence()
   , InputType()
 {
   const StaticData& staticData = StaticData::Instance();
-  if (staticData.IsChart()) {
+  if (staticData.IsSyntax()) {
     m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
   }
 }
@@ -168,7 +168,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
 
   if (staticData.GetXmlInputType() != XmlPassThrough) {
     int offset = 0;
-    if (staticData.IsChart()) {
+    if (staticData.IsSyntax()) {
       offset = 1;
     }
 
@@ -188,7 +188,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
   // placeholders
   ProcessPlaceholders(placeholders);
 
-  if (staticData.IsChart()) {
+  if (staticData.IsSyntax()) {
     InitStartEndWord();
   }
 
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 2fad752da..8709d758f 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -112,7 +112,7 @@ bool StaticData::LoadData(Parameter *parameter)
   // to cube or not to cube
   m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
 
-  if (IsChart())
+  if (IsSyntax())
     LoadChartDecodingParameters();
 
   // input type has to be specified BEFORE loading the phrase tables!
@@ -698,7 +698,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
     UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
     if (m_decodeGraphs.size() < decodeGraphInd + 1) {
       DecodeGraph *decodeGraph;
-      if (IsChart()) {
+      if (IsSyntax()) {
         size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
         VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
         decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
@@ -765,7 +765,7 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
     UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
     if (m_decodeGraphs.size() < decodeGraphInd + 1) {
       DecodeGraph *decodeGraph;
-      if (IsChart()) {
+      if (IsSyntax()) {
         size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
         VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
         decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 193f79aad..d9a96aaa3 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -436,8 +436,13 @@ public:
   SearchAlgorithm GetSearchAlgorithm() const {
     return m_searchAlgorithm;
   }
-  bool IsChart() const {
-    return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental;
+  bool IsSyntax() const {
+    return m_searchAlgorithm == CYKPlus ||
+           m_searchAlgorithm == ChartIncremental ||
+           m_searchAlgorithm == SyntaxS2T ||
+           m_searchAlgorithm == SyntaxT2S ||
+           m_searchAlgorithm == SyntaxT2S_SCFG ||
+           m_searchAlgorithm == SyntaxF2S;
   }
 
   const ScoreComponentCollection& GetAllWeights() const {
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 90d5575a1..9c3f6b513 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <queue>
 #include <algorithm>
 #include <sys/stat.h>
+#include <boost/algorithm/string/predicate.hpp>
 
 #include "PhraseDictionaryCompact.h"
 #include "moses/FactorCollection.h"
@@ -37,6 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "util/exception.hh"
 
 using namespace std;
+using namespace boost::algorithm;
 
 namespace Moses
 {
@@ -63,18 +65,9 @@ void PhraseDictionaryCompact::Load()
   std::string tFilePath = m_filePath;
 
   std::string suffix = ".minphr";
-  if(tFilePath.substr(tFilePath.length() - suffix.length(), suffix.length()) == suffix) {
-    if(!FileExists(tFilePath)) {
-      throw runtime_error("Error: File " + tFilePath + " does not exit.");
-      exit(1);
-    }
-  } else {
-    if(FileExists(tFilePath + suffix)) {
-      tFilePath += suffix;
-    } else {
-      throw runtime_error("Error: File " + tFilePath + ".minphr does not exit.");
-    }
-  }
+  if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
+  if (!FileExists(tFilePath))
+    throw runtime_error("Error: File " + tFilePath + " does not exist.");
 
   m_phraseDecoder = new PhraseDecoder(*this, &m_input, &m_output,
                                       m_numScoreComponents, &m_weight);
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 7c629db7f..eff0588b6 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -61,7 +61,7 @@ void TranslationTask::Run()
   // which manager
   BaseManager *manager;
 
-  if (!staticData.IsChart()) {
+  if (!staticData.IsSyntax()) {
     // phrase-based
     manager = new Manager(*m_source);
   } else if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index 0a1e1ad9b..a619639bc 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -122,7 +122,7 @@ enum InputTypeEnum {
   ,ConfusionNetworkInput	= 1
   ,WordLatticeInput				= 2
   ,TreeInputType					= 3
-  ,WordLatticeInput2			= 4
+  //,WordLatticeInput2			= 4
   , TabbedSentenceInput = 5
   ,ForestInputType        = 6
 };
@@ -140,6 +140,9 @@ enum DictionaryFind {
   ,All		= 1
 };
 
+// Note: StaticData uses SearchAlgorithm to determine whether the translation
+// model is phrase-based or syntax-based.  If you add a syntax-based search
+// algorithm here then you should also update StaticData::IsSyntax().
 enum SearchAlgorithm {
   Normal				= 0
   ,CubePruning	= 1
diff --git a/moses/WordLattice.h b/moses/WordLattice.h
index 325271234..dc1582d78 100644
--- a/moses/WordLattice.h
+++ b/moses/WordLattice.h
@@ -21,6 +21,10 @@ private:
 
 public:
   WordLattice();
+
+  InputTypeEnum GetType() const
+  { return WordLatticeInput; }
+
   size_t GetColumnIncrement(size_t ic, size_t j) const;
   void Print(std::ostream&) const;
   /** Get shortest path between two nodes
author	Ulrich Germann <ugermann@inf.ed.ac.uk>	2015-02-13 05:22:37 +0300
committer	Ulrich Germann <ugermann@inf.ed.ac.uk>	2015-02-13 05:22:37 +0300
commit	fa3f82a04db309dddda1c3e89fc57e4339b19fce (patch)
tree	991e8c6269a40a5c1b8cc6c139ea158278d142aa /moses
parent	31da9e8a01693b2afca1048159b2d0be4cca8353 (diff)
parent	3fb8f58b4e88e7db9dae26e5c1768a70a0845b8f (diff)