From bb941c01f6571524ecae01da5b51a4a4acf243aa Mon Sep 17 00:00:00 2001 From: dowobeha Date: Fri, 13 May 2011 18:07:21 +0000 Subject: Merge branch 'master' into local-trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3971 1f5c12ca-751b-0410-a591-d2e778427230 --- .gitignore | 21 + config.h.in | 13 +- configure.in | 43 +- moses/src/Makefile.am | 8 + moses/src/Parameter.cpp | 6 + moses/src/ScoreIndexManager.cpp | 2 + moses/src/StaticData.cpp | 1365 ++++++++++++++++--------------- moses/src/StaticData.h | 15 + moses/src/SyntacticLanguageModel.cpp | 123 +++ moses/src/SyntacticLanguageModel.h | 52 ++ moses/src/SyntacticLanguageModelFiles.h | 95 +++ moses/src/SyntacticLanguageModelState.h | 303 +++++++ regenerate-makefiles.sh | 2 +- scripts/generic/balance-corpus | 392 +++++++++ scripts/training/mert-moses.pl | 13 +- 15 files changed, 1791 insertions(+), 662 deletions(-) create mode 100755 moses/src/SyntacticLanguageModel.cpp create mode 100755 moses/src/SyntacticLanguageModel.h create mode 100755 moses/src/SyntacticLanguageModelFiles.h create mode 100755 moses/src/SyntacticLanguageModelState.h create mode 100644 scripts/generic/balance-corpus diff --git a/.gitignore b/.gitignore index 77987e8ee..14834115d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,23 @@ *.[oa] +*~ +CreateOnDisk/src/.deps +CreateOnDisk/src/CreateOnDiskPt Makefile Makefile.in +OnDiskPt/src/.deps aclocal.m4 autom4te.cache/ config.h config.log config.status configure +kenlm/.deps +kenlm/.libs +kenlm/*.la +kenlm/*.lo +kenlm/build_binary +kenlm/query +libtool mert/.deps/ mert/Makefile mert/Makefile.in @@ -18,16 +29,26 @@ misc/Makefile.in misc/processLexicalTable misc/processPhraseTable misc/queryLexicalTable +misc/queryPhraseTable +moses-chart/src/.deps +moses-chart-cmd/src/.deps +moses-chart-cmd/src/moses_chart moses-cmd/src/.deps/ moses-cmd/src/Makefile moses-cmd/src/Makefile.in +moses-cmd/src/checkplf +moses-cmd/src/lmbrgrid moses-cmd/src/moses moses/src/.deps/ +moses/src/.libs +moses/src/*.lo moses/src/Makefile moses/src/Makefile.in +moses/src/libmoses.la scripts/training/cmert-0.5/mert scripts/training/mbr/mbr scripts/training/phrase-extract/extract scripts/training/phrase-extract/score scripts/training/symal/symal +server/.deps stamp-h1 diff --git a/config.h.in b/config.h.in index 264a888a5..f2f6ed881 100644 --- a/config.h.in +++ b/config.h.in @@ -30,6 +30,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the header file. */ +#undef HAVE_NL_CPT_H + /* flag for protobuf */ #undef HAVE_PROTOBUF @@ -51,6 +54,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_STRING_H +/* flag for Syntactic Parser */ +#undef HAVE_SYNLM + /* Define to 1 if you have the header file. */ #undef HAVE_SYS_STAT_H @@ -60,10 +66,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#undef LT_OBJDIR - /* Name of package */ #undef PACKAGE @@ -79,9 +81,6 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME -/* Define to the home page for this package. */ -#undef PACKAGE_URL - /* Define to the version of this package. 
*/ #undef PACKAGE_VERSION diff --git a/configure.in b/configure.in index 8778fa91a..4bf0aea46 100644 --- a/configure.in +++ b/configure.in @@ -34,6 +34,13 @@ AC_ARG_WITH(srilm-dynamic, [with_srilm_dynamic=no] ) +AC_ARG_WITH(srilm-arch, + [AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])], + [with_srilm_arch=$withval], + [with_srilm_arch=no] + ) + + AC_ARG_WITH(irstlm, [AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])], [with_irstlm=$withval], @@ -52,6 +59,12 @@ AC_ARG_WITH(randlm, [with_randlm=no] ) +AC_ARG_WITH(synlm, + [AC_HELP_STRING([--with-synlm=PATH], [(optional) path to syntactic language model parser])], + [with_synlm=$withval], + [with_synlm=no] + ) + AC_ARG_WITH(notrace, [AC_HELP_STRING([--notrace], [disable trace])], [without_trace=yes], @@ -82,7 +95,7 @@ AC_ARG_ENABLE(boost, ) AC_ARG_WITH(zlib, - [AC_HELP_STRING([--with-zlib=PATH], [(optional) path to zlib])], +boost [AC_HELP_STRING([--with-zlib=PATH], [(optional) path to zlib])], [with_zlib=$withval], [with_zlib=no] ) @@ -94,7 +107,7 @@ AC_ARG_WITH(tcmalloc, ) require_boost=no -if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno' +if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno' || test "x$with_synlm" != 'xno' then require_boost=yes fi @@ -111,6 +124,7 @@ AM_CONDITIONAL([SRI_LM], false) AM_CONDITIONAL([IRST_LM], false) AM_CONDITIONAL([KEN_LM], false) AM_CONDITIONAL([RAND_LM], false) +AM_CONDITIONAL([SYN_LM], false) AM_CONDITIONAL([PROTOBUF], false) AM_CONDITIONAL([am__fastdepCC], false) AM_CONDITIONAL([WITH_THREADS],false) @@ -124,13 +138,13 @@ else CPPFLAGS="$CPPFLAGS -DTRACE_ENABLE=1" fi -if test "x$require_boost" = 'xyes' +if test "x$require_boost" = 'xyes' || test "x$with_synlm" then AC_MSG_NOTICE([Using Boost library]) BOOST_REQUIRE([1.36.0]) fi -if test "x$enable_threads" = 'xyes' +if test "x$enable_threads" = 'xyes' || test "x$with_synlm" then AC_MSG_NOTICE([Building threaded moses]) BOOST_THREADS @@ -172,7 +186,12 @@ then # ROOT/lib/i686-m64/liboolm.a # ROOT/lib/i686-m64/libdstruct.a # ROOT/lib/i686-m64/libmisc.a - MY_ARCH=`${with_srilm}/sbin/machine-type` + if test "x$with_srilm_arch" != 'xno' + then + MY_ARCH=${with_srilm_arch} + else + MY_ARCH=`${with_srilm}/sbin/machine-type` + fi LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}" LIBS="$LIBS $LIB_SRILM" FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a" @@ -260,6 +279,20 @@ then ) fi + +if test "x$with_synlm" != 'xno' +then + SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -DWITH_THREADS -I${with_synlm}/rvtl/include -I${with_synlm}/wsjparse/include -lm" + + AC_CHECK_HEADERS(nl-cpt.h, + [AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])]) + + AM_CONDITIONAL([SYN_LM], true) + +fi + + AM_CONDITIONAL([WITH_MERT],false) AC_CHECK_HEADERS([getopt.h], [AM_CONDITIONAL([WITH_MERT],true)], diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index 3ca22d3ec..7cb610430 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -149,6 +149,10 @@ libmoses_la_HEADERS += LanguageModelInternal.h \ NGramNode.h endif +if SYN_LM +libmoses_la_HEADERS += SyntacticLanguageModel.h +endif + libmoses_la_SOURCES = \ AlignmentInfo.cpp \ BilingualDynSuffixArray.cpp \ @@ -306,6 +310,10 @@ libmoses_la_SOURCES += LanguageModelInternal.cpp \ NGramNode.cpp endif +if SYN_LM +libmoses_la_SOURCES += SyntacticLanguageModel.cpp +endif + if KEN_LM libmoses_la_SOURCES += LanguageModelKen.cpp endif diff --git a/moses/src/Parameter.cpp 
b/moses/src/Parameter.cpp index e0173e49a..27170e0f8 100644 --- a/moses/src/Parameter.cpp +++ b/moses/src/Parameter.cpp @@ -68,6 +68,12 @@ Parameter::Parameter() AddParam("report-all-factors", "report all factors in output, not just first"); AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false"); AddParam("report-segmentation", "t", "report phrase segmentation in the output"); +#ifdef HAVE_SYNLM + AddParam("slmodel-file", "location of the syntactic language model file(s)"); + AddParam("weight-slm", "slm", "weight(s) for syntactic language model"); + AddParam("slmodel-factor", "factor to use with syntactic language model"); + AddParam("slmodel-beam", "beam width to use with syntactic language model's parser"); +#endif AddParam("stack", "s", "maximum stack size for histogram pruning"); AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)"); AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)"); diff --git a/moses/src/ScoreIndexManager.cpp b/moses/src/ScoreIndexManager.cpp index 3ef293039..16e8408f4 100644 --- a/moses/src/ScoreIndexManager.cpp +++ b/moses/src/ScoreIndexManager.cpp @@ -23,6 +23,7 @@ void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp) m_producers.push_back(sp); + m_begins.push_back(m_last); size_t numScoreCompsProduced = sp->GetNumScoreComponents(); assert(numScoreCompsProduced > 0); @@ -32,6 +33,7 @@ void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp) << " " << sp->GetScoreProducerDescription() << ") index=" << m_begins.back() << "-" << m_ends.back()-1 << std::endl); */ + } void ScoreIndexManager::PrintLabeledScores(std::ostream& os, const ScoreComponentCollection& scores) const diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index cb2981444..046381d8c 100644 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -41,6 +41,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "DecodeGraph.h" #include "InputFileStream.h" +#ifdef HAVE_SYNLM +#include "SyntacticLanguageModel.h" +#endif + using namespace std; namespace Moses @@ -409,6 +413,12 @@ bool StaticData::LoadData(Parameter *parameter) } } +#ifdef HAVE_SYNLM + if (m_parameter->GetParam("slmodel-file").size() > 0) { + if (!LoadSyntacticLanguageModel()) return false; + } +#endif + // use of xml in input if (m_parameter->GetParam("xml-input").size() == 0) m_xmlInputType = XmlPassThrough; else if (m_parameter->GetParam("xml-input")[0]=="exclusive") m_xmlInputType = XmlExclusive; @@ -508,6 +518,11 @@ bool StaticData::LoadData(Parameter *parameter) //Add any other features here. 
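[Note on the syntactic LM wiring] The hunks above register the new slmodel-file, weight-slm, slmodel-factor and slmodel-beam options in Parameter.cpp and make StaticData::LoadData() call LoadSyntacticLanguageModel() whenever slmodel-file is set; the hunk that follows then attaches the constructed feature to the translation system. As a rough sketch of how that loader consumes the options — condensed from the commented-out LoadSyntacticLanguageModel body further down in this patch, with the TransformScore calls and error reporting simplified for illustration, and relying on the surrounding Moses classes (StaticData, Parameter, Scan<>) rather than being a standalone program:

#ifdef HAVE_SYNLM
// Illustrative sketch only; the patch's own (commented-out) version appears later in this diff.
bool StaticData::LoadSyntacticLanguageModel()
{
  const std::vector<float>       weights = Scan<float>(m_parameter->GetParam("weight-slm"));
  const std::vector<std::string> files   = m_parameter->GetParam("slmodel-file");

  // Optional factor index and parser beam width; fall back to factor 0 and a beam of 500.
  const FactorType factorType = m_parameter->GetParam("slmodel-factor").size() > 0
      ? Scan<size_t>(m_parameter->GetParam("slmodel-factor")[0]) : 0;
  const size_t beamWidth = m_parameter->GetParam("slmodel-beam").size() > 0
      ? Scan<size_t>(m_parameter->GetParam("slmodel-beam")[0]) : 500;

  if (files.empty()) {
    return false;                      // slmodel-file was specified but names no model files
  }
  if (!weights.empty()) {
    // Constructing the feature also registers it with the score index manager
    // (see the SyntacticLanguageModel constructor added by this patch).
    m_syntacticLanguageModel = new SyntacticLanguageModel(files, weights, factorType, beamWidth);
  }
  return true;
}
#endif

With this in place, the feature is driven entirely by the slmodel-file, weight-slm, slmodel-factor and slmodel-beam entries added in Parameter.cpp above, and the hunk below adds the resulting ScoreProducer to the active translation system.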
+#ifdef HAVE_SYNLM + if (m_syntacticLanguageModel != NULL) { + m_translationSystems.find(config[0])->second.AddFeatureFunction(m_syntacticLanguageModel); + } +#endif } @@ -538,651 +553,709 @@ void StaticData::SetBooleanParameter( bool *parameter, string parameterName, boo StaticData::~StaticData() { - RemoveAllInColl(m_phraseDictionary); - RemoveAllInColl(m_generationDictionary); - RemoveAllInColl(m_reorderModels); - RemoveAllInColl(m_globalLexicalModels); - RemoveAllInColl(m_decodeGraphs); - RemoveAllInColl(m_wordPenaltyProducers); - RemoveAllInColl(m_distortionScoreProducers); - m_languageModel.CleanUp(); - - // delete trans opt - map, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache; - for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache) { - TranslationOptionList *transOptList = iterCache->second.first; - delete transOptList; - } - - // small score producers - delete m_unknownWordPenaltyProducer; - - //delete m_parameter; - - // memory pools - Phrase::FinalizeMemPool(); - -} - -bool StaticData::LoadLexicalReorderingModel() -{ - VERBOSE(1, "Loading lexical distortion models..."); - const vector fileStr = m_parameter->GetParam("distortion-file"); - bool hasWeightlr = (m_parameter->GetParam("weight-lr").size() != 0); - vector weightsStr; - if (hasWeightlr) { - weightsStr = m_parameter->GetParam("weight-lr"); - } else { - weightsStr = m_parameter->GetParam("weight-d"); - } - - std::vector weights; - size_t w = 1; //cur weight - if (hasWeightlr) { - w = 0; // if reading from weight-lr, don't have to count first as distortion penalty - } - size_t f = 0; //cur file - //get weights values - VERBOSE(1, "have " << fileStr.size() << " models" << std::endl); - for(size_t j = 0; j < weightsStr.size(); ++j) { - weights.push_back(Scan(weightsStr[j])); - } - //load all models - for(size_t i = 0; i < fileStr.size(); ++i) { - vector spec = Tokenize(fileStr[f], " "); - ++f; //mark file as consumed - if(spec.size() != 4) { - UserMessage::Add("Invalid Lexical Reordering Model Specification: " + fileStr[f]); - return false; - } - - // spec[0] = factor map - // spec[1] = name - // spec[2] = num weights - // spec[3] = fileName - - // decode factor map - - vector input, output; - vector inputfactors = Tokenize(spec[0],"-"); - if(inputfactors.size() == 2) { - input = Tokenize(inputfactors[0],","); - output = Tokenize(inputfactors[1],","); - } else if(inputfactors.size() == 1) { - //if there is only one side assume it is on e side... why? 
- output = Tokenize(inputfactors[0],","); - } else { - //format error - return false; - } - - string modelType = spec[1]; - - // decode num weights and fetch weights from array - std::vector mweights; - size_t numWeights = atoi(spec[2].c_str()); - for(size_t k = 0; k < numWeights; ++k, ++w) { - if(w >= weights.size()) { - UserMessage::Add("Lexicalized distortion model: Not enough weights, add to [weight-d]"); - return false; - } else - mweights.push_back(weights[w]); - } - - string filePath = spec[3]; - - m_reorderModels.push_back(new LexicalReordering(input, output, modelType, filePath, mweights)); - } - return true; -} - -bool StaticData::LoadGlobalLexicalModel() -{ - const vector &weight = Scan(m_parameter->GetParam("weight-lex")); - const vector &file = m_parameter->GetParam("global-lexical-file"); - - if (weight.size() != file.size()) { - std::cerr << "number of weights and models for the global lexical model does not match (" - << weight.size() << " != " << file.size() << ")" << std::endl; - return false; - } - - for (size_t i = 0; i < weight.size(); i++ ) { - vector spec = Tokenize(file[i], " "); - if ( spec.size() != 2 ) { - std::cerr << "wrong global lexical model specification: " << file[i] << endl; - return false; - } - vector< string > factors = Tokenize(spec[0],"-"); - if ( factors.size() != 2 ) { - std::cerr << "wrong factor definition for global lexical model: " << spec[0] << endl; - return false; - } - vector inputFactors = Tokenize(factors[0],","); - vector outputFactors = Tokenize(factors[1],","); - m_globalLexicalModels.push_back( new GlobalLexicalModel( spec[1], weight[i], inputFactors, outputFactors ) ); - } - return true; -} - -bool StaticData::LoadLanguageModels() -{ - if (m_parameter->GetParam("lmodel-file").size() > 0) { - // weights - vector weightAll = Scan(m_parameter->GetParam("weight-l")); - - for (size_t i = 0 ; i < weightAll.size() ; i++) { - m_allWeights.push_back(weightAll[i]); - } - - // dictionary upper-bounds fo all IRST LMs - vector LMdub = Scan(m_parameter->GetParam("lmodel-dub")); - if (m_parameter->GetParam("lmodel-dub").size() == 0) { - for(size_t i=0; iGetParam("lmodel-file").size(); i++) - LMdub.push_back(0); - } - - // initialize n-gram order for each factor. 
populated only by factored lm - const vector &lmVector = m_parameter->GetParam("lmodel-file"); - //prevent language models from being loaded twice - map languageModelsLoaded; - - for(size_t i=0; i token = Tokenize(lmVector[i]); - if (token.size() != 4 && token.size() != 5 ) { - UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'"); - return false; - } - // type = implementation, SRI, IRST etc - LMImplementation lmImplementation = static_cast(Scan(token[0])); - - // factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc - vector factorTypes = Tokenize(token[1], ","); - - // nGramOrder = 2 = bigram, 3 = trigram, etc - size_t nGramOrder = Scan(token[2]); - - string &languageModelFile = token[3]; - if (token.size() == 5) { - if (lmImplementation==IRST) - languageModelFile += " " + token[4]; - else { - UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'"); - return false; - } - } - IFVERBOSE(1) - PrintUserTime(string("Start loading LanguageModel ") + languageModelFile); - - lm = LanguageModelFactory::CreateLanguageModel( - lmImplementation - , factorTypes - , nGramOrder - , languageModelFile - , m_scoreIndexManager - , LMdub[i]); - if (lm == NULL) { - UserMessage::Add("no LM created. We probably don't have it compiled"); - return false; - } - languageModelsLoaded[lmVector[i]] = lm; - } - - m_languageModel.Add(lm); - } - } - // flag indicating that language models were loaded, - // since phrase table loading requires their presence - m_fLMsLoaded = true; - IFVERBOSE(1) - PrintUserTime("Finished loading LanguageModels"); - return true; -} - -bool StaticData::LoadGenerationTables() -{ - if (m_parameter->GetParam("generation-file").size() > 0) { - const vector &generationVector = m_parameter->GetParam("generation-file"); - const vector &weight = Scan(m_parameter->GetParam("weight-generation")); - - IFVERBOSE(1) { - TRACE_ERR( "weight-generation: "); - for (size_t i = 0 ; i < weight.size() ; i++) { - TRACE_ERR( weight[i] << "\t"); - } - TRACE_ERR(endl); - } - size_t currWeightNum = 0; - - for(size_t currDict = 0 ; currDict < generationVector.size(); currDict++) { - vector token = Tokenize(generationVector[currDict]); - vector input = Tokenize(token[0], ",") - ,output = Tokenize(token[1], ","); - m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output); - string filePath; - size_t numFeatures; - - numFeatures = Scan(token[2]); - filePath = token[3]; - - if (!FileExists(filePath) && FileExists(filePath + ".gz")) { - filePath += ".gz"; - } - - VERBOSE(1, filePath << endl); - - m_generationDictionary.push_back(new GenerationDictionary(numFeatures, m_scoreIndexManager, input,output)); - assert(m_generationDictionary.back() && "could not create GenerationDictionary"); - if (!m_generationDictionary.back()->Load(filePath, Output)) { - delete m_generationDictionary.back(); - return false; - } - for(size_t i = 0; i < numFeatures; i++) { - assert(currWeightNum < weight.size()); - m_allWeights.push_back(weight[currWeightNum++]); - } - } - if (currWeightNum != weight.size()) { - TRACE_ERR( " [WARNING] config file has " << weight.size() << " generation weights listed, but the configuration for generation files indicates there should be " << currWeightNum << "!\n"); - } - } - - return true; -} - -/* Doesn't load phrase tables any more. Just creates the features. 
*/ -bool StaticData::LoadPhraseTables() -{ - VERBOSE(2,"Creating phrase table features" << endl); - - // language models must be loaded prior to loading phrase tables - assert(m_fLMsLoaded); - // load phrase translation tables - if (m_parameter->GetParam("ttable-file").size() > 0) { - // weights - vector weightAll = Scan(m_parameter->GetParam("weight-t")); - - const vector &translationVector = m_parameter->GetParam("ttable-file"); - vector maxTargetPhrase = Scan(m_parameter->GetParam("ttable-limit")); - - if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) { - VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl); - for(size_t i = 1; i < translationVector.size(); i++) - maxTargetPhrase.push_back(maxTargetPhrase[0]); - } else if(maxTargetPhrase.size() != 1 && maxTargetPhrase.size() < translationVector.size()) { - stringstream strme; - strme << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits."; - UserMessage::Add(strme.str()); - return false; - } - - size_t index = 0; - size_t weightAllOffset = 0; - bool oldFileFormat = false; - for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) { - vector token = Tokenize(translationVector[currDict]); - - if(currDict == 0 && token.size() == 4) { - VERBOSE(1, "Warning: Phrase table specification in old 4-field format. Assuming binary phrase tables (type 1)!" << endl); - oldFileFormat = true; - } - - if((!oldFileFormat && token.size() < 5) || (oldFileFormat && token.size() != 4)) { - UserMessage::Add("invalid phrase table specification"); - return false; - } - - PhraseTableImplementation implementation = (PhraseTableImplementation) Scan(token[0]); - if(oldFileFormat) { - token.push_back(token[3]); - token[3] = token[2]; - token[2] = token[1]; - token[1] = token[0]; - token[0] = "1"; - implementation = Binary; - } else - implementation = (PhraseTableImplementation) Scan(token[0]); - - assert(token.size() >= 5); - //characteristics of the phrase table - - vector input = Tokenize(token[1], ",") - ,output = Tokenize(token[2], ","); - m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input); - m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output); - m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1; - size_t numScoreComponent = Scan(token[3]); - string filePath= token[4]; - - assert(weightAll.size() >= weightAllOffset + numScoreComponent); - - // weights for this phrase dictionary - // first InputScores (if any), then translation scores - vector weight; - - if(currDict==0 && (m_inputType == ConfusionNetworkInput || m_inputType == WordLatticeInput)) { - // TODO. find what the assumptions made by confusion network about phrase table output which makes - // it only work with binrary file. 
This is a hack - - m_numInputScores=m_parameter->GetParam("weight-i").size(); - for(unsigned k=0; k(m_parameter->GetParam("weight-i")[k])); - - if(m_parameter->GetParam("link-param-count").size()) - m_numLinkParams = Scan(m_parameter->GetParam("link-param-count")[0]); - - //print some info about this interaction: - if (m_numLinkParams == m_numInputScores) { - VERBOSE(1,"specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count.\n"); - } else if ((m_numLinkParams + 1) == m_numInputScores) { - VERBOSE(1,"WARN: "<< m_numInputScores << " insertion weights found and only "<< m_numLinkParams << " link parameters specified, applying non-epsilon 'real' word link count for last feature weight.\n"); - } else { - stringstream strme; - strme << "You specified " << m_numInputScores - << " input weights (weight-i), but you specified " << m_numLinkParams << " link parameters (link-param-count)!"; - UserMessage::Add(strme.str()); - return false; - } - - } - if (!m_inputType) { - m_numInputScores=0; - } - //this number changes depending on what phrase table we're talking about: only 0 has the weights on it - size_t tableInputScores = (currDict == 0 ? m_numInputScores : 0); - - for (size_t currScore = 0 ; currScore < numScoreComponent; currScore++) - weight.push_back(weightAll[weightAllOffset + currScore]); - - - if(weight.size() - tableInputScores != numScoreComponent) { - stringstream strme; - strme << "Your phrase table has " << numScoreComponent - << " scores, but you specified " << (weight.size() - tableInputScores) << " weights!"; - UserMessage::Add(strme.str()); - return false; - } - - weightAllOffset += numScoreComponent; - numScoreComponent += tableInputScores; - - string targetPath, alignmentsFile; - if (implementation == SuffixArray) { - targetPath = token[5]; - alignmentsFile= token[6]; - } - - assert(numScoreComponent==weight.size()); - - std::copy(weight.begin(),weight.end(),std::back_inserter(m_allWeights)); - - //This is needed for regression testing, but the phrase table - //might not really be loading here - IFVERBOSE(1) - PrintUserTime(string("Start loading PhraseTable ") + filePath); - VERBOSE(1,"filePath: " << filePath <GetParam("non-terminals").size() == 0) { - defaultNonTerminals = "X"; - } else { - vector tokens = Tokenize(m_parameter->GetParam("non-terminals")[0]); - defaultNonTerminals = tokens[0]; - } - - FactorCollection &factorCollection = FactorCollection::Instance(); - - m_inputDefaultNonTerminal.SetIsNonTerminal(true); - const Factor *sourceFactor = factorCollection.AddFactor(Input, 0, defaultNonTerminals); - m_inputDefaultNonTerminal.SetFactor(0, sourceFactor); - - m_outputDefaultNonTerminal.SetIsNonTerminal(true); - const Factor *targetFactor = factorCollection.AddFactor(Output, 0, defaultNonTerminals); - m_outputDefaultNonTerminal.SetFactor(0, targetFactor); - - // for unknwon words - if (m_parameter->GetParam("unknown-lhs").size() == 0) { - UnknownLHSEntry entry(defaultNonTerminals, 0.0f); - m_unknownLHS.push_back(entry); - } else { - const string &filePath = m_parameter->GetParam("unknown-lhs")[0]; - - InputFileStream inStream(filePath); - string line; - while(getline(inStream, line)) { - vector tokens = Tokenize(line); - assert(tokens.size() == 2); - UnknownLHSEntry entry(tokens[0], Scan(tokens[1])); - m_unknownLHS.push_back(entry); - } - - } - -} - -void StaticData::LoadChartDecodingParameters() -{ - LoadNonTerminals(); - - // source label overlap - if (m_parameter->GetParam("source-label-overlap").size() > 0) { - 
m_sourceLabelOverlap = (SourceLabelOverlap) Scan(m_parameter->GetParam("source-label-overlap")[0]); - } else { - m_sourceLabelOverlap = SourceLabelOverlapAdd; - } - - m_ruleLimit = (m_parameter->GetParam("rule-limit").size() > 0) - ? Scan(m_parameter->GetParam("rule-limit")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE; -} - -void StaticData::LoadPhraseBasedParameters() -{ - const vector distortionWeights = m_parameter->GetParam("weight-d"); - size_t distortionWeightCount = distortionWeights.size(); - //if there's a lex-reordering model, and no separate weight set, then - //take just one of these weights for linear distortion - if (!m_parameter->GetParam("weight-lr").size() && m_parameter->GetParam("distortion-file").size()) { - distortionWeightCount = 1; - } - for (size_t i = 0; i < distortionWeightCount; ++i) { - float weightDistortion = Scan(distortionWeights[i]); - m_distortionScoreProducers.push_back(new DistortionScoreProducer(m_scoreIndexManager)); - m_allWeights.push_back(weightDistortion); - } -} - -bool StaticData::LoadDecodeGraphs() -{ - const vector &mappingVector = m_parameter->GetParam("mapping"); - const vector &maxChartSpans = Scan(m_parameter->GetParam("max-chart-span")); - - DecodeStep *prev = 0; - size_t prevDecodeGraphInd = 0; - for(size_t i=0; i token = Tokenize(mappingVector[i]); - size_t decodeGraphInd; - DecodeType decodeType; - size_t index; - if (token.size() == 2) { - decodeGraphInd = 0; - decodeType = token[0] == "T" ? Translate : Generate; - index = Scan(token[1]); - } else if (token.size() == 3) { - // For specifying multiple translation model - decodeGraphInd = Scan(token[0]); - //the vectorList index can only increment by one - assert(decodeGraphInd == prevDecodeGraphInd || decodeGraphInd == prevDecodeGraphInd + 1); - if (decodeGraphInd > prevDecodeGraphInd) { - prev = NULL; - } - decodeType = token[1] == "T" ? Translate : Generate; - index = Scan(token[2]); - } else { - UserMessage::Add("Malformed mapping!"); - assert(false); - } - - DecodeStep* decodeStep = NULL; - switch (decodeType) { - case Translate: - if(index>=m_phraseDictionary.size()) { - stringstream strme; - strme << "No phrase dictionary with index " - << index << " available!"; - UserMessage::Add(strme.str()); - assert(false); - } - decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev); - break; - case Generate: - if(index>=m_generationDictionary.size()) { - stringstream strme; - strme << "No generation dictionary with index " - << index << " available!"; - UserMessage::Add(strme.str()); - assert(false); - } - decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev); - break; - case InsertNullFertilityWord: - assert(!"Please implement NullFertilityInsertion."); - break; - } - - assert(decodeStep); - if (m_decodeGraphs.size() < decodeGraphInd + 1) { - DecodeGraph *decodeGraph; - if (m_searchAlgorithm == ChartDecoding) { - size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? 
maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN; - decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan); - } else { - decodeGraph = new DecodeGraph(m_decodeGraphs.size()); - } - - m_decodeGraphs.push_back(decodeGraph); // TODO max chart span - } - - m_decodeGraphs[decodeGraphInd]->Add(decodeStep); - prev = decodeStep; - prevDecodeGraphInd = decodeGraphInd; - } - - // set maximum n-gram size for backoff approach to decoding paths - // default is always use subsequent paths (value = 0) - for(size_t i=0; i &backoffVector = m_parameter->GetParam("decoding-graph-backoff"); - for(size_t i=0; i(backoffVector[i]); - } - - return true; -} - - -void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector& weights) -{ - const size_t id = sp->GetScoreBookkeepingID(); - const size_t begin = m_scoreIndexManager.GetBeginIndex(id); - const size_t end = m_scoreIndexManager.GetEndIndex(id); - assert(end - begin == weights.size()); - if (m_allWeights.size() < end) - m_allWeights.resize(end); - std::vector::const_iterator weightIter = weights.begin(); - for (size_t i = begin; i < end; i++) - m_allWeights[i] = *weightIter++; -} - -const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const -{ - std::pair key(decodeGraph.GetPosition(), sourcePhrase); -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_transOptCacheMutex); -#endif - std::map, std::pair >::iterator iter - = m_transOptCache.find(key); - if (iter == m_transOptCache.end()) - return NULL; - iter->second.second = clock(); // update last used time - return iter->second.first; -} - -void StaticData::ReduceTransOptCache() const -{ - if (m_transOptCache.size() <= m_transOptCacheMaxSize) return; // not full - clock_t t = clock(); - - // find cutoff for last used time - priority_queue< clock_t > lastUsedTimes; - std::map, std::pair >::iterator iter; - iter = m_transOptCache.begin(); - while( iter != m_transOptCache.end() ) { - lastUsedTimes.push( iter->second.second ); - iter++; - } - for( size_t i=0; i < lastUsedTimes.size()-m_transOptCacheMaxSize/2; i++ ) - lastUsedTimes.pop(); - clock_t cutoffLastUsedTime = lastUsedTimes.top(); - - // remove all old entries - iter = m_transOptCache.begin(); - while( iter != m_transOptCache.end() ) { - if (iter->second.second < cutoffLastUsedTime) { - std::map, std::pair >::iterator iterRemove = iter++; - delete iterRemove->second.first; - m_transOptCache.erase(iterRemove); - } else iter++; - } - VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." 
<< std::endl); -} - -void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const -{ - if (m_transOptCacheMaxSize == 0) return; - std::pair key(decodeGraph.GetPosition(), sourcePhrase); - TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList); -#ifdef WITH_THREADS - boost::mutex::scoped_lock lock(m_transOptCacheMutex); -#endif - m_transOptCache[key] = make_pair( storedTransOptList, clock() ); - ReduceTransOptCache(); -} - -} +// RemoveAllInColl(m_phraseDictionary); +// RemoveAllInColl(m_generationDictionary); +// RemoveAllInColl(m_reorderModels); +// RemoveAllInColl(m_globalLexicalModels); +// RemoveAllInColl(m_decodeGraphs); +// RemoveAllInColl(m_wordPenaltyProducers); +// RemoveAllInColl(m_distortionScoreProducers); +// m_languageModel.CleanUp(); +// #ifdef HAVE_SYNLM +// delete m_syntacticLanguageModel; +// #endif + +// // delete trans opt +// map, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache; +// for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache) { +// TranslationOptionList *transOptList = iterCache->second.first; +// delete transOptList; +// } + +// // small score producers +// delete m_unknownWordPenaltyProducer; + +// //delete m_parameter; + +// // memory pools +// Phrase::FinalizeMemPool(); + +// } + +// #ifdef HAVE_SYNLM +// bool StaticData::LoadSyntacticLanguageModel() { +// cerr << "Loading syntactic language models..." << std::endl; + +// const vector weights = Scan(m_parameter->GetParam("weight-slm")); +// const vector files = m_parameter->GetParam("slmodel-file"); + +// const FactorType factorType = (m_parameter->GetParam("slmodel-factor").size() > 0) ? +// TransformScore(Scan(m_parameter->GetParam("slmodel-factor")[0])) +// : 0; + +// const size_t beamWidth = (m_parameter->GetParam("slmodel-beam").size() > 0) ? +// TransformScore(Scan(m_parameter->GetParam("slmodel-beam")[0])) +// : 500; + +// if (files.size() < 1) { +// cerr << "No syntactic language model files specified!" 
<< std::endl; +// return false; +// } + +// // check if feature is used +// if (weights.size() >= 1) { + +// //cout.setf(ios::scientific,ios::floatfield); +// //cerr.setf(ios::scientific,ios::floatfield); + +// // create the feature +// m_syntacticLanguageModel = new SyntacticLanguageModel(files,weights,factorType,beamWidth); + +// /* +// ///////////////////////////////////////// +// // BEGIN LANE's UNSTABLE EXPERIMENT :) +// // + +// double ppl = m_syntacticLanguageModel->perplexity(); +// cerr << "Probability is " << ppl << endl; + + +// // +// // END LANE's UNSTABLE EXPERIMENT +// ///////////////////////////////////////// +// */ + + +// if (m_syntacticLanguageModel==NULL) { +// return false; +// } + +// } + +// return true; + +// } +// #endif + +// bool StaticData::LoadLexicalReorderingModel() +// { +// VERBOSE(1, "Loading lexical distortion models..."); +// const vector fileStr = m_parameter->GetParam("distortion-file"); +// bool hasWeightlr = (m_parameter->GetParam("weight-lr").size() != 0); +// vector weightsStr; +// if (hasWeightlr) { +// weightsStr = m_parameter->GetParam("weight-lr"); +// } else { +// weightsStr = m_parameter->GetParam("weight-d"); +// } + +// std::vector weights; +// size_t w = 1; //cur weight +// if (hasWeightlr) { +// w = 0; // if reading from weight-lr, don't have to count first as distortion penalty +// } +// size_t f = 0; //cur file +// //get weights values +// VERBOSE(1, "have " << fileStr.size() << " models" << std::endl); +// for(size_t j = 0; j < weightsStr.size(); ++j) { +// weights.push_back(Scan(weightsStr[j])); +// } +// //load all models +// for(size_t i = 0; i < fileStr.size(); ++i) { +// vector spec = Tokenize(fileStr[f], " "); +// ++f; //mark file as consumed +// if(spec.size() != 4) { +// UserMessage::Add("Invalid Lexical Reordering Model Specification: " + fileStr[f]); +// return false; +// } + +// // spec[0] = factor map +// // spec[1] = name +// // spec[2] = num weights +// // spec[3] = fileName + +// // decode factor map + +// vector input, output; +// vector inputfactors = Tokenize(spec[0],"-"); +// if(inputfactors.size() == 2) { +// input = Tokenize(inputfactors[0],","); +// output = Tokenize(inputfactors[1],","); +// } else if(inputfactors.size() == 1) { +// //if there is only one side assume it is on e side... why? 
+// output = Tokenize(inputfactors[0],","); +// } else { +// //format error +// return false; +// } + +// string modelType = spec[1]; + +// // decode num weights and fetch weights from array +// std::vector mweights; +// size_t numWeights = atoi(spec[2].c_str()); +// for(size_t k = 0; k < numWeights; ++k, ++w) { +// if(w >= weights.size()) { +// UserMessage::Add("Lexicalized distortion model: Not enough weights, add to [weight-d]"); +// return false; +// } else +// mweights.push_back(weights[w]); +// } + +// string filePath = spec[3]; + +// m_reorderModels.push_back(new LexicalReordering(input, output, modelType, filePath, mweights)); +// } +// return true; +// } + +// bool StaticData::LoadGlobalLexicalModel() +// { +// const vector &weight = Scan(m_parameter->GetParam("weight-lex")); +// const vector &file = m_parameter->GetParam("global-lexical-file"); + +// if (weight.size() != file.size()) { +// std::cerr << "number of weights and models for the global lexical model does not match (" +// << weight.size() << " != " << file.size() << ")" << std::endl; +// return false; +// } + +// for (size_t i = 0; i < weight.size(); i++ ) { +// vector spec = Tokenize(file[i], " "); +// if ( spec.size() != 2 ) { +// std::cerr << "wrong global lexical model specification: " << file[i] << endl; +// return false; +// } +// vector< string > factors = Tokenize(spec[0],"-"); +// if ( factors.size() != 2 ) { +// std::cerr << "wrong factor definition for global lexical model: " << spec[0] << endl; +// return false; +// } +// vector inputFactors = Tokenize(factors[0],","); +// vector outputFactors = Tokenize(factors[1],","); +// m_globalLexicalModels.push_back( new GlobalLexicalModel( spec[1], weight[i], inputFactors, outputFactors ) ); +// } +// return true; +// } + +// bool StaticData::LoadLanguageModels() +// { +// if (m_parameter->GetParam("lmodel-file").size() > 0) { +// // weights +// vector weightAll = Scan(m_parameter->GetParam("weight-l")); + +// for (size_t i = 0 ; i < weightAll.size() ; i++) { +// m_allWeights.push_back(weightAll[i]); +// } + +// // dictionary upper-bounds fo all IRST LMs +// vector LMdub = Scan(m_parameter->GetParam("lmodel-dub")); +// if (m_parameter->GetParam("lmodel-dub").size() == 0) { +// for(size_t i=0; iGetParam("lmodel-file").size(); i++) +// LMdub.push_back(0); +// } + +// // initialize n-gram order for each factor. 
populated only by factored lm +// const vector &lmVector = m_parameter->GetParam("lmodel-file"); +// //prevent language models from being loaded twice +// map languageModelsLoaded; + +// for(size_t i=0; i token = Tokenize(lmVector[i]); +// if (token.size() != 4 && token.size() != 5 ) { +// UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'"); +// return false; +// } +// // type = implementation, SRI, IRST etc +// LMImplementation lmImplementation = static_cast(Scan(token[0])); + +// // factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc +// vector factorTypes = Tokenize(token[1], ","); + +// // nGramOrder = 2 = bigram, 3 = trigram, etc +// size_t nGramOrder = Scan(token[2]); + +// string &languageModelFile = token[3]; +// if (token.size() == 5) { +// if (lmImplementation==IRST) +// languageModelFile += " " + token[4]; +// else { +// UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'"); +// return false; +// } +// } +// IFVERBOSE(1) +// PrintUserTime(string("Start loading LanguageModel ") + languageModelFile); + +// lm = LanguageModelFactory::CreateLanguageModel( +// lmImplementation +// , factorTypes +// , nGramOrder +// , languageModelFile +// , m_scoreIndexManager +// , LMdub[i]); +// if (lm == NULL) { +// UserMessage::Add("no LM created. We probably don't have it compiled"); +// return false; +// } +// languageModelsLoaded[lmVector[i]] = lm; +// } + +// m_languageModel.Add(lm); +// } +// } +// // flag indicating that language models were loaded, +// // since phrase table loading requires their presence +// m_fLMsLoaded = true; +// IFVERBOSE(1) +// PrintUserTime("Finished loading LanguageModels"); +// return true; +// } + +// bool StaticData::LoadGenerationTables() +// { +// if (m_parameter->GetParam("generation-file").size() > 0) { +// const vector &generationVector = m_parameter->GetParam("generation-file"); +// const vector &weight = Scan(m_parameter->GetParam("weight-generation")); + +// IFVERBOSE(1) { +// TRACE_ERR( "weight-generation: "); +// for (size_t i = 0 ; i < weight.size() ; i++) { +// TRACE_ERR( weight[i] << "\t"); +// } +// TRACE_ERR(endl); +// } +// size_t currWeightNum = 0; + +// for(size_t currDict = 0 ; currDict < generationVector.size(); currDict++) { +// vector token = Tokenize(generationVector[currDict]); +// vector input = Tokenize(token[0], ",") +// ,output = Tokenize(token[1], ","); +// m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], input, output); +// string filePath; +// size_t numFeatures; + +// numFeatures = Scan(token[2]); +// filePath = token[3]; + +// if (!FileExists(filePath) && FileExists(filePath + ".gz")) { +// filePath += ".gz"; +// } + +// VERBOSE(1, filePath << endl); + +// m_generationDictionary.push_back(new GenerationDictionary(numFeatures, m_scoreIndexManager, input,output)); +// assert(m_generationDictionary.back() && "could not create GenerationDictionary"); +// if (!m_generationDictionary.back()->Load(filePath, Output)) { +// delete m_generationDictionary.back(); +// return false; +// } +// for(size_t i = 0; i < numFeatures; i++) { +// assert(currWeightNum < weight.size()); +// m_allWeights.push_back(weight[currWeightNum++]); +// } +// } +// if (currWeightNum != weight.size()) { +// TRACE_ERR( " [WARNING] config file has " << weight.size() << " generation weights listed, but the configuration for generation files indicates there should be " << currWeightNum << "!\n"); +// } +// } + +// return true; +// } + +// /* 
Doesn't load phrase tables any more. Just creates the features. */ +// bool StaticData::LoadPhraseTables() +// { +// VERBOSE(2,"Creating phrase table features" << endl); + +// // language models must be loaded prior to loading phrase tables +// assert(m_fLMsLoaded); +// // load phrase translation tables +// if (m_parameter->GetParam("ttable-file").size() > 0) { +// // weights +// vector weightAll = Scan(m_parameter->GetParam("weight-t")); + +// const vector &translationVector = m_parameter->GetParam("ttable-file"); +// vector maxTargetPhrase = Scan(m_parameter->GetParam("ttable-limit")); + +// if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) { +// VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl); +// for(size_t i = 1; i < translationVector.size(); i++) +// maxTargetPhrase.push_back(maxTargetPhrase[0]); +// } else if(maxTargetPhrase.size() != 1 && maxTargetPhrase.size() < translationVector.size()) { +// stringstream strme; +// strme << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits."; +// UserMessage::Add(strme.str()); +// return false; +// } + +// size_t index = 0; +// size_t weightAllOffset = 0; +// bool oldFileFormat = false; +// for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) { +// vector token = Tokenize(translationVector[currDict]); + +// if(currDict == 0 && token.size() == 4) { +// VERBOSE(1, "Warning: Phrase table specification in old 4-field format. Assuming binary phrase tables (type 1)!" << endl); +// oldFileFormat = true; +// } + +// if((!oldFileFormat && token.size() < 5) || (oldFileFormat && token.size() != 4)) { +// UserMessage::Add("invalid phrase table specification"); +// return false; +// } + +// PhraseTableImplementation implementation = (PhraseTableImplementation) Scan(token[0]); +// if(oldFileFormat) { +// token.push_back(token[3]); +// token[3] = token[2]; +// token[2] = token[1]; +// token[1] = token[0]; +// token[0] = "1"; +// implementation = Binary; +// } else +// implementation = (PhraseTableImplementation) Scan(token[0]); + +// assert(token.size() >= 5); +// //characteristics of the phrase table + +// vector input = Tokenize(token[1], ",") +// ,output = Tokenize(token[2], ","); +// m_maxFactorIdx[0] = CalcMax(m_maxFactorIdx[0], input); +// m_maxFactorIdx[1] = CalcMax(m_maxFactorIdx[1], output); +// m_maxNumFactors = std::max(m_maxFactorIdx[0], m_maxFactorIdx[1]) + 1; +// size_t numScoreComponent = Scan(token[3]); +// string filePath= token[4]; + +// assert(weightAll.size() >= weightAllOffset + numScoreComponent); + +// // weights for this phrase dictionary +// // first InputScores (if any), then translation scores +// vector weight; + +// if(currDict==0 && (m_inputType == ConfusionNetworkInput || m_inputType == WordLatticeInput)) { +// // TODO. find what the assumptions made by confusion network about phrase table output which makes +// // it only work with binrary file. 
This is a hack + +// m_numInputScores=m_parameter->GetParam("weight-i").size(); +// for(unsigned k=0; k(m_parameter->GetParam("weight-i")[k])); + +// if(m_parameter->GetParam("link-param-count").size()) +// m_numLinkParams = Scan(m_parameter->GetParam("link-param-count")[0]); + +// //print some info about this interaction: +// if (m_numLinkParams == m_numInputScores) { +// VERBOSE(1,"specified equal numbers of link parameters and insertion weights, not using non-epsilon 'real' word link count.\n"); +// } else if ((m_numLinkParams + 1) == m_numInputScores) { +// VERBOSE(1,"WARN: "<< m_numInputScores << " insertion weights found and only "<< m_numLinkParams << " link parameters specified, applying non-epsilon 'real' word link count for last feature weight.\n"); +// } else { +// stringstream strme; +// strme << "You specified " << m_numInputScores +// << " input weights (weight-i), but you specified " << m_numLinkParams << " link parameters (link-param-count)!"; +// UserMessage::Add(strme.str()); +// return false; +// } + +// } +// if (!m_inputType) { +// m_numInputScores=0; +// } +// //this number changes depending on what phrase table we're talking about: only 0 has the weights on it +// size_t tableInputScores = (currDict == 0 ? m_numInputScores : 0); + +// for (size_t currScore = 0 ; currScore < numScoreComponent; currScore++) +// weight.push_back(weightAll[weightAllOffset + currScore]); + + +// if(weight.size() - tableInputScores != numScoreComponent) { +// stringstream strme; +// strme << "Your phrase table has " << numScoreComponent +// << " scores, but you specified " << (weight.size() - tableInputScores) << " weights!"; +// UserMessage::Add(strme.str()); +// return false; +// } + +// weightAllOffset += numScoreComponent; +// numScoreComponent += tableInputScores; + +// string targetPath, alignmentsFile; +// if (implementation == SuffixArray) { +// targetPath = token[5]; +// alignmentsFile= token[6]; +// } + +// assert(numScoreComponent==weight.size()); + +// std::copy(weight.begin(),weight.end(),std::back_inserter(m_allWeights)); + +// //This is needed for regression testing, but the phrase table +// //might not really be loading here +// IFVERBOSE(1) +// PrintUserTime(string("Start loading PhraseTable ") + filePath); +// VERBOSE(1,"filePath: " << filePath <GetParam("non-terminals").size() == 0) { +// defaultNonTerminals = "X"; +// } else { +// vector tokens = Tokenize(m_parameter->GetParam("non-terminals")[0]); +// defaultNonTerminals = tokens[0]; +// } + +// FactorCollection &factorCollection = FactorCollection::Instance(); + +// m_inputDefaultNonTerminal.SetIsNonTerminal(true); +// const Factor *sourceFactor = factorCollection.AddFactor(Input, 0, defaultNonTerminals); +// m_inputDefaultNonTerminal.SetFactor(0, sourceFactor); + +// m_outputDefaultNonTerminal.SetIsNonTerminal(true); +// const Factor *targetFactor = factorCollection.AddFactor(Output, 0, defaultNonTerminals); +// m_outputDefaultNonTerminal.SetFactor(0, targetFactor); + +// // for unknwon words +// if (m_parameter->GetParam("unknown-lhs").size() == 0) { +// UnknownLHSEntry entry(defaultNonTerminals, 0.0f); +// m_unknownLHS.push_back(entry); +// } else { +// const string &filePath = m_parameter->GetParam("unknown-lhs")[0]; + +// InputFileStream inStream(filePath); +// string line; +// while(getline(inStream, line)) { +// vector tokens = Tokenize(line); +// assert(tokens.size() == 2); +// UnknownLHSEntry entry(tokens[0], Scan(tokens[1])); +// m_unknownLHS.push_back(entry); +// } + +// } + +// } + +// void 
StaticData::LoadChartDecodingParameters() +// { +// LoadNonTerminals(); + +// // source label overlap +// if (m_parameter->GetParam("source-label-overlap").size() > 0) { +// m_sourceLabelOverlap = (SourceLabelOverlap) Scan(m_parameter->GetParam("source-label-overlap")[0]); +// } else { +// m_sourceLabelOverlap = SourceLabelOverlapAdd; +// } + +// m_ruleLimit = (m_parameter->GetParam("rule-limit").size() > 0) +// ? Scan(m_parameter->GetParam("rule-limit")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE; +// } + +// void StaticData::LoadPhraseBasedParameters() +// { +// const vector distortionWeights = m_parameter->GetParam("weight-d"); +// size_t distortionWeightCount = distortionWeights.size(); +// //if there's a lex-reordering model, and no separate weight set, then +// //take just one of these weights for linear distortion +// if (!m_parameter->GetParam("weight-lr").size() && m_parameter->GetParam("distortion-file").size()) { +// distortionWeightCount = 1; +// } +// for (size_t i = 0; i < distortionWeightCount; ++i) { +// float weightDistortion = Scan(distortionWeights[i]); +// m_distortionScoreProducers.push_back(new DistortionScoreProducer(m_scoreIndexManager)); +// m_allWeights.push_back(weightDistortion); +// } +// } + +// bool StaticData::LoadDecodeGraphs() +// { +// const vector &mappingVector = m_parameter->GetParam("mapping"); +// const vector &maxChartSpans = Scan(m_parameter->GetParam("max-chart-span")); + +// DecodeStep *prev = 0; +// size_t prevDecodeGraphInd = 0; +// for(size_t i=0; i token = Tokenize(mappingVector[i]); +// size_t decodeGraphInd; +// DecodeType decodeType; +// size_t index; +// if (token.size() == 2) { +// decodeGraphInd = 0; +// decodeType = token[0] == "T" ? Translate : Generate; +// index = Scan(token[1]); +// } else if (token.size() == 3) { +// // For specifying multiple translation model +// decodeGraphInd = Scan(token[0]); +// //the vectorList index can only increment by one +// assert(decodeGraphInd == prevDecodeGraphInd || decodeGraphInd == prevDecodeGraphInd + 1); +// if (decodeGraphInd > prevDecodeGraphInd) { +// prev = NULL; +// } +// decodeType = token[1] == "T" ? Translate : Generate; +// index = Scan(token[2]); +// } else { +// UserMessage::Add("Malformed mapping!"); +// assert(false); +// } + +// DecodeStep* decodeStep = NULL; +// switch (decodeType) { +// case Translate: +// if(index>=m_phraseDictionary.size()) { +// stringstream strme; +// strme << "No phrase dictionary with index " +// << index << " available!"; +// UserMessage::Add(strme.str()); +// assert(false); +// } +// decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev); +// break; +// case Generate: +// if(index>=m_generationDictionary.size()) { +// stringstream strme; +// strme << "No generation dictionary with index " +// << index << " available!"; +// UserMessage::Add(strme.str()); +// assert(false); +// } +// decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev); +// break; +// case InsertNullFertilityWord: +// assert(!"Please implement NullFertilityInsertion."); +// break; +// } + +// assert(decodeStep); +// if (m_decodeGraphs.size() < decodeGraphInd + 1) { +// DecodeGraph *decodeGraph; +// if (m_searchAlgorithm == ChartDecoding) { +// size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? 
maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN; +// decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan); +// } else { +// decodeGraph = new DecodeGraph(m_decodeGraphs.size()); +// } + +// m_decodeGraphs.push_back(decodeGraph); // TODO max chart span +// } + +// m_decodeGraphs[decodeGraphInd]->Add(decodeStep); +// prev = decodeStep; +// prevDecodeGraphInd = decodeGraphInd; +// } + +// // set maximum n-gram size for backoff approach to decoding paths +// // default is always use subsequent paths (value = 0) +// for(size_t i=0; i &backoffVector = m_parameter->GetParam("decoding-graph-backoff"); +// for(size_t i=0; i(backoffVector[i]); +// } + +// return true; +// } + + +// void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector& weights) +// { +// const size_t id = sp->GetScoreBookkeepingID(); +// const size_t begin = m_scoreIndexManager.GetBeginIndex(id); +// const size_t end = m_scoreIndexManager.GetEndIndex(id); +// assert(end - begin == weights.size()); +// if (m_allWeights.size() < end) +// m_allWeights.resize(end); +// std::vector::const_iterator weightIter = weights.begin(); +// for (size_t i = begin; i < end; i++) +// m_allWeights[i] = *weightIter++; +// } + +// const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const +// { +// std::pair key(decodeGraph.GetPosition(), sourcePhrase); +// #ifdef WITH_THREADS +// boost::mutex::scoped_lock lock(m_transOptCacheMutex); +// #endif +// std::map, std::pair >::iterator iter +// = m_transOptCache.find(key); +// if (iter == m_transOptCache.end()) +// return NULL; +// iter->second.second = clock(); // update last used time +// return iter->second.first; +// } + +// void StaticData::ReduceTransOptCache() const +// { +// if (m_transOptCache.size() <= m_transOptCacheMaxSize) return; // not full +// clock_t t = clock(); + +// // find cutoff for last used time +// priority_queue< clock_t > lastUsedTimes; +// std::map, std::pair >::iterator iter; +// iter = m_transOptCache.begin(); +// while( iter != m_transOptCache.end() ) { +// lastUsedTimes.push( iter->second.second ); +// iter++; +// } +// for( size_t i=0; i < lastUsedTimes.size()-m_transOptCacheMaxSize/2; i++ ) +// lastUsedTimes.pop(); +// clock_t cutoffLastUsedTime = lastUsedTimes.top(); + +// // remove all old entries +// iter = m_transOptCache.begin(); +// while( iter != m_transOptCache.end() ) { +// if (iter->second.second < cutoffLastUsedTime) { +// std::map, std::pair >::iterator iterRemove = iter++; +// delete iterRemove->second.first; +// m_transOptCache.erase(iterRemove); +// } else iter++; +// } +// VERBOSE(2,"Reduced persistent translation option cache in " << ((clock()-t)/(float)CLOCKS_PER_SEC) << " seconds." 
<< std::endl); +// } + +// void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const +// { +// if (m_transOptCacheMaxSize == 0) return; +// std::pair key(decodeGraph.GetPosition(), sourcePhrase); +// TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList); +// #ifdef WITH_THREADS +// boost::mutex::scoped_lock lock(m_transOptCacheMutex); +// #endif +// m_transOptCache[key] = make_pair( storedTransOptList, clock() ); +// ReduceTransOptCache(); +// } + +// } diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index b3ea80a60..0d46d9531 100644 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -63,6 +63,9 @@ class GenerationDictionary; class DistortionScoreProducer; class DecodeStep; class UnknownWordPenaltyProducer; +#ifdef HAVE_SYNLM +class SyntacticLanguageModel; +#endif class TranslationSystem; typedef std::pair UnknownLHSEntry; @@ -95,6 +98,11 @@ protected: m_earlyDiscardingThreshold, m_translationOptionThreshold, m_wordDeletionWeight; +#ifdef HAVE_SYNLM + SyntacticLanguageModel* m_syntacticLanguageModel; +#endif + + // PhraseTrans, Generation & LanguageModelScore has multiple weights. int m_maxDistortion; @@ -205,10 +213,16 @@ protected: void LoadChartDecodingParameters(); void LoadNonTerminals(); + //! helper fn to set bool param from ini file/command line void SetBooleanParameter(bool *paramter, std::string parameterName, bool defaultValue); //! load all language models as specified in ini file bool LoadLanguageModels(); +#ifdef HAVE_SYNLM + //! load syntactic language model + bool LoadSyntacticLanguageModel(); +#endif + //! load not only the main phrase table but also any auxiliary tables that depend on which features are being used (e.g., word-deletion, word-insertion tables) bool LoadPhraseTables(); //! 
load all generation tables as specified in ini file @@ -220,6 +234,7 @@ protected: void ReduceTransOptCache() const; bool m_continuePartialTranslation; + public: bool IsAlwaysCreateDirectTranslationOption() const { diff --git a/moses/src/SyntacticLanguageModel.cpp b/moses/src/SyntacticLanguageModel.cpp new file mode 100755 index 000000000..85c19bdc0 --- /dev/null +++ b/moses/src/SyntacticLanguageModel.cpp @@ -0,0 +1,123 @@ +// + +#include "StaticData.h" +#include "SyntacticLanguageModel.h" +#include "HHMMLangModel-gf.h" +#include "TextObsModel.h" +#include "SyntacticLanguageModelFiles.h" +#include "SyntacticLanguageModelState.h" + + +namespace Moses +{ + // asnteousntaoheisnthaoesntih + SyntacticLanguageModel::SyntacticLanguageModel(const std::vector& filePath, + const std::vector& weights, + const FactorType factorType, + size_t beamWidth) + // Initialize member variables + : m_NumScoreComponents(weights.size()) + , m_beamWidth(beamWidth) + , m_factorType(factorType) + , m_files(new SyntacticLanguageModelFiles(filePath)) { + + // Inform Moses score manager of this feature and its weight(s) + const_cast(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this); + const_cast(StaticData::Instance()).SetWeightsForScoreProducer(this, weights); + VERBOSE(3,"Constructed SyntacticLanguageModel" << endl); + } + + SyntacticLanguageModel::~SyntacticLanguageModel() { + VERBOSE(3,"Destructing SyntacticLanguageModel" << std::endl); + // delete m_files; + } + + size_t SyntacticLanguageModel::GetNumScoreComponents() const { + return m_NumScoreComponents; + } + + std::string SyntacticLanguageModel::GetScoreProducerDescription() const { + return "Syntactic Language Model"; + } + + std::string SyntacticLanguageModel::GetScoreProducerWeightShortName() const { + return "slm"; + } + + const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const { + + return new SyntacticLanguageModelState(m_files,m_beamWidth); + + } + + /* + double SyntacticLanguageModel::perplexity() { + + SyntacticLanguageModelState *prev = + new SyntacticLanguageModelState(m_files,m_beamWidth); + + std::cerr << "Initial prob:" << "\t" << prev->getProb() < words(3); + words[0] = "no"; + words[1] = ","; + words[2] = "zxvth"; + + + for (std::vector::iterator i=words.begin(); + i != words.end(); + i++) { + + prev = new SyntacticLanguageModelState(prev, *i); + std::cerr << *i << "\t" << prev->getProb() <getProb(); + + } + */ + FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const { + + VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << endl); + + const SyntacticLanguageModelState& prev = + static_cast&>(*prev_state); + + const SyntacticLanguageModelState* currentState = &prev; + SyntacticLanguageModelState* nextState = NULL; + + + const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase(); + + for (size_t i=0, n=targetPhrase.GetSize(); iGetString(); + + if (i==0) { + nextState = new SyntacticLanguageModelState(&prev, string); + } else { + currentState = nextState; + nextState = new SyntacticLanguageModelState(currentState, string); + } + + double score = nextState->getScore(); + VERBOSE(3,"SynLM evaluated a score of " << score << endl); + accumulator->Assign( this, score ); + } + + + + return nextState; + + } + +} diff --git a/moses/src/SyntacticLanguageModel.h b/moses/src/SyntacticLanguageModel.h new file mode 100755 index 000000000..977a57680 --- /dev/null +++ 
b/moses/src/SyntacticLanguageModel.h @@ -0,0 +1,52 @@ +// + +#ifndef moses_SyntacticLanguageModel_h +#define moses_SyntacticLanguageModel_h + +#include "FeatureFunction.h" + + +class YModel; // hidden model +class XModel; // observed model + +namespace Moses +{ + + template class SyntacticLanguageModelFiles; + + class SyntacticLanguageModel : public StatefulFeatureFunction { + + public: + + SyntacticLanguageModel(const std::vector& filePaths, + const std::vector& weights, + const FactorType factorType, + const size_t beamWidth); + + ~SyntacticLanguageModel(); + + size_t GetNumScoreComponents() const; + std::string GetScoreProducerDescription() const; + std::string GetScoreProducerWeightShortName() const; + + const FFState* EmptyHypothesisState(const InputType &input) const; + + FFState* Evaluate(const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const; + + // double perplexity(); + + private: + + const size_t m_NumScoreComponents; + SyntacticLanguageModelFiles* m_files; + const FactorType m_factorType; + const size_t m_beamWidth; + + }; + + +} + +#endif diff --git a/moses/src/SyntacticLanguageModelFiles.h b/moses/src/SyntacticLanguageModelFiles.h new file mode 100755 index 000000000..318e22636 --- /dev/null +++ b/moses/src/SyntacticLanguageModelFiles.h @@ -0,0 +1,95 @@ +// + +#ifndef moses_SyntacticLanguageModelFiles_h +#define moses_SyntacticLanguageModelFiles_h + +#include "nl-iomacros.h" +#include "nl-string.h" + +namespace Moses +{ + +template +class SyntacticLanguageModelFiles { + + public: + + SyntacticLanguageModelFiles(const std::vector& filePaths); + ~SyntacticLanguageModelFiles(); + + MH* getHiddenModel(); + MO* getObservedModel(); + + private: + MH* hiddenModel; + MO* observedModel; + +}; + + +template + SyntacticLanguageModelFiles::SyntacticLanguageModelFiles(const std::vector& filePaths) { + + this->hiddenModel = new MH(); + this->observedModel = new MO(); + + //// I. LOAD MODELS... + std::cerr << "Reading syntactic language model files...\n"; + // For each model file... 
+ for ( int a=0, n=filePaths.size(); a>*(this->hiddenModel)>>"\0"!=NULL + || si>>*(this->observedModel)>>"\0"!=NULL + )) + std::cerr<<"\nERROR: can't parse \'"< + SyntacticLanguageModelFiles::~SyntacticLanguageModelFiles() { + + std::cerr<<"Destructing syntactic language model files" << std::endl; + //delete hiddenModel; + //delete observedModel; + +} + + +template + MH* SyntacticLanguageModelFiles::getHiddenModel() { + + return this->hiddenModel; + +} + +template + MO* SyntacticLanguageModelFiles::getObservedModel() { + + return this->observedModel; + +} + + +} + +#endif diff --git a/moses/src/SyntacticLanguageModelState.h b/moses/src/SyntacticLanguageModelState.h new file mode 100755 index 000000000..0877a59b3 --- /dev/null +++ b/moses/src/SyntacticLanguageModelState.h @@ -0,0 +1,303 @@ +// + +#ifndef moses_SyntacticLanguageModelState_h +#define moses_SyntacticLanguageModelState_h + +#include "nl-iomacros.h" +#include "nl-cpt.h" +#include "nl-hmm.h" + +#include "SyntacticLanguageModelFiles.h" +#include "FFState.h" +#include + +namespace Moses +{ + +template > + class SyntacticLanguageModelState : public FFState { + public: + + // Initialize an empty LM state + SyntacticLanguageModelState( SyntacticLanguageModelFiles* modelData, int beamSize ); + + // Get the next LM state from an existing LM state and the next word + SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ); + + + ~SyntacticLanguageModelState() { + //cerr << "Deleting SyntacticLanguageModelState" << std::endl; + //delete randomVariableStore; + } + + virtual int Compare(const FFState& other) const; + + // Get the LM score from this LM state + double getScore() const; + + double getProb() const; + + private: + + void setScore(double score); + void printRV(); + + SafeArray1D,pair >* randomVariableStore; + double prob; + double score; + int beamSize; + SyntacticLanguageModelFiles* modelData; + bool sentenceStart; +}; + + +//////////////////////////////////////////////////////////////////////////////// + + + template + void SyntacticLanguageModelState::printRV() { + + cerr << "*********** BEGIN printRV() ******************" << endl; + int size=randomVariableStore->getSize(); + cerr << "randomVariableStore->getSize() == " << size << endl; + + for (int depth=0; depth *data = &(randomVariableStore->get(depth)); + std::cerr << "randomVariableStore[" << depth << "]\t" << data->first << "\tprob = " << data->second.toProb() << "\tlogProb = " << double(data->second.toInt())/100 << std::endl; + + } + cerr << "*********** END printRV() ******************" << endl; + + } + +// Initialize an empty LM state from grammar files +// +// nArgs is the number of model files +// argv is the list of model file names +// +template + SyntacticLanguageModelState::SyntacticLanguageModelState( SyntacticLanguageModelFiles* modelData, int beamSize ) { + + this->randomVariableStore = new SafeArray1D,pair >(); + this->modelData = modelData; + this->beamSize = beamSize; + + // Initialize an empty random variable value + YS xBEG; + StringInput(String(BEG_STATE).c_array())>>xBEG>>"\0"; + cerr<randomVariableStore->init(1,pair(xBEG,0)); + + this->sentenceStart = true; + + IFVERBOSE(3) { + VERBOSE(3,"Examining RV store just after RV init" << endl); + printRV(); + } + + // Get score of final frame in HHMM + LogProb l(1.0); + //score = l.toDouble(); + setScore(l.toDouble()); + // MY::F_ROOT_OBS = true; + // this->modelData->getHiddenModel()->setRootObs(true); + + +} + + +template + int SyntacticLanguageModelState::Compare(const 
FFState& other) const { + /* + const SyntacticLanguageModelState& o = + static_cast&>(other); + + if (o.score > score) return 1; + else if (o.score < score) return -1; + else return 0; + */ + return 0; + } + + +template + SyntacticLanguageModelState::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ) { + + // Initialize member variables + this->randomVariableStore = new SafeArray1D,pair >(); + this->modelData = prev->modelData; + this->beamSize = prev->beamSize; + this->randomVariableStore->init(this->beamSize); + this->sentenceStart=false; + + YS ysEND; + StringInput(String(END_STATE).c_array())>>ysEND>>"\0"; + + // Get HHMM model files + MY& mH = *(modelData->getHiddenModel()); + MX& mO = *(modelData->getObservedModel()); + + // Initialize HHMM + HMM hmm(mH,mO); + int MAX_WORDS = 2; + hmm.init(MAX_WORDS,this->beamSize,prev->randomVariableStore); + typename MX::RandVarType x(word.c_str()); + // cout << "Examining HHMM just after hmm.init" << endl; + // hmm.debugPrint(); + + + /* cerr << "*********** BEGIN writeCurr() ******************" << endl; + hmm.writeCurr(cout,0); + hmm.writeCurr(cout,1); + cerr << "*********** END writeCurr() ******************" << endl; + */ +/* + { + + int wnum=1; + list > lys = hmm.getMLSnodes(ysEND); // get mls list + for ( typename list >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame + cout << "HYPOTH " << wnum + << " " << i->getBackData() + << " " << x + << " " << i->getId() + << " (" << i->getLogProb() << ")" + << endl; // print RV val + } + } + */ + + + /* + cerr << "Writing hmm.writeCurr" << endl; + hmm.writeCurr(cerr,0); + hmm.writeCurr(cerr,1); + cerr << "...done writing hmm.writeCurr" << endl; + */ + hmm.getCurrSum(); + + + + // Initialize observed variable + // typename MX::RandVarType ov; + // ov.set(word.c_str(),mO); + // MY::WORD = ov.getW(); + //bool endOfSentence = prev->sentenceStart;//true; + + // std::cerr << "About to give HHMM a word of input:\t" << word << std::endl; + + hmm.updateRanked(x, prev->sentenceStart); + + // cout << "Examining HHMM just after hmm.updateRanked(" << x << "," << prev->sentenceStart << ")" << endl; + // hmm.debugPrint(); +/* + cerr << "*********** BEGIN writeCurr() ******************" << endl; + hmm.writeCurr(cout,0); + hmm.writeCurr(cout,1); + cerr << "*********** END writeCurr() ******************" << endl; + */ +/* +{ + + int wnum=1; + list > lys = hmm.getMLSnodes(ysEND); // get mls list + for ( typename list >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame + cout << "HYPOTH " << wnum + << " " << i->getBackData() + << " " << x + << " " << i->getId() + << " (" << i->getLogProb() << ")" + << endl; // print RV val + } + } + */ +// X ov(word.c_str()); + //mH.setWord(ov); + // MY::WORD = ov;//ov.getW(); + + // Update HHMM based on observed variable + //hmm.updateRanked(ov); + //mH.setRootObs(true); + //MY::F_ROOT_OBS = false; + + // Get the current score + double currSum = hmm.getCurrSum(); + //VERBOSE(3,"Setting score using currSum for " << scientific << x << " = " << currSum << endl); + setScore(currSum); + // cout << "Examining RV store just before RV init via gatherElementsInBeam" << endl; + // printRV(); + + // Get new hidden random variable store from HHMM + hmm.gatherElementsInBeam(randomVariableStore); + // cout << "Examining RV store just after RV init via gatherElementsInBeam" << endl; + // printRV(); + /* + cerr << "Writing hmm.writeCurr..." 
<< endl; + hmm.writeCurr(cerr,0); + hmm.writeCurr(cerr,1); + cerr << "...done writing hmm.writeCurr" << endl; + */ +} + + +template +double SyntacticLanguageModelState::getProb() const { + + return prob; +} + +template +double SyntacticLanguageModelState::getScore() const { + + return score; +} + + +template + void SyntacticLanguageModelState::setScore(double score) { + + + + + this->prob = score; + + // We want values to range from -100 to 0 + // + // If the minimum positive value for a double is min=4.94065645841246544e-324 + // then to scale, we want a logarithmic base such that log_b(min)=-100 + // + // -100 = log(min) / log(b) + // + // log(b) = log(min) / -100 + // + // b = exp( log(min) / -100 ) + // + // b = 7.44440071921381 + + // Check for score==0 to avoid causing -infinity with log(score) + if (score==0) { + this->score = -100; + } else { + double x = log(score) / 7.44440071921381; + if ( x >= -100) { + this->score = x; + } else { + this->score = -100; + } + } + + VERBOSE(3,"\tSyntacticLanguageModelState has score=" << this->score << endl); + +} + + +} + +#endif diff --git a/regenerate-makefiles.sh b/regenerate-makefiles.sh index 747dabf88..c4c2e8bee 100755 --- a/regenerate-makefiles.sh +++ b/regenerate-makefiles.sh @@ -54,7 +54,7 @@ $LIBTOOLIZE || die "libtoolize failed" echo echo "You should now be able to configure and build:" -echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]" +echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-synlm=/path/to/modelblocks] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]" echo " make -j 4" echo diff --git a/scripts/generic/balance-corpus b/scripts/generic/balance-corpus new file mode 100644 index 000000000..647fa4502 --- /dev/null +++ b/scripts/generic/balance-corpus @@ -0,0 +1,392 @@ +#!/usr/bin/ruby -w + +require 'optparse' +require 'ostruct' +require 'pp' +require 'set' + +options = OpenStruct.new +OptionParser.new { |opts| + + opts.banner = "Usage: #{$0} [options]" + + opts.on("-n N","--num-parts N", Integer, "Number of parts into which the corpus should be split") { |v| + options.parts = v + options.parts_digits = options.parts.to_s.length + } + + opts.on("-i FILE", "--corpus", String, "Corpus to split") { |v| + options.corpus = v + } + + options.reference = Array.new + opts.on("-r FILE", "--reference", String, "Reference file") { |v| + options.reference << v + } + + options.put_all = false + opts.on("-a","--all","Output all lines into a single file, in addition to split files") { |v| + options.put_all = v + } + + options.max_words = 1.0/0.0 + opts.on("-m N","--max-words", Integer, "Maximum number of words allowed in a line") { |v| + options.max_words = v + } + + options.min_words = 1 + opts.on("--min-words N", Integer, "Minimum number of words allowed in a line") { |v| + options.min_words = v + } + + options.index_prefix = false + opts.on("--index-prefix FILE_PREFIX", String, "Index file name prefixing the part number") { |v| + options.index_prefix = v + } + + opts.on("-p FILE_PREFIX","--prefix FILE_PREFIX", String, "File name prefixing the part number") { |v| + options.output_prefix = v + } + + opts.on("-s FILE_SUFFIX","--suffix FILE_SUFFIX", String, "File name suffixing the part number") { |v| + options.output_suffix = v + } + + options.ref_prefix = Array.new + opts.on("--ref-prefix FILE_PREFIX", String, "File name prefixing the 
part number") { |v| + options.ref_prefix << v + } + + options.ref_suffix = Array.new + opts.on("--ref-suffix FILE_SUFFIX", String, "File name suffixing the part number") { |v| + options.ref_suffix << v + } + + options.balance_naive = false + opts.on("--balance-naive","Balance according to combined number of lines") { |v| + options.balance_naive = v + } + + options.balance_histogram = false + opts.on("-h","--balance-histogram","Balance according to sentence length histogram") { |v| + options.balance_histogram = v + } + + options.balance_word_count = true + opts.on("-w","--balance-words","Balance according to combined number of words") { |v| + options.balance_word_count = v + } + + options.balance_time = false + opts.on("-t TIMES","--balance-time TIMES","Balance according to estimated per-sentence processing time") { |v| + options.balance_time = v + } + + options.verbose = false + opts.on("-v","--[no-]verbose","Turn verbose on") { |v| + options.verbose = v + } + + options.zero_pad = true + opts.on("-z","--[no-]zeropad","Zero pad file names") { |v| + options.zero_pad = v + } + + if ARGV.length==0 + puts opts + exit + end + + +}.parse! + + + + +class LineSize + include Comparable + + attr_reader :size, :index + attr_writer :size + + @@max_index_digits = 0 + @@max_size_digits = 0 + + def initialize(line,index) + @index = index + @size = line.strip.split(/\s+/).length + + index_digits = @index.to_s.length + @@max_index_digits = index_digits if (index_digits > @@max_index_digits) + + size_digits = @size.to_s.length + @@max_size_digits = size_digits if (size_digits > @@max_size_digits) + end + + def <=>(other) + if @size==other.size + @index <=> other.index + else + size <=> other.size + end + end + + def to_s + sprintf("Line %#{@@max_index_digits}i: %#{@@max_size_digits}i words",@index, @size) + end +end + + + +def split_into_parts(file,part_for_line,parts,output_prefix,output_suffix,verbose,put_all,zeropad,index_prefix) + + if (zeropad) + parts_digits = parts.to_s.length + else + parts_digits = 0 + end + + out = Hash.new + all = File.new("#{output_prefix}_all#{output_suffix}","a") if put_all + index_out = Hash.new + + 1.upto(parts) {|v| + + file_name = sprintf("%s%0#{parts_digits}i%s",output_prefix,v,output_suffix) + out[v] = File.new(file_name,"w") + + unless index_prefix==false + index_file_name = sprintf("%s%0#{parts_digits}i",index_prefix,v) + index_out[v] = File.new(index_file_name,"w") + end + } + + + File.open(file).each_with_index { |line,index| + + + if (part_for_line.has_key?(index)) + puts "index==#{index}\tpart_for_line[#{index}]==#{part_for_line[index]}" if out[part_for_line[index]]==nil + if verbose + STDERR.puts "Line #{index} goes in #{out[part_for_line[index]].path} #{line}" + end + + out[part_for_line[index]].puts(line) + index_out[part_for_line[index]].puts(index) unless index_prefix==false + + elsif verbose + STDERR.puts "Line #{index} will be skipped #{line}" + end + } + + out.each_value { |file| + file.close + } + + + if (put_all) + 1.upto(parts) {|v| + + file_name = sprintf("%s%0#{parts_digits}i%s",output_prefix,v,output_suffix) + File.open(file_name,"r").each { |line| + all.puts(line) + } + + } + + all.close + end + +end + + +def index_of_least(array) + best=1.0/0 #Infinity + best_index=0 + array.each_with_index {|v,i| + if (v options.max_words + + STDERR.puts "Line #{index} is too long: #{line_size.size} words. 
Max allowed is #{options.max_words}" if options.verbose + skipped_lines.add(index) + + elsif line_size.size < options.min_words + + STDERR.puts "Line #{index} is too short: #{line_size.size} words. Min allowed is #{options.min_words}" if options.verbose + skipped_lines.add(index) + + else + + words_per_line.push(line_size) + + end +} + + +if (options.balance_naive) + + total_lines=words_per_line.size + + STDERR.puts "total_lines=#{total_lines}" if options.verbose + + ceil=(total_lines/options.parts.to_f).ceil + floor=(total_lines/options.parts.to_f).floor + + part_ceil = total_lines - floor*options.parts + part_floor = options.parts - part_ceil + + STDERR.puts "#{ceil}*#{part_ceil} + #{floor}*#{part_floor} = #{ceil*part_ceil + floor*part_floor}" if options.verbose + + + partition = 1 + lines_in_this_part = 0 + + 0.upto(total_lines-1) { |index| + + unless skipped_lines.include?(index) + if (partition <= part_ceil) + if (lines_in_this_part >= ceil) + STDERR.puts "Partition #{partition} has #{lines_in_this_part} lines" if options.verbose + lines_in_this_part=0 + partition += 1 + end + else + if (lines_in_this_part >= floor) + STDERR.puts "Partition #{partition} has #{lines_in_this_part} lines" if options.verbose + lines_in_this_part=0 + partition += 1 + end + end + + part_for_line[index] = partition + lines_in_this_part += 1 + puts "part_for_line[#{index}] = #{partition}" if options.verbose + end + + } + +elsif (options.balance_histogram) + + STDERR.puts "Balancing according to sentence length histogram" + + words_per_line.sort! + + + index=0 + + words_per_line.each { |lineSize| + if index x } + + # Store the number of words that have been placed in each partition + words_in_part = Array.new(options.parts,0) + + # At this point, words_per_line should be sorted with the longest sentences first + words_per_line.each { |lineSize| + partition = index_of_least(words_in_part) + STDERR.puts "Line #{lineSize.index}\t#{lineSize.size} #{measure_unit}\tPartition #{partition}" if options.verbose + part_for_line[lineSize.index] = partition+1 # part_for_line needs a 1-based partition index, so add 1 + words_in_part[partition] += lineSize.size + } + + if (options.verbose) + words_in_part.each_with_index { |words,partition| + STDERR.puts "Partition #{partition}\t#{words} #{measure_unit}" + } + end + +else + + + exit; + +end + + +split_into_parts( + options.corpus, + part_for_line, + options.parts, + options.output_prefix, + options.output_suffix, + options.verbose, + options.put_all, + options.zero_pad, + options.index_prefix) + + + +options.reference.each_with_index { |reference,index| + + split_into_parts( + reference, + part_for_line, + options.parts, + options.ref_prefix[index], + options.ref_suffix[index], + options.verbose, + options.put_all, + options.zero_pad, + false) + +} diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl index fca0c1d31..f4d0b4551 100755 --- a/scripts/training/mert-moses.pl +++ b/scripts/training/mert-moses.pl @@ -62,6 +62,7 @@ my $additional_triples = { # (due to additional tables) use the following values for them "d" => [ [ 1.0, 0.0, 2.0 ] ], # lexicalized reordering model "lm" => [ [ 1.0, 0.0, 2.0 ] ], # language model + "slm"=> [ [ 1.0, 0.0, 2.0 ] ], # language model "g" => [ [ 1.0, 0.0, 2.0 ], # generation model [ 1.0, 0.0, 2.0 ] ], "tm" => [ [ 0.3, 0.0, 0.5 ], # translation model @@ -79,14 +80,14 @@ my $additional_tripes_loop = { map { ($_, 1) } qw/ d I / }; # moses.ini file uses FULL names for lambdas, while this training script internally (and on 
the command line) # uses ABBR names. -my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation lex=weight-lex I=weight-i"; +my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation slm=weight-slm lex=weight-lex I=weight-i"; my %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP; my %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP; # We parse moses.ini to figure out how many weights do we need to optimize. # For this, we must know the correspondence between options defining files # for models and options assigning weights to these models. -my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d generation-file=g global-lexical-file=lex link-param-count=I"; +my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d slmodel-file=slm generation-file=g global-lexical-file=lex link-param-count=I"; my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP; # There are weights that do not correspond to any input file, they just increase the total number of lambdas we optimize @@ -901,7 +902,12 @@ sub run_decoder { my $decoder_cmd; if (defined $___JOBS && $___JOBS > 0) { - $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$parameters $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out"; + my $times_params="-timesfile run$run.times"; + if ($run>1) { + my $prevrun=$run-1; + $times_params.=" -existingtimesfile run$prevrun.times"; + } + $decoder_cmd = "$moses_parallel_cmd $pass_old_sge $times_params -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$parameters $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out"; } else { $decoder_cmd = "$___DECODER $parameters -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out"; } @@ -1107,6 +1113,7 @@ sub scan_config { "lmodel-file" => 3, "distortion-file" => 3, "global-lexical-file" => 1, + "slmodel-file" => 0, ); # by default, each line of each section means one lambda, but some sections # explicitly state a custom number of lambdas -- cgit v1.2.3
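
The Evaluate() method added in moses/src/SyntacticLanguageModel.cpp walks the words of the current target phrase, builds a new syntactic-LM state from the previous hypothesis state for each word, and credits each word's score to the feature via accumulator->Assign(); the last state constructed is returned and carried forward with the new hypothesis. The self-contained sketch below shows that per-word state-chaining pattern; State, Extend, and Accumulator are simplified stand-ins invented for illustration, not the actual Moses FFState/ScoreComponentCollection classes.

// Sketch of the per-word state chaining used by a stateful feature function.
// The types below are simplified stand-ins, not the Moses API.
#include <iostream>
#include <string>
#include <vector>

// Stand-in for an FFState: whatever the feature needs to keep scoring later words.
struct State {
  double score;        // log-domain score contributed by the most recent word
  std::string context; // toy "parser state": here just the previous word
};

// Derive the next state from the previous one and the next target word.
// A real syntactic LM would advance its incremental parser here.
static State Extend(const State& prev, const std::string& word) {
  State next;
  next.context = word;
  // Toy per-word cost: repeated words are penalised more heavily.
  next.score = (word == prev.context) ? -1.0 : -0.1 * word.size();
  return next;
}

// Stand-in for ScoreComponentCollection: accumulates this feature's score.
struct Accumulator {
  double total = 0.0;
  void Assign(double score) { total += score; }
};

// Mirrors the shape of Evaluate(): start from the previous hypothesis state,
// extend it once per word of the target phrase, and return the final state.
static State Evaluate(const State& prevState,
                      const std::vector<std::string>& targetPhrase,
                      Accumulator* accumulator) {
  State current = prevState;
  for (const std::string& word : targetPhrase) {
    current = Extend(current, word);     // chain: each word builds on the last state
    accumulator->Assign(current.score);  // credit this feature's per-word score
  }
  return current;                        // stored with the new hypothesis
}

int main() {
  State empty{0.0, "<s>"};  // analogue of EmptyHypothesisState()
  Accumulator acc;
  State last = Evaluate(empty, {"the", "cat", "sat"}, &acc);
  std::cout << "accumulated score = " << acc.total
            << ", final context = " << last.context << "\n";
  return 0;
}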
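
The SyntacticLanguageModelFiles constructor reads each model file named in the configuration and requires every line to be consumed by either the hidden (syntactic, YModel) model or the observed (lexical, XModel) model, reporting a parse error otherwise. The sketch below captures only that loading pattern; the toy parsers, the comment-skipping convenience, and the file names are assumptions made for illustration, since the real parsing classes come from the external modelblocks headers and are not reproduced here.

// Sketch of the model-file loading pattern in SyntacticLanguageModelFiles:
// every usable line of every file must be accepted by either the hidden
// (syntax) model or the observed (lexical) model.  ToyHidden/ToyObserved are
// illustrative stand-ins, not the real parser classes.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

struct ToyHidden {
  bool parse(const std::string& line) { return line.rfind("Y ", 0) == 0; }
};
struct ToyObserved {
  bool parse(const std::string& line) { return line.rfind("X ", 0) == 0; }
};

template <class MH, class MO>
bool loadModelFiles(const std::vector<std::string>& paths, MH& hidden, MO& observed) {
  std::cerr << "Reading syntactic language model files...\n";
  for (const std::string& path : paths) {
    std::ifstream in(path);
    if (!in) {
      std::cerr << "ERROR: cannot open " << path << "\n";
      return false;
    }
    std::string line;
    while (std::getline(in, line)) {
      if (line.empty() || line[0] == '#') continue;        // skip blank/comment lines (sketch convenience)
      if (!hidden.parse(line) && !observed.parse(line)) {  // neither model accepts the line
        std::cerr << "ERROR: can't parse '" << line << "' in " << path << "\n";
        return false;
      }
    }
  }
  return true;
}

int main() {
  ToyHidden hidden;
  ToyObserved observed;
  // Hypothetical file names; the real paths come from the decoder configuration.
  std::vector<std::string> paths = {"hidden.model", "observed.model"};
  std::cout << (loadModelFiles(paths, hidden, observed) ? "loaded\n" : "failed\n");
  return 0;
}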
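
setScore() in moses/src/SyntacticLanguageModelState.h maps a raw probability into the range [-100, 0] by taking its logarithm in a base b chosen so that the smallest positive double comes out as exactly -100. The literal 7.44440071921381 is ln(b), i.e. ln(denorm_min)/-100, and dividing natural logs by it is the change-of-base formula, so the stored value is log_b(p), with p == 0 clamped to -100. A short check of that arithmetic, assuming IEEE-754 doubles:

// Check the scaling used by setScore(): scores are log_b(p) with b chosen so
// that the smallest positive double maps to -100 and p = 1 maps to 0.
#include <cmath>
#include <iostream>
#include <limits>

int main() {
  // Smallest positive (subnormal) double, 2^-1074, quoted in the code comment.
  const double tiny = std::numeric_limits<double>::denorm_min();

  // ln(b) such that ln(tiny)/ln(b) == -100.  This reproduces 7.44440071921381.
  const double log_b = std::log(tiny) / -100.0;
  const double b = std::exp(log_b);  // the base itself, roughly 1710

  auto scaled = [&](double p) { return std::log(p) / log_b; };  // == log_b(p)

  std::cout << "ln(b) = " << log_b << "\n";                 // ~7.4444007192
  std::cout << "b     = " << b << "\n";
  std::cout << "scaled(tiny) = " << scaled(tiny) << "\n";   // -100
  std::cout << "scaled(1.0)  = " << scaled(1.0) << "\n";    // 0
  std::cout << "scaled(0.5)  = " << scaled(0.5) << "\n";    // about -0.09
  return 0;
}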
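
The --balance-naive mode of scripts/generic/balance-corpus splits purely by line count: with T usable lines and P parts, the first part_ceil = T - floor(T/P)*P parts receive ceil(T/P) lines and the remaining part_floor = P - part_ceil parts receive floor(T/P) lines, which always adds back up to T. For example, 10 lines over 3 parts gives ceil = 4, floor = 3, part_ceil = 10 - 3*3 = 1, part_floor = 2, and 4*1 + 3*2 = 10. A minimal check of that identity, with example values:

// Verify the line-count arithmetic behind --balance-naive:
// part_ceil parts of size ceil(T/P) plus part_floor parts of size floor(T/P)
// always cover exactly T lines.
#include <iostream>

int main() {
  const long T = 100001;  // total usable lines (example value)
  const long P = 16;      // number of parts (example value)

  const long floorSize = T / P;
  const long ceilSize = (T + P - 1) / P;
  const long partCeil = T - floorSize * P;  // parts that get the larger share
  const long partFloor = P - partCeil;      // parts that get the smaller share

  std::cout << partCeil << " parts of " << ceilSize << " lines + "
            << partFloor << " parts of " << floorSize << " lines = "
            << partCeil * ceilSize + partFloor * floorSize
            << " (expected " << T << ")\n";
  return 0;
}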
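
The default --balance-words mode instead sorts lines longest-first and hands each one to whichever part currently holds the fewest words (the script's index_of_least helper), keeping the total word count of the parts nearly equal. The same greedy assignment, sketched in C++ rather than the script's Ruby so that all of these examples stay in one language:

// Greedy "balance by word count": longest lines first, each assigned to the
// part that currently has the fewest words.  Mirrors the --balance-words path.
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  const int parts = 3;
  // (line index, word count) pairs standing in for the corpus lines.
  std::vector<std::pair<int, int>> lines = {
      {0, 12}, {1, 3}, {2, 25}, {3, 7}, {4, 9}, {5, 30}, {6, 5}, {7, 14}};

  // Longest sentences first, ties broken by line index (as in LineSize#<=>).
  std::sort(lines.begin(), lines.end(), [](const auto& a, const auto& b) {
    return a.second != b.second ? a.second > b.second : a.first < b.first;
  });

  std::vector<long> wordsInPart(parts, 0);
  std::vector<int> partForLine(lines.size(), -1);

  for (const auto& [index, words] : lines) {
    // index_of_least: the part with the smallest load so far.
    const int target = static_cast<int>(
        std::min_element(wordsInPart.begin(), wordsInPart.end()) - wordsInPart.begin());
    partForLine[index] = target + 1;  // the script uses 1-based part numbers
    wordsInPart[target] += words;
  }

  for (int p = 0; p < parts; ++p)
    std::cout << "part " << (p + 1) << ": " << wordsInPart[p] << " words\n";
  return 0;
}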
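
The mert-moses.pl changes register the new syntactic LM weight under the abbreviation slm: it is added to $ABBR_FULL_MAP as slm=weight-slm, tied to its table entry via slmodel-file=slm in $TABLECONFIG_ABBR_MAP, and given a default lambda triple of [1.0, 0.0, 2.0]. The script builds its lookup hashes by splitting those whitespace-separated abbr=full strings; the same parsing, sketched in C++ with the map contents taken from the patch:

// Build the abbreviation <-> full-name lookups from a whitespace-separated
// "abbr=full" string, as mert-moses.pl does with $ABBR_FULL_MAP.
#include <iostream>
#include <map>
#include <sstream>
#include <string>

int main() {
  // Entries copied from the patched $ABBR_FULL_MAP, including the new slm weight.
  const std::string abbrFullMap =
      "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation "
      "slm=weight-slm lex=weight-lex I=weight-i";

  std::map<std::string, std::string> abbr2full, full2abbr;
  std::istringstream tokens(abbrFullMap);
  std::string entry;
  while (tokens >> entry) {
    const std::size_t eq = entry.find('=');
    const std::string abbr = entry.substr(0, eq);
    const std::string full = entry.substr(eq + 1);
    abbr2full[abbr] = full;
    full2abbr[full] = abbr;
  }

  std::cout << "slm -> " << abbr2full["slm"] << "\n";                // weight-slm
  std::cout << "weight-slm -> " << full2abbr["weight-slm"] << "\n";  // slm
  return 0;
}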
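
run_decoder() now also passes per-run timing files to moses-parallel when jobs are used: every run gets -timesfile runN.times, and from the second run on the previous run's file is supplied via -existingtimesfile, presumably so earlier timings can inform how later runs are balanced. A sketch of just that flag construction (again in C++, standing in for the Perl):

// How the patched mert-moses.pl chains per-run timing files for moses-parallel:
// run N writes runN.times; runs after the first also pass run(N-1).times.
#include <iostream>
#include <sstream>
#include <string>

std::string timesParams(int run) {
  std::ostringstream flags;
  flags << "-timesfile run" << run << ".times";
  if (run > 1)
    flags << " -existingtimesfile run" << (run - 1) << ".times";
  return flags.str();
}

int main() {
  std::cout << timesParams(1) << "\n";  // -timesfile run1.times
  std::cout << timesParams(2) << "\n";  // -timesfile run2.times -existingtimesfile run1.times
  return 0;
}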