Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.h.in13
-rw-r--r--configure.in42
-rw-r--r--moses/src/Makefile.am8
-rw-r--r--moses/src/Parameter.cpp6
-rw-r--r--moses/src/StaticData.cpp71
-rw-r--r--moses/src/StaticData.h6
-rwxr-xr-xmoses/src/SyntacticLanguageModel.cpp123
-rwxr-xr-xmoses/src/SyntacticLanguageModel.h52
-rwxr-xr-xmoses/src/SyntacticLanguageModelFiles.h95
-rwxr-xr-xmoses/src/SyntacticLanguageModelState.h303
-rwxr-xr-xregenerate-makefiles.sh2
11 files changed, 710 insertions, 11 deletions
diff --git a/config.h.in b/config.h.in
index 264a888a5..f2f6ed881 100644
--- a/config.h.in
+++ b/config.h.in
@@ -30,6 +30,9 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
+/* Define to 1 if you have the <nl-cpt.h> header file. */
+#undef HAVE_NL_CPT_H
+
/* flag for protobuf */
#undef HAVE_PROTOBUF
@@ -51,6 +54,9 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
+/* flag for Syntactic Parser */
+#undef HAVE_SYNLM
+
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
@@ -60,10 +66,6 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
-#undef LT_OBJDIR
-
/* Name of package */
#undef PACKAGE
@@ -79,9 +81,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
-/* Define to the home page for this package. */
-#undef PACKAGE_URL
-
/* Define to the version of this package. */
#undef PACKAGE_VERSION
diff --git a/configure.in b/configure.in
index 8778fa91a..255d8fadd 100644
--- a/configure.in
+++ b/configure.in
@@ -34,6 +34,13 @@ AC_ARG_WITH(srilm-dynamic,
[with_srilm_dynamic=no]
)
+AC_ARG_WITH(srilm-arch,
+ [AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])],
+ [with_srilm_arch=$withval],
+ [with_srilm_arch=no]
+ )
+
+
AC_ARG_WITH(irstlm,
[AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])],
[with_irstlm=$withval],
@@ -52,6 +59,12 @@ AC_ARG_WITH(randlm,
[with_randlm=no]
)
+AC_ARG_WITH(synlm,
+ [AC_HELP_STRING([--with-synlm=PATH], [(optional) path to syntactic language model parser])],
+ [with_synlm=$withval],
+ [with_synlm=no]
+ )
+
AC_ARG_WITH(notrace,
[AC_HELP_STRING([--notrace], [disable trace])],
[without_trace=yes],
@@ -94,7 +107,7 @@ AC_ARG_WITH(tcmalloc,
)
require_boost=no
-if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno'
+if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno' || test "x$with_synlm" != 'xno'
then
require_boost=yes
fi
@@ -111,6 +124,7 @@ AM_CONDITIONAL([SRI_LM], false)
AM_CONDITIONAL([IRST_LM], false)
AM_CONDITIONAL([KEN_LM], false)
AM_CONDITIONAL([RAND_LM], false)
+AM_CONDITIONAL([SYN_LM], false)
AM_CONDITIONAL([PROTOBUF], false)
AM_CONDITIONAL([am__fastdepCC], false)
AM_CONDITIONAL([WITH_THREADS],false)
@@ -130,7 +144,7 @@ then
BOOST_REQUIRE([1.36.0])
fi
-if test "x$enable_threads" = 'xyes'
+# Build threaded moses when threads are enabled or when the syntactic LM is requested.
+if test "x$enable_threads" = 'xyes' || test "x$with_synlm" != 'xno'
then
AC_MSG_NOTICE([Building threaded moses])
BOOST_THREADS
@@ -172,7 +186,12 @@ then
# ROOT/lib/i686-m64/liboolm.a
# ROOT/lib/i686-m64/libdstruct.a
# ROOT/lib/i686-m64/libmisc.a
- MY_ARCH=`${with_srilm}/sbin/machine-type`
+ if test "x$with_srilm_arch" != 'xno'
+ then
+ MY_ARCH=${with_srilm_arch}
+ else
+ MY_ARCH=`${with_srilm}/sbin/machine-type`
+ fi
LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}"
LIBS="$LIBS $LIB_SRILM"
FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
@@ -252,6 +271,7 @@ then
AC_CHECK_LIB([tcmalloc], [malloc], [], [AC_MSG_ERROR([Cannot find tcmalloc])])
fi
+
if test "x$enable_boost_pool" != 'xno'
then
AC_CHECK_HEADER(boost/pool/object_pool.hpp,
@@ -260,6 +280,22 @@ then
)
fi
+if test "x$with_synlm" != 'xno'
+then
+ SAVE_CPPFLAGS="$CPPFLAGS"
+ CPPFLAGS="$CPPFLAGS -DWITH_THREADS -I${with_synlm}/rvtl/include -I${with_synlm}/wsjparse/include -lm"
+ # The two include paths above point into the tree given by --with-synlm.
+ # NOTE(review): SAVE_CPPFLAGS is assigned but never read back inside this block.
+ # NOTE(review): -lm looks like a linker flag placed in CPPFLAGS - confirm this is intended.
+ # HAVE_SYNLM is defined only when the nl-cpt.h header check below succeeds.
+ AC_CHECK_HEADERS(nl-cpt.h,
+ [AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])])
+
+ AM_CONDITIONAL([SYN_LM], true)
+
+fi
+
+
AM_CONDITIONAL([WITH_MERT],false)
AC_CHECK_HEADERS([getopt.h],
[AM_CONDITIONAL([WITH_MERT],true)],
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index 3ca22d3ec..7cb610430 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -149,6 +149,10 @@ libmoses_la_HEADERS += LanguageModelInternal.h \
NGramNode.h
endif
+if SYN_LM
+libmoses_la_HEADERS += SyntacticLanguageModel.h
+endif
+
libmoses_la_SOURCES = \
AlignmentInfo.cpp \
BilingualDynSuffixArray.cpp \
@@ -306,6 +310,10 @@ libmoses_la_SOURCES += LanguageModelInternal.cpp \
NGramNode.cpp
endif
+if SYN_LM
+libmoses_la_SOURCES += SyntacticLanguageModel.cpp
+endif
+
if KEN_LM
libmoses_la_SOURCES += LanguageModelKen.cpp
endif
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index e0173e49a..27170e0f8 100644
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -68,6 +68,12 @@ Parameter::Parameter()
AddParam("report-all-factors", "report all factors in output, not just first");
AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
AddParam("report-segmentation", "t", "report phrase segmentation in the output");
+#ifdef HAVE_SYNLM
+ AddParam("slmodel-file", "location of the syntactic language model file(s)");
+ AddParam("weight-slm", "slm", "weight(s) for syntactic language model");
+ AddParam("slmodel-factor", "factor to use with syntactic language model");
+ AddParam("slmodel-beam", "beam width to use with syntactic language model's parser");
+#endif
AddParam("stack", "s", "maximum stack size for histogram pruning");
AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index cb2981444..55cb82134 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -41,6 +41,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "DecodeGraph.h"
#include "InputFileStream.h"
+#ifdef HAVE_SYNLM
+#include "SyntacticLanguageModel.h"
+#endif
+
using namespace std;
namespace Moses
@@ -420,6 +424,12 @@ bool StaticData::LoadData(Parameter *parameter)
return false;
}
+#ifdef HAVE_SYNLM
+ if (m_parameter->GetParam("slmodel-file").size() > 0) {
+ if (!LoadSyntacticLanguageModel()) return false;
+ }
+#endif
+
if (!LoadLexicalReorderingModel()) return false;
if (!LoadLanguageModels()) return false;
if (!LoadGenerationTables()) return false;
@@ -542,6 +552,12 @@ StaticData::~StaticData()
RemoveAllInColl(m_generationDictionary);
RemoveAllInColl(m_reorderModels);
RemoveAllInColl(m_globalLexicalModels);
+
+#ifdef HAVE_SYNLM
+ delete m_syntacticLanguageModel;
+#endif
+
+
RemoveAllInColl(m_decodeGraphs);
RemoveAllInColl(m_wordPenaltyProducers);
RemoveAllInColl(m_distortionScoreProducers);
@@ -564,6 +580,61 @@ StaticData::~StaticData()
}
+#ifdef HAVE_SYNLM
+ /**
+  * Create the syntactic language model feature from the decoder parameters.
+  *
+  * Parameters read:
+  *   - "slmodel-file"   model file(s); at least one is required
+  *   - "weight-slm"     feature weight(s); the feature is only created when
+  *                      at least one weight is given
+  *   - "slmodel-factor" factor index handed to the model (default 0)
+  *   - "slmodel-beam"   parser beam width handed to the model (default 500)
+  *
+  * @return false when no model file is specified, true otherwise. When no
+  *         weight is configured, m_syntacticLanguageModel is left NULL.
+  */
+ bool StaticData::LoadSyntacticLanguageModel() {
+   cerr << "Loading syntactic language models..." << std::endl;
+
+   // Give the member a defined value on every path through this function;
+   // the destructor deletes it unconditionally and deleting NULL is a no-op.
+   m_syntacticLanguageModel = NULL;
+
+   const vector<float> weights = Scan<float>(m_parameter->GetParam("weight-slm"));
+   const vector<string> files = m_parameter->GetParam("slmodel-file");
+
+   if (files.empty()) {
+     cerr << "No syntactic language model files specified!" << std::endl;
+     return false;
+   }
+
+   // The factor index and the beam width are plain integers, so they are
+   // parsed directly. They must not go through TransformScore, which is a
+   // transform intended for model scores, not for indices or sizes.
+   const vector<string> factorParam = m_parameter->GetParam("slmodel-factor");
+   const FactorType factorType = factorParam.empty()
+     ? 0
+     : static_cast<FactorType>(Scan<int>(factorParam[0]));
+
+   const vector<string> beamParam = m_parameter->GetParam("slmodel-beam");
+   const size_t beamWidth = beamParam.empty()
+     ? 500
+     : static_cast<size_t>(Scan<int>(beamParam[0]));
+
+   // check if feature is used
+   if (weights.empty()) {
+     return true;
+   }
+
+   // create the feature; operator new reports failure by throwing, so a
+   // NULL check on the result would be dead code
+   m_syntacticLanguageModel = new SyntacticLanguageModel(files,weights,factorType,beamWidth);
+
+   return true;
+ }
+#endif
+
bool StaticData::LoadLexicalReorderingModel()
{
VERBOSE(1, "Loading lexical distortion models...");
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index b3ea80a60..115d6a1f9 100644
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -63,6 +63,9 @@ class GenerationDictionary;
class DistortionScoreProducer;
class DecodeStep;
class UnknownWordPenaltyProducer;
+#ifdef HAVE_SYNLM
+class SyntacticLanguageModel;
+#endif
class TranslationSystem;
typedef std::pair<std::string, float> UnknownLHSEntry;
@@ -81,6 +84,9 @@ protected:
Parameter *m_parameter;
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
LMList m_languageModel;
+#ifdef HAVE_SYNLM
+ SyntacticLanguageModel* m_syntacticLanguageModel;
+#endif
ScoreIndexManager m_scoreIndexManager;
std::vector<float> m_allWeights;
std::vector<LexicalReordering*> m_reorderModels;
diff --git a/moses/src/SyntacticLanguageModel.cpp b/moses/src/SyntacticLanguageModel.cpp
new file mode 100755
index 000000000..85c19bdc0
--- /dev/null
+++ b/moses/src/SyntacticLanguageModel.cpp
@@ -0,0 +1,123 @@
+//
+
+#include "StaticData.h"
+#include "SyntacticLanguageModel.h"
+#include "HHMMLangModel-gf.h"
+#include "TextObsModel.h"
+#include "SyntacticLanguageModelFiles.h"
+#include "SyntacticLanguageModelState.h"
+
+
+namespace Moses
+{
+ // Construct the feature: load the model files, then register this score
+ // producer and its weight(s) with the global StaticData instance.
+ SyntacticLanguageModel::SyntacticLanguageModel(const std::vector<std::string>& filePath,
+                                                const std::vector<float>& weights,
+                                                const FactorType factorType,
+                                                size_t beamWidth)
+   // Members listed in the order in which the class declares them
+   : m_NumScoreComponents(weights.size())
+   , m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath))
+   , m_factorType(factorType)
+   , m_beamWidth(beamWidth) {
+
+   const StaticData& staticData = StaticData::Instance();
+
+   // Both calls need mutable access to the otherwise const singleton.
+   const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
+   const_cast<StaticData&>(staticData).SetWeightsForScoreProducer(this, weights);
+
+   VERBOSE(3,"Constructed SyntacticLanguageModel" << std::endl);
+ }
+
+ SyntacticLanguageModel::~SyntacticLanguageModel() { // Only logs at verbosity 3; nothing is released here.
+ VERBOSE(3,"Destructing SyntacticLanguageModel" << std::endl);
+ // delete m_files; // NOTE(review): disabled, so the object allocated for m_files in the constructor is never freed - confirm this is intentional.
+ }
+
+ size_t SyntacticLanguageModel::GetNumScoreComponents() const { // Number of weights that were passed to the constructor.
+ return m_NumScoreComponents;
+ }
+
+ std::string SyntacticLanguageModel::GetScoreProducerDescription() const { // Fixed human-readable name of this feature.
+ return "Syntactic Language Model";
+ }
+
+ std::string SyntacticLanguageModel::GetScoreProducerWeightShortName() const { // Same short name ("slm") that the "weight-slm" parameter is registered with.
+ return "slm";
+ }
+
+ const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const {
+ // Returns a newly allocated initial state built from the loaded model files and the configured beam width; `input` is not used.
+ return new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);
+
+ }
+
+ /*
+ double SyntacticLanguageModel::perplexity() {
+
+ SyntacticLanguageModelState<YModel,XModel,S,R> *prev =
+ new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth);
+
+ std::cerr << "Initial prob:" << "\t" << prev->getProb() <<std::endl;
+
+
+ std::vector<std::string> words(3);
+ words[0] = "no";
+ words[1] = ",";
+ words[2] = "zxvth";
+
+
+ for (std::vector<std::string>::iterator i=words.begin();
+ i != words.end();
+ i++) {
+
+ prev = new SyntacticLanguageModelState<YModel,XModel,S,R>(prev, *i);
+ std::cerr << *i << "\t" << prev->getProb() <<std::endl;
+
+ }
+
+ if (true) exit(-1);
+
+ return prev->getProb();
+
+ }
+ */
+ /**
+  * Extend the parser state of the previous hypothesis with each word of the
+  * current target phrase and record the resulting score.
+  *
+  * @param cur_hypo    hypothesis whose current target phrase is scored
+  * @param prev_state  state of the preceding hypothesis; must be a
+  *                    SyntacticLanguageModelState (it is static_cast to one)
+  * @param accumulator receives the score via Assign() after every word, so
+  *                    the value left in it is the score of the last state
+  * @return newly allocated state after the last word of the phrase.
+  *         NOTE(review): NULL is returned when the target phrase has no
+  *         words - confirm that callers can cope with that.
+  */
+ FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo,
+                                           const FFState* prev_state,
+                                           ScoreComponentCollection* accumulator) const {
+
+   typedef SyntacticLanguageModelState<YModel,XModel,S,R> State;
+
+   VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << endl);
+
+   const State& prev = static_cast<const State&>(*prev_state);
+   const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+
+   State* nextState = NULL;
+
+   for (size_t i=0, n=targetPhrase.GetSize(); i<n; i++) {
+
+     const Word& word = targetPhrase.GetWord(i);
+     const Factor* factor = word.GetFactor(m_factorType);
+     const std::string& wordString = factor->GetString();
+
+     // The first word extends the incoming state; every later word extends
+     // the state that was built for the word before it.
+     const State* currentState = (nextState == NULL) ? &prev : nextState;
+     nextState = new State(currentState, wordString);
+
+     // An intermediate state is only needed to build its successor. Free it
+     // now instead of leaking one state per word. The incoming state is
+     // owned by the caller and is never deleted here.
+     if (currentState != &prev) {
+       delete currentState;
+     }
+
+     double score = nextState->getScore();
+     VERBOSE(3,"SynLM evaluated a score of " << score << endl);
+     accumulator->Assign( this, score );
+   }
+
+   return nextState;
+
+ }
+
+}
diff --git a/moses/src/SyntacticLanguageModel.h b/moses/src/SyntacticLanguageModel.h
new file mode 100755
index 000000000..977a57680
--- /dev/null
+++ b/moses/src/SyntacticLanguageModel.h
@@ -0,0 +1,52 @@
+//
+
+#ifndef moses_SyntacticLanguageModel_h
+#define moses_SyntacticLanguageModel_h
+
+#include "FeatureFunction.h"
+
+
+class YModel; // hidden model
+class XModel; // observed model
+
+namespace Moses
+{
+
+ template <class MH, class MO> class SyntacticLanguageModelFiles;
+
+ class SyntacticLanguageModel : public StatefulFeatureFunction {
+ // Stateful feature function that scores hypotheses with a syntactic language model.
+ public:
+ // filePaths: model files to load; weights: one entry per score component; factorType: factor read from each target word; beamWidth: beam width given to every state created.
+ SyntacticLanguageModel(const std::vector<std::string>& filePaths,
+ const std::vector<float>& weights,
+ const FactorType factorType,
+ const size_t beamWidth);
+
+ ~SyntacticLanguageModel();
+ // Score-producer identification.
+ size_t GetNumScoreComponents() const;
+ std::string GetScoreProducerDescription() const;
+ std::string GetScoreProducerWeightShortName() const;
+ // Creates the state used for the empty hypothesis.
+ const FFState* EmptyHypothesisState(const InputType &input) const;
+ // Extends prev_state with the words of cur_hypo's current target phrase and writes the score into accumulator.
+ FFState* Evaluate(const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+ // double perplexity();
+
+ private:
+ // NOTE(review): the constructor's initializer list names these members in a different order; initialization follows the order below.
+ const size_t m_NumScoreComponents;
+ SyntacticLanguageModelFiles<YModel,XModel>* m_files;
+ const FactorType m_factorType;
+ const size_t m_beamWidth;
+
+ };
+
+
+}
+
+#endif
diff --git a/moses/src/SyntacticLanguageModelFiles.h b/moses/src/SyntacticLanguageModelFiles.h
new file mode 100755
index 000000000..318e22636
--- /dev/null
+++ b/moses/src/SyntacticLanguageModelFiles.h
@@ -0,0 +1,95 @@
+//
+
+#ifndef moses_SyntacticLanguageModelFiles_h
+#define moses_SyntacticLanguageModelFiles_h
+
+#include "nl-iomacros.h"
+#include "nl-string.h"
+
+namespace Moses
+{
+
+template <class MH, class MO>
+class SyntacticLanguageModelFiles {
+ // Holds one hidden model (MH) and one observed model (MO), both filled from the model files.
+ public:
+ // Allocates both models and reads every file in filePaths into them.
+ SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths);
+ ~SyntacticLanguageModelFiles();
+ // Accessors return the stored pointers as they are.
+ MH* getHiddenModel();
+ MO* getObservedModel();
+
+ private:
+ MH* hiddenModel;
+ MO* observedModel;
+
+};
+
+
+/**
+ * Allocate the hidden and observed models and read every model file into them.
+ *
+ * Each record (line) of a file is offered first to the hidden model and then
+ * to the observed model; lines starting with '#' are accepted as comments.
+ * A line that neither model can parse is reported on stderr and skipped.
+ *
+ * If a file cannot be opened, an error is printed and the constructor returns
+ * immediately: files listed after it are not read and the caller is not
+ * notified in any other way.
+ *
+ * @param filePaths paths of the model files, read in order
+ */
+template <class MH, class MO>
+  SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths) {
+
+  this->hiddenModel = new MH();
+  this->observedModel = new MO();
+
+  //// I. LOAD MODELS...
+  std::cerr << "Reading syntactic language model files...\n";
+  // For each model file...
+  for ( int a=0, n=filePaths.size(); a<n; a++ ) {                                  // read models
+    FILE* pf = fopen(filePaths[a].c_str(),"r");                                    // Read model file
+    if(!pf){
+      std::cerr << "Error loading model file " << filePaths[a] << std::endl;
+      return;
+    }
+    std::cerr << "Loading model \'" << filePaths[a] << "\'...\n";
+    int c=' '; int i=0; int line=1; String sBuff(1000);                            // Lookahead/ctrs/buffers
+    CONSUME_ALL ( pf, c, WHITESPACE(c), line);                                     // Get to first record
+    while ( c!=-1 && c!='\0' && c!='\5' ) {                                        // For each record
+      CONSUME_STR ( pf, c, (c!='\n' && c!='\0' && c!='\5'), sBuff, i, line );      // Consume line
+      StringInput si(sBuff.c_array());
+      if ( !( sBuff[0]=='#'                                                        // Accept comments/fields
+              || si>>*(this->hiddenModel)>>"\0"!=NULL
+              || si>>*(this->observedModel)>>"\0"!=NULL
+              ))
+        std::cerr<<"\nERROR: can't parse \'"<<sBuff<<"\' in line "<<line<<"\n\n";
+      CONSUME_ALL ( pf, c, WHITESPACE(c), line);                                   // Consume whitespace
+      if ( line%100000==0 ) std::cerr<<" "<<line<<" lines read...\n";              // Progress for big models
+    }
+    // The file has been consumed; close it so the handle is not leaked.
+    fclose(pf);
+    std::cerr << "Model \'" << filePaths[a] << "\' loaded.\n";
+  }
+
+  std::cerr << "...reading syntactic language model files completed\n";
+
+}
+
+
+template <class MH, class MO>
+ SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles() {
+ // Only prints a message; the two deletes below are disabled.
+ std::cerr<<"Destructing syntactic language model files" << std::endl;
+ //delete hiddenModel; // NOTE(review): disabled, so the model allocated in the constructor is not freed - confirm intent.
+ //delete observedModel; // NOTE(review): same as above.
+
+}
+
+
+// Accessor for the hidden model allocated by the constructor; ownership stays with this object.
+template <class MH, class MO>
+  MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel() {
+  return hiddenModel;
+}
+
+// Accessor for the observed model allocated by the constructor; ownership stays with this object.
+template <class MH, class MO>
+  MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel() {
+  return observedModel;
+}
+
+
+}
+
+#endif
diff --git a/moses/src/SyntacticLanguageModelState.h b/moses/src/SyntacticLanguageModelState.h
new file mode 100755
index 000000000..0877a59b3
--- /dev/null
+++ b/moses/src/SyntacticLanguageModelState.h
@@ -0,0 +1,303 @@
+//
+
+#ifndef moses_SyntacticLanguageModelState_h
+#define moses_SyntacticLanguageModelState_h
+
+#include "nl-iomacros.h"
+#include "nl-cpt.h"
+#include "nl-hmm.h"
+
+#include "SyntacticLanguageModelFiles.h"
+#include "FFState.h"
+#include <string>
+
+namespace Moses
+{
+
+template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
+ class SyntacticLanguageModelState : public FFState {
+ public:
+ // MY: hidden model type, MX: observed model type, YS: hidden random variable type, B: back-data type passed on to HMM.
+ // Initialize an empty LM state
+ SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize );
+
+ // Get the next LM state from an existing LM state and the next word
+ SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word );
+
+ // The destructor body is empty: the delete of randomVariableStore is commented out.
+ ~SyntacticLanguageModelState() {
+ //cerr << "Deleting SyntacticLanguageModelState" << std::endl;
+ //delete randomVariableStore;
+ }
+ // Always returns 0 in the implementation visible in this file.
+ virtual int Compare(const FFState& other) const;
+
+ // Get the LM score from this LM state
+ double getScore() const;
+ // Get the unscaled value that was last given to setScore()
+ double getProb() const;
+
+ private:
+ // setScore stores the raw value in prob and a rescaled log value in score.
+ void setScore(double score);
+ void printRV();
+ // Allocated by both constructors; holds (YS, LogProb) pairs.
+ SafeArray1D<Id<int>,pair<YS,LogProb> >* randomVariableStore;
+ double prob;
+ double score;
+ int beamSize;
+ SyntacticLanguageModelFiles<MY,MX>* modelData;
+ bool sentenceStart;
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+ // Debug helper: writes the size of the random variable store to stderr,
+ // followed by one line per entry (value, probability, log probability).
+ template <class MY, class MX, class YS, class B>
+   void SyntacticLanguageModelState<MY,MX,YS,B>::printRV() {
+
+   std::cerr << "*********** BEGIN printRV() ******************" << std::endl;
+
+   const int entryCount = randomVariableStore->getSize();
+   std::cerr << "randomVariableStore->getSize() == " << entryCount << std::endl;
+
+   for (int idx = 0; idx < entryCount; ++idx) {
+     const pair<YS,LogProb>& entry = randomVariableStore->get(idx);
+     std::cerr << "randomVariableStore[" << idx << "]\t" << entry.first
+               << "\tprob = " << entry.second.toProb()
+               << "\tlogProb = " << static_cast<double>(entry.second.toInt())/100
+               << std::endl;
+   }
+
+   std::cerr << "*********** END printRV() ******************" << std::endl;
+
+ }
+
+// Initialize an empty LM state
+//
+// modelData is the set of loaded model files; the pointer is stored as given
+// beamSize is stored and later handed to hmm.init() when a successor state is built
+//
+template <class MY, class MX, class YS, class B>
+ SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize ) {
+
+ this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
+ this->modelData = modelData;
+ this->beamSize = beamSize;
+
+ // Initialize an empty random variable value by parsing BEG_STATE
+ YS xBEG;
+ StringInput(String(BEG_STATE).c_array())>>xBEG>>"\0";
+ cerr<<xBEG<<"\n"; // NOTE(review): printed unconditionally, not guarded by a verbosity check
+
+ // cout << "Examining RV store just before RV init" << endl;
+ //printRV();
+
+ // Initialize the random variable store with init(1, (xBEG, 0))
+ this->randomVariableStore->init(1,pair<YS,LogProb>(xBEG,0));
+ // Read by the successor constructor, which passes it to hmm.updateRanked()
+ this->sentenceStart = true;
+
+ IFVERBOSE(3) {
+ VERBOSE(3,"Examining RV store just after RV init" << endl);
+ printRV();
+ }
+
+ // Set the initial score from LogProb(1.0); setScore() fills both prob and score
+ LogProb l(1.0);
+ //score = l.toDouble();
+ setScore(l.toDouble());
+ // MY::F_ROOT_OBS = true;
+ // this->modelData->getHiddenModel()->setRootObs(true);
+
+
+}
+
+
+template <class MY, class MX, class YS, class B>
+ int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const { // `other` is not inspected; the score-based comparison below is disabled.
+ /*
+ const SyntacticLanguageModelState<MY,MX,YS,B>& o =
+ static_cast<const SyntacticLanguageModelState<MY,MX,YS,B>&>(other);
+
+ if (o.score > score) return 1;
+ else if (o.score < score) return -1;
+ else return 0;
+ */
+ return 0; // every pair of states compares as equal
+ }
+
+
+template <class MY, class MX, class YS, class B>
+ SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ) {
+ // Builds the state that follows *prev after observing `word`: a local HMM is seeded from prev's store, updated with the word, and its result copied into this state's own store.
+ // Initialize member variables (model data and beam size are inherited from prev)
+ this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >();
+ this->modelData = prev->modelData;
+ this->beamSize = prev->beamSize;
+ this->randomVariableStore->init(this->beamSize);
+ this->sentenceStart=false;
+ // ysEND is parsed from END_STATE; in this function it is referenced only by the commented-out debug blocks below
+ YS ysEND;
+ StringInput(String(END_STATE).c_array())>>ysEND>>"\0";
+
+ // Get HHMM model files
+ MY& mH = *(modelData->getHiddenModel());
+ MX& mO = *(modelData->getObservedModel());
+
+ // Initialize HHMM from the previous state's random variable store
+ HMM<MY,MX,YS,B> hmm(mH,mO);
+ int MAX_WORDS = 2; // NOTE(review): meaning of the value 2 is defined by HMM::init - confirm
+ hmm.init(MAX_WORDS,this->beamSize,prev->randomVariableStore);
+ typename MX::RandVarType x(word.c_str());
+ // cout << "Examining HHMM just after hmm.init" << endl;
+ // hmm.debugPrint();
+
+
+ /* cerr << "*********** BEGIN writeCurr() ******************" << endl;
+ hmm.writeCurr(cout,0);
+ hmm.writeCurr(cout,1);
+ cerr << "*********** END writeCurr() ******************" << endl;
+ */
+/*
+ {
+
+ int wnum=1;
+ list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
+ for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
+ cout << "HYPOTH " << wnum
+ << " " << i->getBackData()
+ << " " << x
+ << " " << i->getId()
+ << " (" << i->getLogProb() << ")"
+ << endl; // print RV val
+ }
+ }
+ */
+
+
+ /*
+ cerr << "Writing hmm.writeCurr" << endl;
+ hmm.writeCurr(cerr,0);
+ hmm.writeCurr(cerr,1);
+ cerr << "...done writing hmm.writeCurr" << endl;
+ */
+ hmm.getCurrSum(); // NOTE(review): the return value is discarded here; the sum is read again after updateRanked() below
+
+
+
+ // Initialize observed variable
+ // typename MX::RandVarType ov;
+ // ov.set(word.c_str(),mO);
+ // MY::WORD = ov.getW();
+ //bool endOfSentence = prev->sentenceStart;//true;
+
+ // std::cerr << "About to give HHMM a word of input:\t" << word << std::endl;
+ // Feed the observed word to the HMM; the flag is prev's sentenceStart (true only for a state made by the empty-state constructor)
+ hmm.updateRanked(x, prev->sentenceStart);
+
+ // cout << "Examining HHMM just after hmm.updateRanked(" << x << "," << prev->sentenceStart << ")" << endl;
+ // hmm.debugPrint();
+/*
+ cerr << "*********** BEGIN writeCurr() ******************" << endl;
+ hmm.writeCurr(cout,0);
+ hmm.writeCurr(cout,1);
+ cerr << "*********** END writeCurr() ******************" << endl;
+ */
+/*
+{
+
+ int wnum=1;
+ list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list
+ for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame
+ cout << "HYPOTH " << wnum
+ << " " << i->getBackData()
+ << " " << x
+ << " " << i->getId()
+ << " (" << i->getLogProb() << ")"
+ << endl; // print RV val
+ }
+ }
+ */
+// X ov(word.c_str());
+ //mH.setWord(ov);
+ // MY::WORD = ov;//ov.getW();
+
+ // Update HHMM based on observed variable
+ //hmm.updateRanked(ov);
+ //mH.setRootObs(true);
+ //MY::F_ROOT_OBS = false;
+
+ // Get the current score; setScore() stores it raw in prob and rescaled in score
+ double currSum = hmm.getCurrSum();
+ //VERBOSE(3,"Setting score using currSum for " << scientific << x << " = " << currSum << endl);
+ setScore(currSum);
+ // cout << "Examining RV store just before RV init via gatherElementsInBeam" << endl;
+ // printRV();
+
+ // Get new hidden random variable store from HHMM
+ hmm.gatherElementsInBeam(randomVariableStore);
+ // cout << "Examining RV store just after RV init via gatherElementsInBeam" << endl;
+ // printRV();
+ /*
+ cerr << "Writing hmm.writeCurr..." << endl;
+ hmm.writeCurr(cerr,0);
+ hmm.writeCurr(cerr,1);
+ cerr << "...done writing hmm.writeCurr" << endl;
+ */
+}
+
+
+template <class MY, class MX, class YS, class B>
+double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const {
+ // Returns the raw, unscaled value stored by the last setScore() call.
+ return prob;
+}
+
+template <class MY, class MX, class YS, class B>
+double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const {
+ // Returns the rescaled log value computed by setScore(); it is never below -100.
+ return score;
+}
+
+
+// Store `score` unchanged in prob and a rescaled logarithm of it in this->score.
+//
+// The rescaling is chosen so that the smallest positive double,
+// min = 4.94065645841246544e-324, maps to -100:
+//
+//     log(min) / d = -100   =>   d = log(min) / -100 = 7.44440071921381
+//
+// d is the natural logarithm of the implied base b (b = exp(d)), not b itself.
+// Results below -100, and the input 0, are stored as -100.
+template <class MY, class MX, class YS, class B>
+  void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score) {
+
+  this->prob = score;
+
+  const double floorScore = -100;
+  double scaled = floorScore;
+
+  // score==0 is left at the floor so that log(0) = -infinity is never computed
+  if (score != 0) {
+    const double x = log(score) / 7.44440071921381;
+    if (x >= floorScore) {
+      scaled = x;
+    }
+  }
+
+  this->score = scaled;
+
+  VERBOSE(3,"\tSyntacticLanguageModelState has score=" << this->score << endl);
+
+}
+
+
+}
+
+#endif
diff --git a/regenerate-makefiles.sh b/regenerate-makefiles.sh
index 747dabf88..c4c2e8bee 100755
--- a/regenerate-makefiles.sh
+++ b/regenerate-makefiles.sh
@@ -54,7 +54,7 @@ $LIBTOOLIZE || die "libtoolize failed"
echo
echo "You should now be able to configure and build:"
-echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
+echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-synlm=/path/to/modelblocks] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
echo " make -j 4"
echo