diff options
-rw-r--r-- | config.h.in | 13 | ||||
-rw-r--r-- | configure.in | 42 | ||||
-rw-r--r-- | moses/src/Makefile.am | 8 | ||||
-rw-r--r-- | moses/src/Parameter.cpp | 6 | ||||
-rw-r--r-- | moses/src/StaticData.cpp | 71 | ||||
-rw-r--r-- | moses/src/StaticData.h | 6 | ||||
-rwxr-xr-x | moses/src/SyntacticLanguageModel.cpp | 123 | ||||
-rwxr-xr-x | moses/src/SyntacticLanguageModel.h | 52 | ||||
-rwxr-xr-x | moses/src/SyntacticLanguageModelFiles.h | 95 | ||||
-rwxr-xr-x | moses/src/SyntacticLanguageModelState.h | 303 | ||||
-rwxr-xr-x | regenerate-makefiles.sh | 2 |
11 files changed, 710 insertions, 11 deletions
diff --git a/config.h.in b/config.h.in index 264a888a5..f2f6ed881 100644 --- a/config.h.in +++ b/config.h.in @@ -30,6 +30,9 @@ /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the <nl-cpt.h> header file. */ +#undef HAVE_NL_CPT_H + /* flag for protobuf */ #undef HAVE_PROTOBUF @@ -51,6 +54,9 @@ /* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H +/* flag for Syntactic Parser */ +#undef HAVE_SYNLM + /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H @@ -60,10 +66,6 @@ /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#undef LT_OBJDIR - /* Name of package */ #undef PACKAGE @@ -79,9 +81,6 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME -/* Define to the home page for this package. */ -#undef PACKAGE_URL - /* Define to the version of this package. 
*/ #undef PACKAGE_VERSION diff --git a/configure.in b/configure.in index 8778fa91a..255d8fadd 100644 --- a/configure.in +++ b/configure.in @@ -34,6 +34,13 @@ AC_ARG_WITH(srilm-dynamic, [with_srilm_dynamic=no] ) +AC_ARG_WITH(srilm-arch, + [AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])], + [with_srilm_arch=$withval], + [with_srilm_arch=no] + ) + + AC_ARG_WITH(irstlm, [AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])], [with_irstlm=$withval], @@ -52,6 +59,12 @@ AC_ARG_WITH(randlm, [with_randlm=no] ) +AC_ARG_WITH(synlm, + [AC_HELP_STRING([--with-synlm=PATH], [(optional) path to syntactic language model parser])], + [with_synlm=$withval], + [with_synlm=no] + ) + AC_ARG_WITH(notrace, [AC_HELP_STRING([--notrace], [disable trace])], [without_trace=yes], @@ -94,7 +107,7 @@ AC_ARG_WITH(tcmalloc, ) require_boost=no -if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno' +if test "x$enable_threads" != 'xno' || test "x$enable_boost" != 'xno' || test "x$with_synlm" != 'xno' then require_boost=yes fi @@ -111,6 +124,7 @@ AM_CONDITIONAL([SRI_LM], false) AM_CONDITIONAL([IRST_LM], false) AM_CONDITIONAL([KEN_LM], false) AM_CONDITIONAL([RAND_LM], false) +AM_CONDITIONAL([SYN_LM], false) AM_CONDITIONAL([PROTOBUF], false) AM_CONDITIONAL([am__fastdepCC], false) AM_CONDITIONAL([WITH_THREADS],false) @@ -130,7 +144,7 @@ then BOOST_REQUIRE([1.36.0]) fi -if test "x$enable_threads" = 'xyes' +if test "x$enable_threads" = 'xyes' || test "x$with_synlm" != 'xno' then AC_MSG_NOTICE([Building threaded moses]) BOOST_THREADS @@ -172,7 +186,12 @@ then # ROOT/lib/i686-m64/liboolm.a # ROOT/lib/i686-m64/libdstruct.a # ROOT/lib/i686-m64/libmisc.a - MY_ARCH=`${with_srilm}/sbin/machine-type` + if test "x$with_srilm_arch" != 'xno' + then + MY_ARCH=${with_srilm_arch} + else + MY_ARCH=`${with_srilm}/sbin/machine-type` + fi LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}" LIBS="$LIBS $LIB_SRILM" 
FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a" @@ -252,6 +271,7 @@ then AC_CHECK_LIB([tcmalloc], [malloc], [], [AC_MSG_ERROR([Cannot find tcmalloc])]) fi + if test "x$enable_boost_pool" != 'xno' then AC_CHECK_HEADER(boost/pool/object_pool.hpp, @@ -260,6 +280,22 @@ then ) fi +if test "x$with_synlm" != 'xno' +then + SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -DWITH_THREADS -I${with_synlm}/rvtl/include -I${with_synlm}/wsjparse/include -lm" + + + + + AC_CHECK_HEADERS(nl-cpt.h, + [AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])]) + + AM_CONDITIONAL([SYN_LM], true) + +fi + + AM_CONDITIONAL([WITH_MERT],false) AC_CHECK_HEADERS([getopt.h], [AM_CONDITIONAL([WITH_MERT],true)], diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index 3ca22d3ec..7cb610430 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -149,6 +149,10 @@ libmoses_la_HEADERS += LanguageModelInternal.h \ NGramNode.h endif +if SYN_LM +libmoses_la_HEADERS += SyntacticLanguageModel.h +endif + libmoses_la_SOURCES = \ AlignmentInfo.cpp \ BilingualDynSuffixArray.cpp \ @@ -306,6 +310,10 @@ libmoses_la_SOURCES += LanguageModelInternal.cpp \ NGramNode.cpp endif +if SYN_LM +libmoses_la_SOURCES += SyntacticLanguageModel.cpp +endif + if KEN_LM libmoses_la_SOURCES += LanguageModelKen.cpp endif diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp index e0173e49a..27170e0f8 100644 --- a/moses/src/Parameter.cpp +++ b/moses/src/Parameter.cpp @@ -68,6 +68,12 @@ Parameter::Parameter() AddParam("report-all-factors", "report all factors in output, not just first"); AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. 
Default is false"); AddParam("report-segmentation", "t", "report phrase segmentation in the output"); +#ifdef HAVE_SYNLM + AddParam("slmodel-file", "location of the syntactic language model file(s)"); + AddParam("weight-slm", "slm", "weight(s) for syntactic language model"); + AddParam("slmodel-factor", "factor to use with syntactic language model"); + AddParam("slmodel-beam", "beam width to use with syntactic language model's parser"); +#endif AddParam("stack", "s", "maximum stack size for histogram pruning"); AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)"); AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)"); diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index cb2981444..55cb82134 100644 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -41,6 +41,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "DecodeGraph.h" #include "InputFileStream.h" +#ifdef HAVE_SYNLM +#include "SyntacticLanguageModel.h" +#endif + using namespace std; namespace Moses @@ -420,6 +424,12 @@ bool StaticData::LoadData(Parameter *parameter) return false; } +#ifdef HAVE_SYNLM + if (m_parameter->GetParam("slmodel-file").size() > 0) { + if (!LoadSyntacticLanguageModel()) return false; + } +#endif + if (!LoadLexicalReorderingModel()) return false; if (!LoadLanguageModels()) return false; if (!LoadGenerationTables()) return false; @@ -542,6 +552,12 @@ StaticData::~StaticData() RemoveAllInColl(m_generationDictionary); RemoveAllInColl(m_reorderModels); RemoveAllInColl(m_globalLexicalModels); + +#ifdef HAVE_SYNLM + delete m_syntacticLanguageModel; +#endif + + RemoveAllInColl(m_decodeGraphs); RemoveAllInColl(m_wordPenaltyProducers); RemoveAllInColl(m_distortionScoreProducers); @@ -564,6 +580,61 @@ StaticData::~StaticData() } +#ifdef HAVE_SYNLM + bool StaticData::LoadSyntacticLanguageModel() { + cerr << "Loading syntactic language 
models..." << std::endl; + + const vector<float> weights = Scan<float>(m_parameter->GetParam("weight-slm")); + const vector<string> files = m_parameter->GetParam("slmodel-file"); + + const FactorType factorType = (m_parameter->GetParam("slmodel-factor").size() > 0) ? + Scan<int>(m_parameter->GetParam("slmodel-factor")[0]) // a factor index, not a probability: use the parsed value as-is + : 0; + + const size_t beamWidth = (m_parameter->GetParam("slmodel-beam").size() > 0) ? + Scan<int>(m_parameter->GetParam("slmodel-beam")[0]) // a beam width, not a probability: use the parsed value as-is + : 500; + + if (files.size() < 1) { + cerr << "No syntactic language model files specified!" << std::endl; + return false; + } + + // check if feature is used + if (weights.size() >= 1) { + + //cout.setf(ios::scientific,ios::floatfield); + //cerr.setf(ios::scientific,ios::floatfield); + + // create the feature + m_syntacticLanguageModel = new SyntacticLanguageModel(files,weights,factorType,beamWidth); + + /* + ///////////////////////////////////////// + // BEGIN LANE's UNSTABLE EXPERIMENT :) + // + + double ppl = m_syntacticLanguageModel->perplexity(); + cerr << "Probability is " << ppl << endl; + + + // + // END LANE's UNSTABLE EXPERIMENT + ///////////////////////////////////////// + */ + + + if (m_syntacticLanguageModel==NULL) { + return false; + } + + } + + return true; + + } +#endif + bool StaticData::LoadLexicalReorderingModel() { VERBOSE(1, "Loading lexical distortion models..."); diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index b3ea80a60..115d6a1f9 100644 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -63,6 +63,9 @@ class GenerationDictionary; class DistortionScoreProducer; class DecodeStep; class UnknownWordPenaltyProducer; +#ifdef HAVE_SYNLM +class SyntacticLanguageModel; +#endif class TranslationSystem; typedef std::pair<std::string, float> UnknownLHSEntry; @@ -81,6 +84,9 @@ protected: Parameter *m_parameter; std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder; LMList m_languageModel; +#ifdef HAVE_SYNLM + 
SyntacticLanguageModel* m_syntacticLanguageModel; +#endif ScoreIndexManager m_scoreIndexManager; std::vector<float> m_allWeights; std::vector<LexicalReordering*> m_reorderModels; diff --git a/moses/src/SyntacticLanguageModel.cpp b/moses/src/SyntacticLanguageModel.cpp new file mode 100755 index 000000000..85c19bdc0 --- /dev/null +++ b/moses/src/SyntacticLanguageModel.cpp @@ -0,0 +1,123 @@ +// + +#include "StaticData.h" +#include "SyntacticLanguageModel.h" +#include "HHMMLangModel-gf.h" +#include "TextObsModel.h" +#include "SyntacticLanguageModelFiles.h" +#include "SyntacticLanguageModelState.h" + + +namespace Moses +{ + // asnteousntaoheisnthaoesntih + SyntacticLanguageModel::SyntacticLanguageModel(const std::vector<std::string>& filePath, + const std::vector<float>& weights, + const FactorType factorType, + size_t beamWidth) + // Initialize member variables + : m_NumScoreComponents(weights.size()) + , m_beamWidth(beamWidth) + , m_factorType(factorType) + , m_files(new SyntacticLanguageModelFiles<YModel,XModel>(filePath)) { + + // Inform Moses score manager of this feature and its weight(s) + const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this); + const_cast<StaticData&>(StaticData::Instance()).SetWeightsForScoreProducer(this, weights); + VERBOSE(3,"Constructed SyntacticLanguageModel" << endl); + } + + SyntacticLanguageModel::~SyntacticLanguageModel() { + VERBOSE(3,"Destructing SyntacticLanguageModel" << std::endl); + // delete m_files; + } + + size_t SyntacticLanguageModel::GetNumScoreComponents() const { + return m_NumScoreComponents; + } + + std::string SyntacticLanguageModel::GetScoreProducerDescription() const { + return "Syntactic Language Model"; + } + + std::string SyntacticLanguageModel::GetScoreProducerWeightShortName() const { + return "slm"; + } + + const FFState* SyntacticLanguageModel::EmptyHypothesisState(const InputType &input) const { + + return new 
SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth); + + } + + /* + double SyntacticLanguageModel::perplexity() { + + SyntacticLanguageModelState<YModel,XModel,S,R> *prev = + new SyntacticLanguageModelState<YModel,XModel,S,R>(m_files,m_beamWidth); + + std::cerr << "Initial prob:" << "\t" << prev->getProb() <<std::endl; + + + std::vector<std::string> words(3); + words[0] = "no"; + words[1] = ","; + words[2] = "zxvth"; + + + for (std::vector<std::string>::iterator i=words.begin(); + i != words.end(); + i++) { + + prev = new SyntacticLanguageModelState<YModel,XModel,S,R>(prev, *i); + std::cerr << *i << "\t" << prev->getProb() <<std::endl; + + } + + if (true) exit(-1); + + return prev->getProb(); + + } + */ + FFState* SyntacticLanguageModel::Evaluate(const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const { + + VERBOSE(3,"Evaluating SyntacticLanguageModel for a hypothesis" << endl); + + const SyntacticLanguageModelState<YModel,XModel,S,R>& prev = + static_cast<const SyntacticLanguageModelState<YModel,XModel,S,R>&>(*prev_state); + + const SyntacticLanguageModelState<YModel,XModel,S,R>* currentState = &prev; + SyntacticLanguageModelState<YModel,XModel,S,R>* nextState = NULL; + + + const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase(); + + for (size_t i=0, n=targetPhrase.GetSize(); i<n; i++) { + + const Word& word = targetPhrase.GetWord(i); + const Factor* factor = word.GetFactor(m_factorType); + + const std::string& string = factor->GetString(); + + if (i==0) { + nextState = new SyntacticLanguageModelState<YModel,XModel,S,R>(&prev, string); + } else { + currentState = nextState; + nextState = new SyntacticLanguageModelState<YModel,XModel,S,R>(currentState, string); delete currentState; // free the intermediate state allocated by this loop (never &prev); its successor's constructor does not keep the pointer + } + + double score = nextState->getScore(); + VERBOSE(3,"SynLM evaluated a score of " << score << endl); + accumulator->Assign( this, score ); + } + + + + return nextState; + + } + +} diff --git a/moses/src/SyntacticLanguageModel.h 
b/moses/src/SyntacticLanguageModel.h new file mode 100755 index 000000000..977a57680 --- /dev/null +++ b/moses/src/SyntacticLanguageModel.h @@ -0,0 +1,52 @@ +// + +#ifndef moses_SyntacticLanguageModel_h +#define moses_SyntacticLanguageModel_h + +#include "FeatureFunction.h" + + +class YModel; // hidden model +class XModel; // observed model + +namespace Moses +{ + + template <class MH, class MO> class SyntacticLanguageModelFiles; + + class SyntacticLanguageModel : public StatefulFeatureFunction { + + public: + + SyntacticLanguageModel(const std::vector<std::string>& filePaths, + const std::vector<float>& weights, + const FactorType factorType, + const size_t beamWidth); + + ~SyntacticLanguageModel(); + + size_t GetNumScoreComponents() const; + std::string GetScoreProducerDescription() const; + std::string GetScoreProducerWeightShortName() const; + + const FFState* EmptyHypothesisState(const InputType &input) const; + + FFState* Evaluate(const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const; + + // double perplexity(); + + private: + + const size_t m_NumScoreComponents; + SyntacticLanguageModelFiles<YModel,XModel>* m_files; + const FactorType m_factorType; + const size_t m_beamWidth; + + }; + + +} + +#endif diff --git a/moses/src/SyntacticLanguageModelFiles.h b/moses/src/SyntacticLanguageModelFiles.h new file mode 100755 index 000000000..318e22636 --- /dev/null +++ b/moses/src/SyntacticLanguageModelFiles.h @@ -0,0 +1,95 @@ +// + +#ifndef moses_SyntacticLanguageModelFiles_h +#define moses_SyntacticLanguageModelFiles_h + +#include "nl-iomacros.h" +#include "nl-string.h" + +namespace Moses +{ + +template <class MH, class MO> +class SyntacticLanguageModelFiles { + + public: + + SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths); + ~SyntacticLanguageModelFiles(); + + MH* getHiddenModel(); + MO* getObservedModel(); + + private: + MH* hiddenModel; + MO* observedModel; + +}; + + +template <class MH, class 
MO> + SyntacticLanguageModelFiles<MH,MO>::SyntacticLanguageModelFiles(const std::vector<std::string>& filePaths) { + + this->hiddenModel = new MH(); + this->observedModel = new MO(); + + //// I. LOAD MODELS... + std::cerr << "Reading syntactic language model files...\n"; + // For each model file... + for ( int a=0, n=filePaths.size(); a<n; a++ ) { // read models + FILE* pf = fopen(filePaths[a].c_str(),"r"); //assert(pf); // Read model file + if(!pf){ + std::cerr << "Error loading model file " << filePaths[a] << std::endl; + return; + } + std::cerr << "Loading model \'" << filePaths[a] << "\'...\n"; + int c=' '; int i=0; int line=1; String sBuff(1000); // Lookahead/ctrs/buffers + CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Get to first record + while ( c!=-1 && c!='\0' && c!='\5' ) { // For each record + CONSUME_STR ( pf, c, (c!='\n' && c!='\0' && c!='\5'), sBuff, i, line ); // Consume line + StringInput si(sBuff.c_array()); + if ( !( sBuff[0]=='#' // Accept comments/fields + || si>>*(this->hiddenModel)>>"\0"!=NULL + || si>>*(this->observedModel)>>"\0"!=NULL + )) + std::cerr<<"\nERROR: can't parse \'"<<sBuff<<"\' in line "<<line<<"\n\n"; + CONSUME_ALL ( pf, c, WHITESPACE(c), line); // Consume whitespace + if ( line%100000==0 ) std::cerr<<" "<<line<<" lines read...\n"; // Progress for big models + } + std::cerr << "Model \'" << filePaths[a] << "\' loaded.\n"; fclose(pf); // all records consumed: close the handle, which was previously leaked for every model file + } + + std::cerr << "...reading syntactic language model files completed\n"; + + +} + + +template <class MH, class MO> + SyntacticLanguageModelFiles<MH,MO>::~SyntacticLanguageModelFiles() { + + std::cerr<<"Destructing syntactic language model files" << std::endl; + //delete hiddenModel; + //delete observedModel; + +} + + +template <class MH, class MO> + MH* SyntacticLanguageModelFiles<MH,MO>::getHiddenModel() { + + return this->hiddenModel; + +} + +template <class MH, class MO> + MO* SyntacticLanguageModelFiles<MH,MO>::getObservedModel() { + + return this->observedModel; + +} + + +} + +#endif diff --git 
a/moses/src/SyntacticLanguageModelState.h b/moses/src/SyntacticLanguageModelState.h new file mode 100755 index 000000000..0877a59b3 --- /dev/null +++ b/moses/src/SyntacticLanguageModelState.h @@ -0,0 +1,303 @@ +// + +#ifndef moses_SyntacticLanguageModelState_h +#define moses_SyntacticLanguageModelState_h + +#include "nl-iomacros.h" +#include "nl-cpt.h" +#include "nl-hmm.h" + +#include "SyntacticLanguageModelFiles.h" +#include "FFState.h" +#include <string> + +namespace Moses +{ + +template <class MY, class MX, class YS=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> > + class SyntacticLanguageModelState : public FFState { + public: + + // Initialize an empty LM state + SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize ); + + // Get the next LM state from an existing LM state and the next word + SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ); + + + ~SyntacticLanguageModelState() { + //cerr << "Deleting SyntacticLanguageModelState" << std::endl; + //delete randomVariableStore; + } + + virtual int Compare(const FFState& other) const; + + // Get the LM score from this LM state + double getScore() const; + + double getProb() const; + + private: + + void setScore(double score); + void printRV(); + + SafeArray1D<Id<int>,pair<YS,LogProb> >* randomVariableStore; + double prob; + double score; + int beamSize; + SyntacticLanguageModelFiles<MY,MX>* modelData; + bool sentenceStart; +}; + + +//////////////////////////////////////////////////////////////////////////////// + + + template <class MY, class MX, class YS, class B> + void SyntacticLanguageModelState<MY,MX,YS,B>::printRV() { + + cerr << "*********** BEGIN printRV() ******************" << endl; + int size=randomVariableStore->getSize(); + cerr << "randomVariableStore->getSize() == " << size << endl; + + for (int depth=0; depth<size; depth+=1) { + + + const pair<YS,LogProb> *data = &(randomVariableStore->get(depth)); + 
std::cerr << "randomVariableStore[" << depth << "]\t" << data->first << "\tprob = " << data->second.toProb() << "\tlogProb = " << double(data->second.toInt())/100 << std::endl; + + } + cerr << "*********** END printRV() ******************" << endl; + + } + +// Initialize an empty LM state from grammar files +// +// nArgs is the number of model files +// argv is the list of model file names +// +template <class MY, class MX, class YS, class B> + SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( SyntacticLanguageModelFiles<MY,MX>* modelData, int beamSize ) { + + this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >(); + this->modelData = modelData; + this->beamSize = beamSize; + + // Initialize an empty random variable value + YS xBEG; + StringInput(String(BEG_STATE).c_array())>>xBEG>>"\0"; + cerr<<xBEG<<"\n"; + + // cout << "Examining RV store just before RV init" << endl; + //printRV(); + + // Initialize the random variable store + this->randomVariableStore->init(1,pair<YS,LogProb>(xBEG,0)); + + this->sentenceStart = true; + + IFVERBOSE(3) { + VERBOSE(3,"Examining RV store just after RV init" << endl); + printRV(); + } + + // Get score of final frame in HHMM + LogProb l(1.0); + //score = l.toDouble(); + setScore(l.toDouble()); + // MY::F_ROOT_OBS = true; + // this->modelData->getHiddenModel()->setRootObs(true); + + +} + + +template <class MY, class MX, class YS, class B> + int SyntacticLanguageModelState<MY,MX,YS,B>::Compare(const FFState& other) const { + /* + const SyntacticLanguageModelState<MY,MX,YS,B>& o = + static_cast<const SyntacticLanguageModelState<MY,MX,YS,B>&>(other); + + if (o.score > score) return 1; + else if (o.score < score) return -1; + else return 0; + */ + return 0; + } + + +template <class MY, class MX, class YS, class B> + SyntacticLanguageModelState<MY,MX,YS,B>::SyntacticLanguageModelState( const SyntacticLanguageModelState* prev, std::string word ) { + + // Initialize member variables + 
this->randomVariableStore = new SafeArray1D<Id<int>,pair<YS,LogProb> >(); + this->modelData = prev->modelData; + this->beamSize = prev->beamSize; + this->randomVariableStore->init(this->beamSize); + this->sentenceStart=false; + + YS ysEND; + StringInput(String(END_STATE).c_array())>>ysEND>>"\0"; + + // Get HHMM model files + MY& mH = *(modelData->getHiddenModel()); + MX& mO = *(modelData->getObservedModel()); + + // Initialize HHMM + HMM<MY,MX,YS,B> hmm(mH,mO); + int MAX_WORDS = 2; + hmm.init(MAX_WORDS,this->beamSize,prev->randomVariableStore); + typename MX::RandVarType x(word.c_str()); + // cout << "Examining HHMM just after hmm.init" << endl; + // hmm.debugPrint(); + + + /* cerr << "*********** BEGIN writeCurr() ******************" << endl; + hmm.writeCurr(cout,0); + hmm.writeCurr(cout,1); + cerr << "*********** END writeCurr() ******************" << endl; + */ +/* + { + + int wnum=1; + list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list + for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame + cout << "HYPOTH " << wnum + << " " << i->getBackData() + << " " << x + << " " << i->getId() + << " (" << i->getLogProb() << ")" + << endl; // print RV val + } + } + */ + + + /* + cerr << "Writing hmm.writeCurr" << endl; + hmm.writeCurr(cerr,0); + hmm.writeCurr(cerr,1); + cerr << "...done writing hmm.writeCurr" << endl; + */ + hmm.getCurrSum(); + + + + // Initialize observed variable + // typename MX::RandVarType ov; + // ov.set(word.c_str(),mO); + // MY::WORD = ov.getW(); + //bool endOfSentence = prev->sentenceStart;//true; + + // std::cerr << "About to give HHMM a word of input:\t" << word << std::endl; + + hmm.updateRanked(x, prev->sentenceStart); + + // cout << "Examining HHMM just after hmm.updateRanked(" << x << "," << prev->sentenceStart << ")" << endl; + // hmm.debugPrint(); +/* + cerr << "*********** BEGIN writeCurr() ******************" << endl; + hmm.writeCurr(cout,0); + 
hmm.writeCurr(cout,1); + cerr << "*********** END writeCurr() ******************" << endl; + */ +/* +{ + + int wnum=1; + list<TrellNode<YS,B> > lys = hmm.getMLSnodes(ysEND); // get mls list + for ( typename list<TrellNode<YS,B> >::iterator i=lys.begin(); i!=lys.end(); i++, wnum++ ) { // for each frame + cout << "HYPOTH " << wnum + << " " << i->getBackData() + << " " << x + << " " << i->getId() + << " (" << i->getLogProb() << ")" + << endl; // print RV val + } + } + */ +// X ov(word.c_str()); + //mH.setWord(ov); + // MY::WORD = ov;//ov.getW(); + + // Update HHMM based on observed variable + //hmm.updateRanked(ov); + //mH.setRootObs(true); + //MY::F_ROOT_OBS = false; + + // Get the current score + double currSum = hmm.getCurrSum(); + //VERBOSE(3,"Setting score using currSum for " << scientific << x << " = " << currSum << endl); + setScore(currSum); + // cout << "Examining RV store just before RV init via gatherElementsInBeam" << endl; + // printRV(); + + // Get new hidden random variable store from HHMM + hmm.gatherElementsInBeam(randomVariableStore); + // cout << "Examining RV store just after RV init via gatherElementsInBeam" << endl; + // printRV(); + /* + cerr << "Writing hmm.writeCurr..." 
<< endl; + hmm.writeCurr(cerr,0); + hmm.writeCurr(cerr,1); + cerr << "...done writing hmm.writeCurr" << endl; + */ +} + + +template <class MY, class MX, class YS, class B> +double SyntacticLanguageModelState<MY,MX,YS,B>::getProb() const { + + return prob; +} + +template <class MY, class MX, class YS, class B> +double SyntacticLanguageModelState<MY,MX,YS,B>::getScore() const { + + return score; +} + + +template <class MY, class MX, class YS, class B> + void SyntacticLanguageModelState<MY,MX,YS,B>::setScore(double score) { + + + + + this->prob = score; + + // We want values to range from -100 to 0 + // + // If the minimum positive value for a double is min=4.94065645841246544e-324 + // then to scale, we want a logarithmic base such that log_b(min)=-100 + // + // -100 = log(min) / log(b) + // + // log(b) = log(min) / -100 + // + // b = exp( log(min) / -100 ) + // + // log(b) = 7.44440071921381 (so b itself is exp(7.44440071921381), roughly 1710) + + // Check for score==0 to avoid causing -infinity with log(score) + if (score==0) { + this->score = -100; + } else { + double x = log(score) / 7.44440071921381; // log_b(score) = log(score) / log(b); the constant is log(b), not b + if ( x >= -100) { + this->score = x; + } else { + this->score = -100; + } + } + + VERBOSE(3,"\tSyntacticLanguageModelState has score=" << this->score << endl); + +} + + +} + +#endif diff --git a/regenerate-makefiles.sh b/regenerate-makefiles.sh index 747dabf88..c4c2e8bee 100755 --- a/regenerate-makefiles.sh +++ b/regenerate-makefiles.sh @@ -54,7 +54,7 @@ $LIBTOOLIZE || die "libtoolize failed" echo echo "You should now be able to configure and build:" -echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]" +echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--without-kenlm] [--with-synlm=/path/to/modelblocks] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]" echo " make -j 4" echo |