From 7a5a95009b5417b72d863980929e8061f614d8a8 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 10:40:56 -0500 Subject: Rename RandLM to Rand, avoiding a self-conflict for header names. Reduce header pollution. Add Jam support for RandLM. --- moses/src/Jamfile | 8 ++- moses/src/LM/Factory.cpp | 4 +- moses/src/LM/Jamfile | 22 +++++-- moses/src/LM/Rand.cpp | 164 +++++++++++++++++++++++++++++++++++++++++++++++ moses/src/LM/Rand.h | 30 +++++++++ moses/src/LM/RandLM.cpp | 119 ---------------------------------- moses/src/LM/RandLM.h | 71 -------------------- moses/src/Makefile.am | 4 +- 8 files changed, 218 insertions(+), 204 deletions(-) create mode 100644 moses/src/LM/Rand.cpp create mode 100644 moses/src/LM/Rand.h delete mode 100644 moses/src/LM/RandLM.cpp delete mode 100644 moses/src/LM/RandLM.h (limited to 'moses') diff --git a/moses/src/Jamfile b/moses/src/Jamfile index f70cbb849..a5b0e2210 100644 --- a/moses/src/Jamfile +++ b/moses/src/Jamfile @@ -1,10 +1,12 @@ +alias headers : ../../util//util : : : . ; + lib z ; -alias InputFileStream : InputFileStream.cpp z ; +alias InputFileStream : InputFileStream.cpp z headers ; if [ option.get "with-synlm" : no : yes ] = yes { lib m ; - obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp ../../util//util : $(TOP)/synlm/hhmm/rvtl/include $(TOP)/synlm/hhmm/wsjparse/include ; + obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp headers : $(TOP)/synlm/hhmm/rvtl/include $(TOP)/synlm/hhmm/wsjparse/include ; alias synlm : SyntacticLanguageModel.o m : : : HAVE_SYNLM ; } else { alias synlm ; @@ -121,4 +123,4 @@ WordsRange.cpp XmlOption.cpp synlm -InputFileStream LM//LM ../../util//util : . : : . ; +InputFileStream LM//LM headers ; diff --git a/moses/src/LM/Factory.cpp b/moses/src/LM/Factory.cpp index df1e6bc01..2815c2694 100644 --- a/moses/src/LM/Factory.cpp +++ b/moses/src/LM/Factory.cpp @@ -64,7 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # include "LM/IRST.h" #endif #ifdef LM_RAND -# include "LM/RandLM.h" +# include "LM/Rand.h" #endif #ifdef LM_ORLM # include "LM/ORLM.h" @@ -102,7 +102,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation switch (lmImplementation) { case RandLM: #ifdef LM_RAND - lm = new LanguageModelRandLM(); + lm = NewRandLM(); #endif break; case ORLM: diff --git a/moses/src/LM/Jamfile b/moses/src/LM/Jamfile index bb8944e12..dd25775a2 100644 --- a/moses/src/LM/Jamfile +++ b/moses/src/LM/Jamfile @@ -15,7 +15,7 @@ with-irstlm = [ option.get "with-irstlm" ] ; if $(with-irstlm) != "" { lib irstlm : : $(with-irstlm)/lib ; - obj IRST.o : IRST.cpp ../../../util//util : $(with-irstlm)/include .. ; + obj IRST.o : IRST.cpp ..//headers : $(with-irstlm)/include ; alias irst : IRST.o irstlm : : : LM_IRST ; echo "" ; echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; @@ -29,7 +29,7 @@ if $(with-irstlm) != "" with-srilm = [ option.get "with-srilm" ] ; if $(with-srilm) != "" { - if [ option.get "with-srilm-dynamic" ] != "" + if [ option.get "with-srilm-dynamic" : no : yes ] = yes { alias sri-libs : srilm ; } else { @@ -45,14 +45,22 @@ if $(with-srilm) != "" alias sri-libs : oolm dstruct misc flm ; } - obj SRI.o : SRI.cpp ../../../util//util : $(with-srilm)/include .. ; - obj ParallelBackoff.o : ParallelBackoff.cpp ../../../util//util : $(with-srilm)/include .. ; + obj SRI.o : SRI.cpp ..//headers : $(with-srilm)/include ; + obj ParallelBackoff.o : ParallelBackoff.cpp ..//headers : $(with-srilm)/include ; alias sri : SRI.o ParallelBackoff.o sri-libs : : : LM_SRI ; } else { alias sri ; } +with-randlm = [ option.get "with-randlm" ] ; +if $(with-randlm) != "" +{ + lib randlm : : $(with-randlm)/lib ; + obj Rand.o : Rand.cpp randlm ..//headers : $(with-randlm)/include ; + alias rand : Rand.o : : : LM_RAND ; +} else { + alias rand ; +} + lib LM : Base.cpp Factory.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp - ../../../lm//lm - irst sri -: .. $(requirements) : : .. ; + ../../../lm//lm irst sri rand ..//headers ; diff --git a/moses/src/LM/Rand.cpp b/moses/src/LM/Rand.cpp new file mode 100644 index 000000000..93411b8d3 --- /dev/null +++ b/moses/src/LM/Rand.cpp @@ -0,0 +1,164 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "util/check.hh" +#include +#include +#include +#include +#include +#include "Factor.h" +#include "Util.h" +#include "LM/SingleFactor.h" +#include "RandLM.h" + + +#include "LM/Rand.h" +#include "FactorCollection.h" +#include "Phrase.h" +#include "InputFileStream.h" +#include "StaticData.h" + + +namespace Moses +{ +namespace +{ +using namespace std; + +class LanguageModelRandLM : public LanguageModelPointerState +{ +public: + LanguageModelRandLM() + : m_lm(0) {} + bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder); + virtual LMResult GetValue(const std::vector &contextFactor, State* finalState = NULL) const; + ~LanguageModelRandLM() { + delete m_lm; + } + void CleanUpAfterSentenceProcessing() { + m_lm->clearCaches(); // clear caches + } + void InitializeBeforeSentenceProcessing() { + m_lm->initThreadSpecificData(); // Creates thread specific data iff + // compiled with multithreading. + } +protected: + std::vector m_randlm_ids_vec; + randlm::RandLM* m_lm; + randlm::WordID m_oov_id; + void CreateFactors(FactorCollection &factorCollection); + randlm::WordID GetLmID( const std::string &str ) const; + randlm::WordID GetLmID( const Factor *factor ) const { + size_t factorId = factor->GetId(); + return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId]; + }; + +}; + + +bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, + size_t nGramOrder) +{ + cerr << "Loading LanguageModelRandLM..." << endl; + FactorCollection &factorCollection = FactorCollection::Instance(); + m_filePath = filePath; + m_factorType = factorType; + m_nGramOrder = nGramOrder; + int cache_MB = 50; // increase cache size + m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB); + CHECK(m_lm != NULL); + // get special word ids + m_oov_id = m_lm->getWordID(m_lm->getOOV()); + CreateFactors(factorCollection); + m_lm->initThreadSpecificData(); + return true; +} + +void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) // add factors which have randlm id +{ + // code copied & paste from SRI LM class. should do template function + // first get all bf vocab in map + std::map randlm_ids_map; // map from factor id -> randlm id + size_t maxFactorId = 0; // to create lookup vector later on + for(std::map::const_iterator vIter = m_lm->vocabStart(); + vIter != m_lm->vocabEnd(); vIter++) { + // get word from randlm vocab and associate with (new) factor id + size_t factorId=factorCollection.AddFactor(Output,m_factorType,vIter->first)->GetId(); + randlm_ids_map[factorId] = vIter->second; + maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; + } + // add factors for BOS and EOS and store bf word ids + size_t factorId; + m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS()); + factorId = m_sentenceStart->GetId(); + maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; + m_sentenceStartArray[m_factorType] = m_sentenceStart; + + m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS()); + factorId = m_sentenceEnd->GetId(); + maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; + m_sentenceEndArray[m_factorType] = m_sentenceEnd; + + // add to lookup vector in object + m_randlm_ids_vec.resize(maxFactorId+1); + // fill with OOV code + fill(m_randlm_ids_vec.begin(), m_randlm_ids_vec.end(), m_oov_id); + + for (map::const_iterator iter = randlm_ids_map.begin(); + iter != randlm_ids_map.end() ; ++iter) + m_randlm_ids_vec[iter->first] = iter->second; + +} + +randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const +{ + return m_lm->getWordID(str); +} + +LMResult LanguageModelRandLM::GetValue(const vector &contextFactor, + State* finalState) const +{ + FactorType factorType = GetFactorType(); + // set up context + randlm::WordID ngram[MAX_NGRAM_SIZE]; + int count = contextFactor.size(); + for (int i = 0 ; i < count ; i++) { + ngram[i] = GetLmID((*contextFactor[i])[factorType]); + //std::cerr << m_lm->getWord(ngram[i]) << " "; + } + int found = 0; + LMResult ret; + ret.score = FloorScore(TransformLMScore(m_lm->getProb(&ngram[0], count, &found, finalState))); + ret.unknown = count && (ngram[count - 1] == m_oov_id); + //if (finalState) + // std::cerr << " = " << logprob << "(" << *finalState << ", " <<")"<< std::endl; + //else + // std::cerr << " = " << logprob << std::endl; + return ret; +} + +} + +LanguageModelPointerState *NewRandLM() { + return new LanguageModelRandLM(); +} + +} + diff --git a/moses/src/LM/Rand.h b/moses/src/LM/Rand.h new file mode 100644 index 000000000..c4ef99947 --- /dev/null +++ b/moses/src/LM/Rand.h @@ -0,0 +1,30 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#ifndef moses_LM_Rand_h +#define moses_LM_Rand_h + +namespace Moses +{ +class LanguageModelPointerState; +LanguageModelPointerState *NewRandLM(); + +} + +#endif diff --git a/moses/src/LM/RandLM.cpp b/moses/src/LM/RandLM.cpp deleted file mode 100644 index 232f2bf94..000000000 --- a/moses/src/LM/RandLM.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "util/check.hh" -#include -#include -#include - -#include "LM/RandLM.h" -#include "FactorCollection.h" -#include "Phrase.h" -#include "InputFileStream.h" -#include "StaticData.h" - - -namespace Moses -{ -using namespace std; - -bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, - size_t nGramOrder) -{ - cerr << "Loading LanguageModelRandLM..." << endl; - FactorCollection &factorCollection = FactorCollection::Instance(); - m_filePath = filePath; - m_factorType = factorType; - m_nGramOrder = nGramOrder; - int cache_MB = 50; // increase cache size - m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB); - CHECK(m_lm != NULL); - // get special word ids - m_oov_id = m_lm->getWordID(m_lm->getOOV()); - CreateFactors(factorCollection); - m_lm->initThreadSpecificData(); - return true; -} - -void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) // add factors which have randlm id -{ - // code copied & paste from SRI LM class. should do template function - // first get all bf vocab in map - std::map randlm_ids_map; // map from factor id -> randlm id - size_t maxFactorId = 0; // to create lookup vector later on - for(std::map::const_iterator vIter = m_lm->vocabStart(); - vIter != m_lm->vocabEnd(); vIter++) { - // get word from randlm vocab and associate with (new) factor id - size_t factorId=factorCollection.AddFactor(Output,m_factorType,vIter->first)->GetId(); - randlm_ids_map[factorId] = vIter->second; - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - } - // add factors for BOS and EOS and store bf word ids - size_t factorId; - m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, m_lm->getBOS()); - factorId = m_sentenceStart->GetId(); - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - m_sentenceStartArray[m_factorType] = m_sentenceStart; - - m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, m_lm->getEOS()); - factorId = m_sentenceEnd->GetId(); - maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; - m_sentenceEndArray[m_factorType] = m_sentenceEnd; - - // add to lookup vector in object - m_randlm_ids_vec.resize(maxFactorId+1); - // fill with OOV code - fill(m_randlm_ids_vec.begin(), m_randlm_ids_vec.end(), m_oov_id); - - for (map::const_iterator iter = randlm_ids_map.begin(); - iter != randlm_ids_map.end() ; ++iter) - m_randlm_ids_vec[iter->first] = iter->second; - -} - -randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const -{ - return m_lm->getWordID(str); -} - -LMResult LanguageModelRandLM::GetValue(const vector &contextFactor, - State* finalState) const -{ - FactorType factorType = GetFactorType(); - // set up context - randlm::WordID ngram[MAX_NGRAM_SIZE]; - int count = contextFactor.size(); - for (int i = 0 ; i < count ; i++) { - ngram[i] = GetLmID((*contextFactor[i])[factorType]); - //std::cerr << m_lm->getWord(ngram[i]) << " "; - } - int found = 0; - LMResult ret; - ret.score = FloorScore(TransformLMScore(m_lm->getProb(&ngram[0], count, &found, finalState))); - ret.unknown = count && (ngram[count - 1] == m_oov_id); - //if (finalState) - // std::cerr << " = " << logprob << "(" << *finalState << ", " <<")"<< std::endl; - //else - // std::cerr << " = " << logprob << std::endl; - return ret; -} - -} - - diff --git a/moses/src/LM/RandLM.h b/moses/src/LM/RandLM.h deleted file mode 100644 index 8de5f3bb1..000000000 --- a/moses/src/LM/RandLM.h +++ /dev/null @@ -1,71 +0,0 @@ -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#ifndef moses_LanguageModelRandLM_h -#define moses_LanguageModelRandLM_h - -#include -#include -#include "Factor.h" -#include "Util.h" -#include "LM/SingleFactor.h" -#include "RandLM.h" - -class randlm::RandLM; - -namespace Moses -{ -class Factor; -class Phrase; - -// RandLM wrapper (single factor LM) - -class LanguageModelRandLM : public LanguageModelPointerState -{ -public: - LanguageModelRandLM() - : m_lm(0) {} - bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder); - virtual LMResult GetValue(const std::vector &contextFactor, State* finalState = NULL) const; - ~LanguageModelRandLM() { - delete m_lm; - } - void CleanUpAfterSentenceProcessing() { - m_lm->clearCaches(); // clear caches - } - void InitializeBeforeSentenceProcessing() { - m_lm->initThreadSpecificData(); // Creates thread specific data iff - // compiled with multithreading. - } -protected: - std::vector m_randlm_ids_vec; - randlm::RandLM* m_lm; - randlm::WordID m_oov_id; - void CreateFactors(FactorCollection &factorCollection); - randlm::WordID GetLmID( const std::string &str ) const; - randlm::WordID GetLmID( const Factor *factor ) const { - size_t factorId = factor->GetId(); - return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId]; - }; - -}; - -} - -#endif diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index 9e3e65cb9..96568da5b 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -148,7 +148,7 @@ libmoses_la_HEADERS += LM/IRST.h endif if RAND_LM -libmoses_la_HEADERS += LM/RandLM.h +libmoses_la_HEADERS += LM/Rand.h endif if ORLM_LM @@ -318,7 +318,7 @@ libmoses_la_SOURCES += LM/IRST.cpp endif if RAND_LM -libmoses_la_SOURCES += LM/RandLM.cpp +libmoses_la_SOURCES += LM/Rand.cpp endif if ORLM_LM -- cgit v1.2.3