diff options
author | Hieu Hoang <hieuhoang@Hieus-MacBook.local> | 2011-11-18 19:13:46 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@Hieus-MacBook.local> | 2011-11-18 19:13:46 +0400 |
commit | ce0065ff2aa71de2ca2fa9453b7ac50610fede67 (patch) | |
tree | 978495dc296631748cc1e3b59c56e247e0d768c1 /moses | |
parent | b7366dc045087092ba46e7462a00a488ada89586 (diff) | |
parent | a82c3ee347867f7243b90676900fe5ef595f6f9f (diff) |
Merge branch 'master' of github.com:moses-smt/mosesdecoder
Diffstat (limited to 'moses')
-rw-r--r-- | moses/src/Jamfile | 124 | ||||
-rw-r--r-- | moses/src/LM/Factory.cpp | 13 | ||||
-rw-r--r-- | moses/src/LM/Jamfile | 58 | ||||
-rw-r--r-- | moses/src/LM/ParallelBackoff.cpp | 59 | ||||
-rw-r--r-- | moses/src/LM/ParallelBackoff.h | 64 | ||||
-rw-r--r-- | moses/src/LM/SRI.cpp | 3 | ||||
-rw-r--r-- | moses/src/LM/SRI.h | 14 |
7 files changed, 254 insertions, 81 deletions
diff --git a/moses/src/Jamfile b/moses/src/Jamfile new file mode 100644 index 000000000..f70cbb849 --- /dev/null +++ b/moses/src/Jamfile @@ -0,0 +1,124 @@ +lib z ; +alias InputFileStream : InputFileStream.cpp z ; + +if [ option.get "with-synlm" : no : yes ] = yes +{ + lib m ; + obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp ../../util//util : <include>$(TOP)/synlm/hhmm/rvtl/include <include>$(TOP)/synlm/hhmm/wsjparse/include ; + alias synlm : SyntacticLanguageModel.o m : : : <define>HAVE_SYNLM ; +} else { + alias synlm ; +} + +lib moses : +AlignmentInfo.cpp +AlignmentInfoCollection.cpp +BilingualDynSuffixArray.cpp +BitmapContainer.cpp +ChartCell.cpp +ChartCellCollection.cpp +ChartHypothesis.cpp +ChartHypothesisCollection.cpp +ChartManager.cpp +ChartRuleLookupManager.cpp +ChartRuleLookupManagerMemory.cpp +ChartRuleLookupManagerOnDisk.cpp +ChartTranslationOption.cpp +ChartTranslationOptionCollection.cpp +ChartTranslationOptionList.cpp +ChartTrellisDetour.cpp +ChartTrellisDetourQueue.cpp +ChartTrellisNode.cpp +ChartTrellisPath.cpp +ConfusionNet.cpp +DecodeFeature.cpp +DecodeGraph.cpp +DecodeStep.cpp +DecodeStepGeneration.cpp +DecodeStepTranslation.cpp +Dictionary.cpp +DotChart.cpp +DotChartInMemory.cpp +DotChartOnDisk.cpp +DummyScoreProducers.cpp +DynSAInclude/file.cpp +DynSAInclude/vocab.cpp +DynSuffixArray.cpp +FFState.cpp +Factor.cpp +FactorCollection.cpp +FactorTypeSet.cpp +FeatureFunction.cpp +FloydWarshall.cpp +GenerationDictionary.cpp +GlobalLexicalModel.cpp +hash.cpp +Hypothesis.cpp +HypothesisStack.cpp +HypothesisStackCubePruning.cpp +HypothesisStackNormal.cpp +InputType.cpp +LMList.cpp +LVoc.cpp +LexicalReordering.cpp +LexicalReorderingState.cpp +LexicalReorderingTable.cpp +Manager.cpp +PCNTools.cpp +Parameter.cpp +PartialTranslOptColl.cpp +Phrase.cpp +PhraseDictionary.cpp +PhraseDictionaryALSuffixArray.cpp +PhraseDictionaryDynSuffixArray.cpp +PhraseDictionaryHiero.cpp +PhraseDictionaryMemory.cpp +PhraseDictionarySCFG.cpp +PhraseDictionaryNode.cpp +PhraseDictionaryNodeSCFG.cpp +PhraseDictionaryOnDisk.cpp +PhraseDictionaryTree.cpp +PhraseDictionaryTreeAdaptor.cpp +PrefixTreeMap.cpp +ReorderingConstraint.cpp +ReorderingStack.cpp +RuleCube.cpp +RuleCubeItem.cpp +RuleCubeQueue.cpp +RuleTableLoaderCompact.cpp +RuleTableLoaderFactory.cpp +RuleTableLoaderHiero.cpp +RuleTableLoaderStandard.cpp +ScoreComponentCollection.cpp +ScoreIndexManager.cpp +ScoreProducer.cpp +Search.cpp +SearchCubePruning.cpp +SearchNormal.cpp +Sentence.cpp +SentenceStats.cpp +SquareMatrix.cpp +StaticData.cpp +TargetPhrase.cpp +TargetPhraseCollection.cpp +ThreadPool.cpp +Timer.cpp +TranslationOption.cpp +TranslationOptionCollection.cpp +TranslationOptionCollectionConfusionNet.cpp +TranslationOptionCollectionText.cpp +TranslationOptionList.cpp +TranslationSystem.cpp +TreeInput.cpp +TrellisPath.cpp +TrellisPathCollection.cpp +UserMessage.cpp +Util.cpp +Word.cpp +WordLattice.cpp +WordsBitmap.cpp +WordsRange.cpp +XmlOption.cpp + +synlm +InputFileStream LM//LM ../../util//util : <include>. : : <include>. ; diff --git a/moses/src/LM/Factory.cpp b/moses/src/LM/Factory.cpp index d2fa32f30..df1e6bc01 100644 --- a/moses/src/LM/Factory.cpp +++ b/moses/src/LM/Factory.cpp @@ -50,8 +50,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # define LM_ORLM 1 # endif -# define LM_KEN 1 - # ifdef HAVE_DMAPLM # define LM_DMAP # endif @@ -74,9 +72,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #ifdef LM_REMOTE # include "LM/Remote.h" #endif -#ifdef LM_KEN -# include "LM/Ken.h" -#endif +#include "LM/Ken.h" #ifdef LM_DMAP # include "LM/DMapLM.h" #endif @@ -100,12 +96,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation , int dub ) { if (lmImplementation == Ken || lmImplementation == LazyKen) { -#ifdef LM_KEN return ConstructKenLM(languageModelFile, scoreIndexManager, factorTypes[0], lmImplementation == LazyKen); -#else - UserMessage::Add("KenLM isn't compiled in but your config asked for it"); - return NULL; -#endif } LanguageModelImplementation *lm = NULL; switch (lmImplementation) { @@ -142,7 +133,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation break; case ParallelBackoff: #ifdef LM_SRI - lm = new LanguageModelParallelBackoff(); + lm = NewParallelBackoff(); #endif break; case DMapLM: diff --git a/moses/src/LM/Jamfile b/moses/src/LM/Jamfile new file mode 100644 index 000000000..bb8944e12 --- /dev/null +++ b/moses/src/LM/Jamfile @@ -0,0 +1,58 @@ +import option ; + +# Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php +rule trim-nl ( str ) +{ + return [ MATCH "([^ +]*)" : $(str) ] ; +} +rule _shell ( cmd ) +{ + return [ trim-nl [ SHELL $(cmd) ] ] ; +} + +with-irstlm = [ option.get "with-irstlm" ] ; +if $(with-irstlm) != "" +{ + lib irstlm : : <search>$(with-irstlm)/lib ; + obj IRST.o : IRST.cpp ../../../util//util : <include>$(with-irstlm)/include <include>.. ; + alias irst : IRST.o irstlm : : : <define>LM_IRST ; + echo "" ; + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ; + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + echo "" ; +} else { + alias irst ; +} + +with-srilm = [ option.get "with-srilm" ] ; +if $(with-srilm) != "" +{ + if [ option.get "with-srilm-dynamic" ] != "" + { + alias sri-libs : srilm ; + } else { + sri-arch = [ option.get "with-srilm-arch" ] ; + sri-arch ?= [ _shell $(with-srilm)/sbin/machine-type ] ; + sri-lib = <search>$(with-srilm)/lib/$(sri-arch) <search>$(with-srilm)/flm/obj/$(sri-arch) ; + + lib flm : : $(sri-lib) ; + lib misc : flm : $(sri-lib) ; + lib dstruct : misc flm : $(sri-lib) ; + lib oolm : dstruct misc flm : $(sri-lib) ; + + alias sri-libs : oolm dstruct misc flm ; + } + + obj SRI.o : SRI.cpp ../../../util//util : <include>$(with-srilm)/include <include>.. ; + obj ParallelBackoff.o : ParallelBackoff.cpp ../../../util//util : <include>$(with-srilm)/include <include>.. ; + alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI ; +} else { + alias sri ; +} + +lib LM : Base.cpp Factory.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp + ../../../lm//lm + irst sri +: <include>.. $(requirements) : : <include>.. ; diff --git a/moses/src/LM/ParallelBackoff.cpp b/moses/src/LM/ParallelBackoff.cpp index 0dc7fe053..ec2fb2f78 100644 --- a/moses/src/LM/ParallelBackoff.cpp +++ b/moses/src/LM/ParallelBackoff.cpp @@ -20,21 +20,71 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "LM/ParallelBackoff.h" -#include "File.h" + +#include <vector> +#include <string> +#include <sstream> +#include <fstream> + +#include "LM/MultiFactor.h" +#include "Word.h" +#include "Factor.h" +#include "FactorTypeSet.h" +#include "FactorCollection.h" +#include "Phrase.h" #include "TypeDef.h" #include "Util.h" + #include "FNgramSpecs.h" #include "FNgramStats.h" #include "FactoredVocab.h" #include "FNgram.h" #include "wmatrix.h" #include "Vocab.h" +#include "File.h" using namespace std; namespace Moses { +namespace +{ +class LanguageModelParallelBackoff : public LanguageModelMultiFactor +{ +private: + std::vector<FactorType> m_factorTypesOrdered; + + FactoredVocab *m_srilmVocab; + FNgram *m_srilmModel; + VocabIndex m_unknownId; + VocabIndex m_wtid; + VocabIndex m_wtbid; + VocabIndex m_wteid; + FNgramSpecs<FNgramCount>* fnSpecs; + //std::vector<VocabIndex> m_lmIdLookup; + std::map<size_t, VocabIndex>* lmIdMap; + std::fstream* debugStream; + + WidMatrix *widMatrix; + +public: + ~LanguageModelParallelBackoff(); + + bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder); + + VocabIndex GetLmID( const std::string &str ) const; + + VocabIndex GetLmID( const Factor *factor, FactorType ft ) const; + + void CreateFactors(); + + LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const; + const FFState *GetNullContextState() const; + const FFState *GetBeginSentenceState() const; + FFState *NewState(const FFState *from) const; +}; + LanguageModelParallelBackoff::~LanguageModelParallelBackoff() { /// @@ -294,5 +344,12 @@ const FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const { return NULL; } + +} + +LanguageModelMultiFactor *NewParallelBackoff() { + return new LanguageModelParallelBackoff(); +} + } diff --git a/moses/src/LM/ParallelBackoff.h b/moses/src/LM/ParallelBackoff.h index 74a598737..8e4241395 100644 --- a/moses/src/LM/ParallelBackoff.h +++ b/moses/src/LM/ParallelBackoff.h @@ -21,71 +21,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #pragma once -#include <vector> -#include <string> -#include <sstream> -#include <fstream> - -#include "LM/MultiFactor.h" -#include "Word.h" -#include "Factor.h" -#include "FactorTypeSet.h" -#include "FactorCollection.h" -#include "Phrase.h" - -#include "FNgramStats.h" -#include "FactoredVocab.h" -#include "FNgram.h" -#include "wmatrix.h" -#include "Vocab.h" - -using namespace std; - -//class FactoredVocab; -//class FNgram; -//class WidMatrix; - - namespace Moses { -/** LM of multiple factors. A simple extension of single factor LM - factors backoff together. - * Rather slow as this uses string concatenation/split -*/ -class LanguageModelParallelBackoff : public LanguageModelMultiFactor -{ -private: - std::vector<FactorType> m_factorTypesOrdered; - - FactoredVocab *m_srilmVocab; - FNgram *m_srilmModel; - VocabIndex m_unknownId; - VocabIndex m_wtid; - VocabIndex m_wtbid; - VocabIndex m_wteid; - FNgramSpecs<FNgramCount>* fnSpecs; - //std::vector<VocabIndex> m_lmIdLookup; - std::map<size_t, VocabIndex>* lmIdMap; - std::fstream* debugStream; - - WidMatrix *widMatrix; - -public: - ~LanguageModelParallelBackoff(); - - bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder); - - VocabIndex GetLmID( const std::string &str ) const; - - VocabIndex GetLmID( const Factor *factor, FactorType ft ) const; - - void CreateFactors(); - - LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const; - const FFState *GetNullContextState() const; - const FFState *GetBeginSentenceState() const; - FFState *NewState(const FFState *from) const; +class LanguageModelMultiFactor; -}; +LanguageModelMultiFactor *NewParallelBackoff(); } diff --git a/moses/src/LM/SRI.cpp b/moses/src/LM/SRI.cpp index e677b305c..9c5a9c3d4 100644 --- a/moses/src/LM/SRI.cpp +++ b/moses/src/LM/SRI.cpp @@ -31,6 +31,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Phrase.h" #include "StaticData.h" +#include "Vocab.h" +#include "Ngram.h" + using namespace std; namespace Moses diff --git a/moses/src/LM/SRI.h b/moses/src/LM/SRI.h index 19932b9c9..69d55f117 100644 --- a/moses/src/LM/SRI.h +++ b/moses/src/LM/SRI.h @@ -26,12 +26,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include <vector> #include "Factor.h" #include "TypeDef.h" -#include "Vocab.h" -#include "Ngram.h" #include "LM/SingleFactor.h" class Factor; class Phrase; +class Vocab; +class Ngram; namespace Moses { @@ -39,15 +39,15 @@ namespace Moses class LanguageModelSRI : public LanguageModelPointerState { protected: - std::vector<VocabIndex> m_lmIdLookup; + std::vector<unsigned int> m_lmIdLookup; ::Vocab *m_srilmVocab; Ngram *m_srilmModel; - VocabIndex m_unknownId; + unsigned int m_unknownId; - LMResult GetValue(VocabIndex wordId, VocabIndex *context) const; + LMResult GetValue(unsigned int wordId, unsigned int *context) const; void CreateFactors(); - VocabIndex GetLmID( const std::string &str ) const; - VocabIndex GetLmID( const Factor *factor ) const; + unsigned int GetLmID( const std::string &str ) const; + unsigned int GetLmID( const Factor *factor ) const; public: LanguageModelSRI(); |