From ca8653a99962b8d660e5af34040bc320cdca486c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 13:40:20 +0000 Subject: Reduce SRI header pollution --- moses/src/LM/SRI.cpp | 3 +++ moses/src/LM/SRI.h | 14 +++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'moses') diff --git a/moses/src/LM/SRI.cpp b/moses/src/LM/SRI.cpp index e677b305c..9c5a9c3d4 100644 --- a/moses/src/LM/SRI.cpp +++ b/moses/src/LM/SRI.cpp @@ -31,6 +31,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Phrase.h" #include "StaticData.h" +#include "Vocab.h" +#include "Ngram.h" + using namespace std; namespace Moses diff --git a/moses/src/LM/SRI.h b/moses/src/LM/SRI.h index 19932b9c9..69d55f117 100644 --- a/moses/src/LM/SRI.h +++ b/moses/src/LM/SRI.h @@ -26,12 +26,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include "Factor.h" #include "TypeDef.h" -#include "Vocab.h" -#include "Ngram.h" #include "LM/SingleFactor.h" class Factor; class Phrase; +class Vocab; +class Ngram; namespace Moses { @@ -39,15 +39,15 @@ namespace Moses class LanguageModelSRI : public LanguageModelPointerState { protected: - std::vector m_lmIdLookup; + std::vector m_lmIdLookup; ::Vocab *m_srilmVocab; Ngram *m_srilmModel; - VocabIndex m_unknownId; + unsigned int m_unknownId; - LMResult GetValue(VocabIndex wordId, VocabIndex *context) const; + LMResult GetValue(unsigned int wordId, unsigned int *context) const; void CreateFactors(); - VocabIndex GetLmID( const std::string &str ) const; - VocabIndex GetLmID( const Factor *factor ) const; + unsigned int GetLmID( const std::string &str ) const; + unsigned int GetLmID( const Factor *factor ) const; public: LanguageModelSRI(); -- cgit v1.2.3 From 8614d8f566ef3c417b3c3741b0c5a38690e42e80 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 14:10:15 +0000 Subject: Reduce header pollution --- moses/src/LM/Factory.cpp | 2 +- moses/src/LM/ParallelBackoff.cpp | 59 +++++++++++++++++++++++++++++++++++- moses/src/LM/ParallelBackoff.h | 64 ++-------------------------------------- 3 files changed, 61 insertions(+), 64 deletions(-) (limited to 'moses') diff --git a/moses/src/LM/Factory.cpp b/moses/src/LM/Factory.cpp index d2fa32f30..77d2ce81a 100644 --- a/moses/src/LM/Factory.cpp +++ b/moses/src/LM/Factory.cpp @@ -142,7 +142,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation break; case ParallelBackoff: #ifdef LM_SRI - lm = new LanguageModelParallelBackoff(); + lm = NewParallelBackoff(); #endif break; case DMapLM: diff --git a/moses/src/LM/ParallelBackoff.cpp b/moses/src/LM/ParallelBackoff.cpp index 0dc7fe053..ec2fb2f78 100644 --- a/moses/src/LM/ParallelBackoff.cpp +++ b/moses/src/LM/ParallelBackoff.cpp @@ -20,21 +20,71 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "LM/ParallelBackoff.h" -#include "File.h" + +#include +#include +#include +#include + +#include "LM/MultiFactor.h" +#include "Word.h" +#include "Factor.h" +#include "FactorTypeSet.h" +#include "FactorCollection.h" +#include "Phrase.h" #include "TypeDef.h" #include "Util.h" + #include "FNgramSpecs.h" #include "FNgramStats.h" #include "FactoredVocab.h" #include "FNgram.h" #include "wmatrix.h" #include "Vocab.h" +#include "File.h" using namespace std; namespace Moses { +namespace +{ +class LanguageModelParallelBackoff : public LanguageModelMultiFactor +{ +private: + std::vector m_factorTypesOrdered; + + FactoredVocab *m_srilmVocab; + FNgram *m_srilmModel; + VocabIndex m_unknownId; + VocabIndex m_wtid; + VocabIndex m_wtbid; + VocabIndex m_wteid; + FNgramSpecs* fnSpecs; + //std::vector m_lmIdLookup; + std::map* lmIdMap; + std::fstream* debugStream; + + WidMatrix *widMatrix; + +public: + ~LanguageModelParallelBackoff(); + + bool Load(const std::string &filePath, const std::vector &factorTypes, size_t nGramOrder); + + VocabIndex GetLmID( const std::string &str ) const; + + VocabIndex GetLmID( const Factor *factor, FactorType ft ) const; + + void CreateFactors(); + + LMResult GetValueForgotState(const std::vector &contextFactor, FFState &outState) const; + const FFState *GetNullContextState() const; + const FFState *GetBeginSentenceState() const; + FFState *NewState(const FFState *from) const; +}; + LanguageModelParallelBackoff::~LanguageModelParallelBackoff() { /// @@ -294,5 +344,12 @@ const FFState *LanguageModelParallelBackoff::GetBeginSentenceState() const { return NULL; } + +} + +LanguageModelMultiFactor *NewParallelBackoff() { + return new LanguageModelParallelBackoff(); +} + } diff --git a/moses/src/LM/ParallelBackoff.h b/moses/src/LM/ParallelBackoff.h index 74a598737..8e4241395 100644 --- a/moses/src/LM/ParallelBackoff.h +++ b/moses/src/LM/ParallelBackoff.h @@ -21,71 +21,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #pragma once -#include -#include -#include -#include - -#include "LM/MultiFactor.h" -#include "Word.h" -#include "Factor.h" -#include "FactorTypeSet.h" -#include "FactorCollection.h" -#include "Phrase.h" - -#include "FNgramStats.h" -#include "FactoredVocab.h" -#include "FNgram.h" -#include "wmatrix.h" -#include "Vocab.h" - -using namespace std; - -//class FactoredVocab; -//class FNgram; -//class WidMatrix; - - namespace Moses { -/** LM of multiple factors. A simple extension of single factor LM - factors backoff together. - * Rather slow as this uses string concatenation/split -*/ -class LanguageModelParallelBackoff : public LanguageModelMultiFactor -{ -private: - std::vector m_factorTypesOrdered; - - FactoredVocab *m_srilmVocab; - FNgram *m_srilmModel; - VocabIndex m_unknownId; - VocabIndex m_wtid; - VocabIndex m_wtbid; - VocabIndex m_wteid; - FNgramSpecs* fnSpecs; - //std::vector m_lmIdLookup; - std::map* lmIdMap; - std::fstream* debugStream; - - WidMatrix *widMatrix; - -public: - ~LanguageModelParallelBackoff(); - - bool Load(const std::string &filePath, const std::vector &factorTypes, size_t nGramOrder); - - VocabIndex GetLmID( const std::string &str ) const; - - VocabIndex GetLmID( const Factor *factor, FactorType ft ) const; - - void CreateFactors(); - - LMResult GetValueForgotState(const std::vector &contextFactor, FFState &outState) const; - const FFState *GetNullContextState() const; - const FFState *GetBeginSentenceState() const; - FFState *NewState(const FFState *from) const; +class LanguageModelMultiFactor; -}; +LanguageModelMultiFactor *NewParallelBackoff(); } -- cgit v1.2.3 From 380b6c71971c308ba06cacbb9f3add6ba26e9d4c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 14:16:07 +0000 Subject: KenLM compilation macro cleanup --- moses/src/LM/Factory.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'moses') diff --git a/moses/src/LM/Factory.cpp b/moses/src/LM/Factory.cpp index 77d2ce81a..df1e6bc01 100644 --- a/moses/src/LM/Factory.cpp +++ b/moses/src/LM/Factory.cpp @@ -50,8 +50,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # define LM_ORLM 1 # endif -# define LM_KEN 1 - # ifdef HAVE_DMAPLM # define LM_DMAP # endif @@ -74,9 +72,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #ifdef LM_REMOTE # include "LM/Remote.h" #endif -#ifdef LM_KEN -# include "LM/Ken.h" -#endif +#include "LM/Ken.h" #ifdef LM_DMAP # include "LM/DMapLM.h" #endif @@ -100,12 +96,7 @@ LanguageModel* CreateLanguageModel(LMImplementation lmImplementation , int dub ) { if (lmImplementation == Ken || lmImplementation == LazyKen) { -#ifdef LM_KEN return ConstructKenLM(languageModelFile, scoreIndexManager, factorTypes[0], lmImplementation == LazyKen); -#else - UserMessage::Add("KenLM isn't compiled in but your config asked for it"); - return NULL; -#endif } LanguageModelImplementation *lm = NULL; switch (lmImplementation) { -- cgit v1.2.3 From 5c9b28c55abc3ecf45fc07481f93151c58df2520 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 14:37:01 +0000 Subject: Introducing Jamfiles Currently supports kenlm, irstlm, and srilm options. bjam [--with-srilm=/path/to/sri] [--with-irstlm=/path/to/irst] -j8 Places binaries in dist/ that pass regression tests. --- moses/src/Jamfile | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++ moses/src/LM/Jamfile | 58 ++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 moses/src/Jamfile create mode 100644 moses/src/LM/Jamfile (limited to 'moses') diff --git a/moses/src/Jamfile b/moses/src/Jamfile new file mode 100644 index 000000000..6ff1e4091 --- /dev/null +++ b/moses/src/Jamfile @@ -0,0 +1,114 @@ +lib z ; +alias InputFileStream : InputFileStream.cpp z ; + +lib moses : +AlignmentInfo.cpp +AlignmentInfoCollection.cpp +BilingualDynSuffixArray.cpp +BitmapContainer.cpp +ChartCell.cpp +ChartCellCollection.cpp +ChartHypothesis.cpp +ChartHypothesisCollection.cpp +ChartManager.cpp +ChartRuleLookupManager.cpp +ChartRuleLookupManagerMemory.cpp +ChartRuleLookupManagerOnDisk.cpp +ChartTranslationOption.cpp +ChartTranslationOptionCollection.cpp +ChartTranslationOptionList.cpp +ChartTrellisDetour.cpp +ChartTrellisDetourQueue.cpp +ChartTrellisNode.cpp +ChartTrellisPath.cpp +ConfusionNet.cpp +DecodeFeature.cpp +DecodeGraph.cpp +DecodeStep.cpp +DecodeStepGeneration.cpp +DecodeStepTranslation.cpp +Dictionary.cpp +DotChart.cpp +DotChartInMemory.cpp +DotChartOnDisk.cpp +DummyScoreProducers.cpp +DynSAInclude/file.cpp +DynSAInclude/vocab.cpp +DynSuffixArray.cpp +FFState.cpp +Factor.cpp +FactorCollection.cpp +FactorTypeSet.cpp +FeatureFunction.cpp +FloydWarshall.cpp +GenerationDictionary.cpp +GlobalLexicalModel.cpp +hash.cpp +Hypothesis.cpp +HypothesisStack.cpp +HypothesisStackCubePruning.cpp +HypothesisStackNormal.cpp +InputType.cpp +LMList.cpp +LVoc.cpp +LexicalReordering.cpp +LexicalReorderingState.cpp +LexicalReorderingTable.cpp +Manager.cpp +PCNTools.cpp +Parameter.cpp +PartialTranslOptColl.cpp +Phrase.cpp +PhraseDictionary.cpp +PhraseDictionaryALSuffixArray.cpp +PhraseDictionaryDynSuffixArray.cpp +PhraseDictionaryHiero.cpp +PhraseDictionaryMemory.cpp +PhraseDictionarySCFG.cpp +PhraseDictionaryNode.cpp +PhraseDictionaryNodeSCFG.cpp +PhraseDictionaryOnDisk.cpp +PhraseDictionaryTree.cpp +PhraseDictionaryTreeAdaptor.cpp +PrefixTreeMap.cpp +ReorderingConstraint.cpp +ReorderingStack.cpp +RuleCube.cpp +RuleCubeItem.cpp +RuleCubeQueue.cpp +RuleTableLoaderCompact.cpp +RuleTableLoaderFactory.cpp +RuleTableLoaderHiero.cpp +RuleTableLoaderStandard.cpp +ScoreComponentCollection.cpp +ScoreIndexManager.cpp +ScoreProducer.cpp +Search.cpp +SearchCubePruning.cpp +SearchNormal.cpp +Sentence.cpp +SentenceStats.cpp +SquareMatrix.cpp +StaticData.cpp +TargetPhrase.cpp +TargetPhraseCollection.cpp +ThreadPool.cpp +Timer.cpp +TranslationOption.cpp +TranslationOptionCollection.cpp +TranslationOptionCollectionConfusionNet.cpp +TranslationOptionCollectionText.cpp +TranslationOptionList.cpp +TranslationSystem.cpp +TreeInput.cpp +TrellisPath.cpp +TrellisPathCollection.cpp +UserMessage.cpp +Util.cpp +Word.cpp +WordLattice.cpp +WordsBitmap.cpp +WordsRange.cpp +XmlOption.cpp + +InputFileStream LM//LM ../../util//util : . : : . ; diff --git a/moses/src/LM/Jamfile b/moses/src/LM/Jamfile new file mode 100644 index 000000000..bb8944e12 --- /dev/null +++ b/moses/src/LM/Jamfile @@ -0,0 +1,58 @@ +import option ; + +# Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php +rule trim-nl ( str ) +{ + return [ MATCH "([^ +]*)" : $(str) ] ; +} +rule _shell ( cmd ) +{ + return [ trim-nl [ SHELL $(cmd) ] ] ; +} + +with-irstlm = [ option.get "with-irstlm" ] ; +if $(with-irstlm) != "" +{ + lib irstlm : : $(with-irstlm)/lib ; + obj IRST.o : IRST.cpp ../../../util//util : $(with-irstlm)/include .. ; + alias irst : IRST.o irstlm : : : LM_IRST ; + echo "" ; + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ; + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + echo "" ; +} else { + alias irst ; +} + +with-srilm = [ option.get "with-srilm" ] ; +if $(with-srilm) != "" +{ + if [ option.get "with-srilm-dynamic" ] != "" + { + alias sri-libs : srilm ; + } else { + sri-arch = [ option.get "with-srilm-arch" ] ; + sri-arch ?= [ _shell $(with-srilm)/sbin/machine-type ] ; + sri-lib = $(with-srilm)/lib/$(sri-arch) $(with-srilm)/flm/obj/$(sri-arch) ; + + lib flm : : $(sri-lib) ; + lib misc : flm : $(sri-lib) ; + lib dstruct : misc flm : $(sri-lib) ; + lib oolm : dstruct misc flm : $(sri-lib) ; + + alias sri-libs : oolm dstruct misc flm ; + } + + obj SRI.o : SRI.cpp ../../../util//util : $(with-srilm)/include .. ; + obj ParallelBackoff.o : ParallelBackoff.cpp ../../../util//util : $(with-srilm)/include .. ; + alias sri : SRI.o ParallelBackoff.o sri-libs : : : LM_SRI ; +} else { + alias sri ; +} + +lib LM : Base.cpp Factory.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp + ../../../lm//lm + irst sri +: .. $(requirements) : : .. ; -- cgit v1.2.3 From a82c3ee347867f7243b90676900fe5ef595f6f9f Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Nov 2011 15:07:59 +0000 Subject: Fix --without-trace, add synlm compilation --- moses/src/Jamfile | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'moses') diff --git a/moses/src/Jamfile b/moses/src/Jamfile index 6ff1e4091..f70cbb849 100644 --- a/moses/src/Jamfile +++ b/moses/src/Jamfile @@ -1,6 +1,15 @@ lib z ; alias InputFileStream : InputFileStream.cpp z ; +if [ option.get "with-synlm" : no : yes ] = yes +{ + lib m ; + obj SyntacticLanguageModel.o : SyntacticLanguageModel.cpp ../../util//util : $(TOP)/synlm/hhmm/rvtl/include $(TOP)/synlm/hhmm/wsjparse/include ; + alias synlm : SyntacticLanguageModel.o m : : : HAVE_SYNLM ; +} else { + alias synlm ; +} + lib moses : AlignmentInfo.cpp AlignmentInfoCollection.cpp @@ -111,4 +120,5 @@ WordsBitmap.cpp WordsRange.cpp XmlOption.cpp +synlm InputFileStream LM//LM ../../util//util : . : : . ; -- cgit v1.2.3