diff options
author | Eva Hasler <evahasler@gmail.com> | 2012-03-15 22:43:19 +0400 |
---|---|---|
committer | Eva Hasler <evahasler@gmail.com> | 2012-03-15 22:43:19 +0400 |
commit | b9015a8e5194fa4c6c24620425a2019d5bcb42e9 (patch) | |
tree | eee305590c1b58d960547e225ea6e6ca7893c986 /moses | |
parent | 4a1e7fd07d4975bc8f1963c5af332530020c9043 (diff) | |
parent | 1783ae522e28650ba7208aa3f0a765524e7cc2d2 (diff) |
Merge branch 'miramerge' of thor.inf.ed.ac.uk:/fs/saxnot3/ehasler/mosesdecoder_github_mira into miramerge
Diffstat (limited to 'moses')
-rw-r--r-- | moses/src/Parameter.cpp | 3 | ||||
-rw-r--r-- | moses/src/PhraseBoundaryFeature.cpp | 2 | ||||
-rw-r--r-- | moses/src/PhraseBoundaryFeature.h | 7 | ||||
-rw-r--r-- | moses/src/PhrasePairFeature.cpp | 3 | ||||
-rw-r--r-- | moses/src/PhrasePairFeature.h | 5 | ||||
-rw-r--r-- | moses/src/StaticData.cpp | 79 | ||||
-rw-r--r-- | moses/src/WordTranslationFeature.h | 8 |
7 files changed, 84 insertions, 23 deletions
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp index a366a6f5e..a0c8b6872 100644 --- a/moses/src/Parameter.cpp +++ b/moses/src/Parameter.cpp @@ -98,6 +98,9 @@ Parameter::Parameter() AddParam("weight-l", "lm", "weight(s) for language models"); AddParam("weight-lex", "lex", "weight for global lexical model"); AddParam("weight-glm", "glm", "weight for global lexical model"); + AddParam("weight-wt", "wt", "weight for global lexical model"); + AddParam("weight-pp", "pp", "weight for global lexical model"); + AddParam("weight-pb", "pb", "weight for global lexical model"); AddParam("weight-t", "tm", "weights for translation model components"); AddParam("weight-w", "w", "weight for word penalty"); AddParam("weight-u", "u", "weight for unknown word penalty"); diff --git a/moses/src/PhraseBoundaryFeature.cpp b/moses/src/PhraseBoundaryFeature.cpp index 78e937979..461fcbe57 100644 --- a/moses/src/PhraseBoundaryFeature.cpp +++ b/moses/src/PhraseBoundaryFeature.cpp @@ -18,7 +18,7 @@ int PhraseBoundaryState::Compare(const FFState& other) const PhraseBoundaryFeature::PhraseBoundaryFeature (const FactorList& sourceFactors, const FactorList& targetFactors) : StatefulFeatureFunction("pb", ScoreProducer::unlimited), m_sourceFactors(sourceFactors), - m_targetFactors(targetFactors) + m_targetFactors(targetFactors), m_sparseProducerWeight(1) { } diff --git a/moses/src/PhraseBoundaryFeature.h b/moses/src/PhraseBoundaryFeature.h index 11dea3d14..13e08c376 100644 --- a/moses/src/PhraseBoundaryFeature.h +++ b/moses/src/PhraseBoundaryFeature.h @@ -49,16 +49,19 @@ public: /* Not implemented */ assert(0); } - + + void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; } + float GetSparseProducerWeight() const { return m_sparseProducerWeight; } + private: void AddFeatures( const Word* leftWord, const Word* rightWord, const FactorList& factors, const std::string& side, ScoreComponentCollection* scores) const ; FactorList m_sourceFactors; FactorList 
m_targetFactors; + float m_sparseProducerWeight; }; - } diff --git a/moses/src/PhrasePairFeature.cpp b/moses/src/PhrasePairFeature.cpp index baae22b68..34121a26c 100644 --- a/moses/src/PhrasePairFeature.cpp +++ b/moses/src/PhrasePairFeature.cpp @@ -12,7 +12,8 @@ PhrasePairFeature::PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId) : StatelessFeatureFunction("pp", ScoreProducer::unlimited), m_sourceFactorId(sourceFactorId), - m_targetFactorId(targetFactorId) { + m_targetFactorId(targetFactorId), + m_sparseProducerWeight(1) { std::cerr << "Creating phrase pair feature.. " << endl; } diff --git a/moses/src/PhrasePairFeature.h b/moses/src/PhrasePairFeature.h index e0f2ebccf..c61037724 100644 --- a/moses/src/PhrasePairFeature.h +++ b/moses/src/PhrasePairFeature.h @@ -25,12 +25,13 @@ class PhrasePairFeature: public StatelessFeatureFunction { std::string GetScoreProducerWeightShortName(unsigned) const; size_t GetNumInputScores() const; + void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; } + float GetSparseProducerWeight() const { return m_sparseProducerWeight; } private: FactorType m_sourceFactorId; FactorType m_targetFactorId; - - + float m_sparseProducerWeight; }; diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index 612999f0f..5a2349979 100644 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -710,25 +710,50 @@ bool StaticData::LoadData(Parameter *parameter) // DLM: apply additional weight to sparse features if applicable for (size_t i = 0; i < m_targetNgramFeatures.size(); ++i) { - float dlmWeight = m_targetNgramFeatures[i]->GetSparseProducerWeight(); - cerr << "Set sparse producer weight: " << dlmWeight << endl; - cerr << "Size of loaded model: " << - extraWeights.GetNumberWeights(m_targetNgramFeatures[i]) << endl; - if (dlmWeight != 1) - extraWeights.MultiplyEquals(m_targetNgramFeatures[i], dlmWeight); + float weight = m_targetNgramFeatures[i]->GetSparseProducerWeight(); + if (weight 
!= 1) { + extraWeights.MultiplyEquals(m_targetNgramFeatures[i], weight); + cerr << "Set dlm sparse producer weight: " << weight << endl; + } } // GLM: apply additional weight to sparse features if applicable for (size_t i = 0; i < m_globalLexicalModelsUnlimited.size(); ++i) { - float glmWeight = m_globalLexicalModelsUnlimited[i]->GetSparseProducerWeight(); - cerr << "Set sparse producer weight: " << glmWeight << endl; - cerr << "Size of loaded model: " << - extraWeights.GetNumberWeights(m_globalLexicalModelsUnlimited[i]) << endl; - if (glmWeight != 1) - extraWeights.MultiplyEquals(m_globalLexicalModelsUnlimited[i], glmWeight); + float weight = m_globalLexicalModelsUnlimited[i]->GetSparseProducerWeight(); + if (weight != 1) { + extraWeights.MultiplyEquals(m_globalLexicalModelsUnlimited[i], weight); + cerr << "Set glm sparse producer weight: " << weight << endl; + } } - m_allWeights.PlusEquals(extraWeights); + // WT: apply additional weight to sparse features if applicable + if (m_wordTranslationFeature) { + float weight = m_wordTranslationFeature->GetSparseProducerWeight(); + if (weight != 1) { + extraWeights.MultiplyEquals(m_wordTranslationFeature, weight); + cerr << "Set wt sparse producer weight: " << weight << endl; + } + } + + // PP: apply additional weight to sparse features if applicable + if (m_phrasePairFeature) { + float weight = m_phrasePairFeature->GetSparseProducerWeight(); + if (weight != 1) { + extraWeights.MultiplyEquals(m_phrasePairFeature, weight); + cerr << "Set pp sparse producer weight: " << weight << endl; + } + } + + // PB: apply additional weight to sparse features if applicable + if (m_phraseBoundaryFeature) { + float weight = m_phraseBoundaryFeature->GetSparseProducerWeight(); + if (weight != 1) { + extraWeights.MultiplyEquals(m_phraseBoundaryFeature, weight); + cerr << "Set pb sparse producer weight: " << weight << endl; + } + } + + m_allWeights.PlusEquals(extraWeights); } return true; @@ -969,13 +994,13 @@ bool 
StaticData::LoadGlobalLexicalModelUnlimited() const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-glm")); const vector<string> &modelSpec = m_parameter->GetParam("glm-model"); - if (weight.size() != modelSpec.size()) { + if (weight.size() != 0 && weight.size() != modelSpec.size()) { std::cerr << "number of sparse producer weights and model specs for the global lexical model unlimited " "does not match (" << weight.size() << " != " << modelSpec.size() << ")" << std::endl; return false; } - for (size_t i = 0; i < weight.size(); i++ ) { + for (size_t i = 0; i < modelSpec.size(); i++ ) { bool ignorePunctuation = false; bool biasFeature = false; bool restricted = false; @@ -1600,6 +1625,12 @@ bool StaticData::LoadDiscrimLMFeature() bool StaticData::LoadPhraseBoundaryFeature() { + const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-pb")); + if (weight.size() > 1) { + std::cerr << "only one sparse producer weight allowed for the phrase boundary feature" << std::endl; + return false; + } + const vector<string> &phraseBoundarySourceFactors = m_parameter->GetParam("phrase-boundary-source-feature"); const vector<string> &phraseBoundaryTargetFactors = @@ -1626,11 +1657,19 @@ bool StaticData::LoadPhraseBoundaryFeature() //cerr << "source "; for (size_t i = 0; i < sourceFactors.size(); ++i) cerr << sourceFactors[i] << " "; cerr << endl; //cerr << "target "; for (size_t i = 0; i < targetFactors.size(); ++i) cerr << targetFactors[i] << " "; cerr << endl; m_phraseBoundaryFeature = new PhraseBoundaryFeature(sourceFactors,targetFactors); + if (weight.size() > 0) + m_phraseBoundaryFeature->SetSparseProducerWeight(weight[0]); return true; } bool StaticData::LoadPhrasePairFeature() { + const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-pp")); + if (weight.size() > 1) { + std::cerr << "only one sparse producer weight allowed for the phrase pair feature" << std::endl; + return false; + } + const vector<string> &phrasePairFactors 
= m_parameter->GetParam("phrase-pair-feature"); if (phrasePairFactors.size() == 0) return true; @@ -1646,6 +1685,8 @@ bool StaticData::LoadPhrasePairFeature() size_t sourceFactorId = Scan<FactorType>(tokens[0]); size_t targetFactorId = Scan<FactorType>(tokens[1]); m_phrasePairFeature = new PhrasePairFeature(sourceFactorId, targetFactorId); + if (weight.size() > 0) + m_phrasePairFeature->SetSparseProducerWeight(weight[0]); return true; } @@ -1737,6 +1778,12 @@ bool StaticData::LoadSourceWordDeletionFeature() bool StaticData::LoadWordTranslationFeature() { + const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-wt")); + if (weight.size() > 1) { + std::cerr << "only one sparse producer weight allowed for the word translation feature" << std::endl; + return false; + } + const vector<string> &parameters = m_parameter->GetParam("word-translation-feature"); if (parameters.empty()) return true; @@ -1773,6 +1820,8 @@ bool StaticData::LoadWordTranslationFeature() m_wordTranslationFeature = new WordTranslationFeature(factorIdSource, factorIdTarget, simple, sourceTrigger, targetTrigger); + if (weight.size() > 0) + m_wordTranslationFeature->SetSparseProducerWeight(weight[0]); // load word list for restricted feature set if (tokens.size() == 6) { diff --git a/moses/src/WordTranslationFeature.h b/moses/src/WordTranslationFeature.h index 44e2a84b1..adb455ea7 100644 --- a/moses/src/WordTranslationFeature.h +++ b/moses/src/WordTranslationFeature.h @@ -42,12 +42,13 @@ private: bool m_simple; bool m_sourceContext; bool m_targetContext; - + float m_sparseProducerWeight; + public: WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget, bool simple, bool sourceContext, bool targetContext): // StatelessFeatureFunction("wt", ScoreProducer::unlimited), - StatefulFeatureFunction("wt", ScoreProducer::unlimited), + StatefulFeatureFunction("wt", ScoreProducer::unlimited), m_sparseProducerWeight(1), m_factorTypeSource(factorTypeSource), 
m_factorTypeTarget(factorTypeTarget), m_simple(simple), @@ -85,6 +86,9 @@ public: // basic properties std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; } size_t GetNumInputScores() const { return 0; } + + void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; } + float GetSparseProducerWeight() const { return m_sparseProducerWeight; } }; } |