Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: Eva Hasler <evahasler@gmail.com> 2012-03-15 22:43:19 +0400
committer: Eva Hasler <evahasler@gmail.com> 2012-03-15 22:43:19 +0400
commit: b9015a8e5194fa4c6c24620425a2019d5bcb42e9 (patch)
tree: eee305590c1b58d960547e225ea6e6ca7893c986 /moses
parent: 4a1e7fd07d4975bc8f1963c5af332530020c9043 (diff)
parent: 1783ae522e28650ba7208aa3f0a765524e7cc2d2 (diff)
Merge branch 'miramerge' of thor.inf.ed.ac.uk:/fs/saxnot3/ehasler/mosesdecoder_github_mira into miramerge
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/Parameter.cpp 3
-rw-r--r-- moses/src/PhraseBoundaryFeature.cpp 2
-rw-r--r-- moses/src/PhraseBoundaryFeature.h 7
-rw-r--r-- moses/src/PhrasePairFeature.cpp 3
-rw-r--r-- moses/src/PhrasePairFeature.h 5
-rw-r--r-- moses/src/StaticData.cpp 79
-rw-r--r-- moses/src/WordTranslationFeature.h 8
7 files changed, 84 insertions, 23 deletions
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index a366a6f5e..a0c8b6872 100644
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -98,6 +98,9 @@ Parameter::Parameter()
AddParam("weight-l", "lm", "weight(s) for language models");
AddParam("weight-lex", "lex", "weight for global lexical model");
AddParam("weight-glm", "glm", "weight for global lexical model");
+ AddParam("weight-wt", "wt", "weight for global lexical model");
+ AddParam("weight-pp", "pp", "weight for global lexical model");
+ AddParam("weight-pb", "pb", "weight for global lexical model");
AddParam("weight-t", "tm", "weights for translation model components");
AddParam("weight-w", "w", "weight for word penalty");
AddParam("weight-u", "u", "weight for unknown word penalty");
diff --git a/moses/src/PhraseBoundaryFeature.cpp b/moses/src/PhraseBoundaryFeature.cpp
index 78e937979..461fcbe57 100644
--- a/moses/src/PhraseBoundaryFeature.cpp
+++ b/moses/src/PhraseBoundaryFeature.cpp
@@ -18,7 +18,7 @@ int PhraseBoundaryState::Compare(const FFState& other) const
PhraseBoundaryFeature::PhraseBoundaryFeature
(const FactorList& sourceFactors, const FactorList& targetFactors) :
StatefulFeatureFunction("pb", ScoreProducer::unlimited), m_sourceFactors(sourceFactors),
- m_targetFactors(targetFactors)
+ m_targetFactors(targetFactors), m_sparseProducerWeight(1)
{
}
diff --git a/moses/src/PhraseBoundaryFeature.h b/moses/src/PhraseBoundaryFeature.h
index 11dea3d14..13e08c376 100644
--- a/moses/src/PhraseBoundaryFeature.h
+++ b/moses/src/PhraseBoundaryFeature.h
@@ -49,16 +49,19 @@ public:
/* Not implemented */
assert(0);
}
-
+
+ void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
+ float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
+
private:
void AddFeatures(
const Word* leftWord, const Word* rightWord, const FactorList& factors,
const std::string& side, ScoreComponentCollection* scores) const ;
FactorList m_sourceFactors;
FactorList m_targetFactors;
+ float m_sparseProducerWeight;
};
-
}
diff --git a/moses/src/PhrasePairFeature.cpp b/moses/src/PhrasePairFeature.cpp
index baae22b68..34121a26c 100644
--- a/moses/src/PhrasePairFeature.cpp
+++ b/moses/src/PhrasePairFeature.cpp
@@ -12,7 +12,8 @@ PhrasePairFeature::PhrasePairFeature
(FactorType sourceFactorId, FactorType targetFactorId) :
StatelessFeatureFunction("pp", ScoreProducer::unlimited),
m_sourceFactorId(sourceFactorId),
- m_targetFactorId(targetFactorId) {
+ m_targetFactorId(targetFactorId),
+ m_sparseProducerWeight(1) {
std::cerr << "Creating phrase pair feature.. " << endl;
}
diff --git a/moses/src/PhrasePairFeature.h b/moses/src/PhrasePairFeature.h
index e0f2ebccf..c61037724 100644
--- a/moses/src/PhrasePairFeature.h
+++ b/moses/src/PhrasePairFeature.h
@@ -25,12 +25,13 @@ class PhrasePairFeature: public StatelessFeatureFunction {
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
+ void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
+ float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
private:
FactorType m_sourceFactorId;
FactorType m_targetFactorId;
-
-
+ float m_sparseProducerWeight;
};
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 612999f0f..5a2349979 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -710,25 +710,50 @@ bool StaticData::LoadData(Parameter *parameter)
// DLM: apply additional weight to sparse features if applicable
for (size_t i = 0; i < m_targetNgramFeatures.size(); ++i) {
- float dlmWeight = m_targetNgramFeatures[i]->GetSparseProducerWeight();
- cerr << "Set sparse producer weight: " << dlmWeight << endl;
- cerr << "Size of loaded model: " <<
- extraWeights.GetNumberWeights(m_targetNgramFeatures[i]) << endl;
- if (dlmWeight != 1)
- extraWeights.MultiplyEquals(m_targetNgramFeatures[i], dlmWeight);
+ float weight = m_targetNgramFeatures[i]->GetSparseProducerWeight();
+ if (weight != 1) {
+ extraWeights.MultiplyEquals(m_targetNgramFeatures[i], weight);
+ cerr << "Set dlm sparse producer weight: " << weight << endl;
+ }
}
// GLM: apply additional weight to sparse features if applicable
for (size_t i = 0; i < m_globalLexicalModelsUnlimited.size(); ++i) {
- float glmWeight = m_globalLexicalModelsUnlimited[i]->GetSparseProducerWeight();
- cerr << "Set sparse producer weight: " << glmWeight << endl;
- cerr << "Size of loaded model: " <<
- extraWeights.GetNumberWeights(m_globalLexicalModelsUnlimited[i]) << endl;
- if (glmWeight != 1)
- extraWeights.MultiplyEquals(m_globalLexicalModelsUnlimited[i], glmWeight);
+ float weight = m_globalLexicalModelsUnlimited[i]->GetSparseProducerWeight();
+ if (weight != 1) {
+ extraWeights.MultiplyEquals(m_globalLexicalModelsUnlimited[i], weight);
+ cerr << "Set glm sparse producer weight: " << weight << endl;
+ }
}
- m_allWeights.PlusEquals(extraWeights);
+ // WT: apply additional weight to sparse features if applicable
+ if (m_wordTranslationFeature) {
+ float weight = m_wordTranslationFeature->GetSparseProducerWeight();
+ if (weight != 1) {
+ extraWeights.MultiplyEquals(m_wordTranslationFeature, weight);
+ cerr << "Set wt sparse producer weight: " << weight << endl;
+ }
+ }
+
+ // PP: apply additional weight to sparse features if applicable
+ if (m_phrasePairFeature) {
+ float weight = m_phrasePairFeature->GetSparseProducerWeight();
+ if (weight != 1) {
+ extraWeights.MultiplyEquals(m_phrasePairFeature, weight);
+ cerr << "Set pp sparse producer weight: " << weight << endl;
+ }
+ }
+
+ // PB: apply additional weight to sparse features if applicable
+ if (m_phraseBoundaryFeature) {
+ float weight = m_phraseBoundaryFeature->GetSparseProducerWeight();
+ if (weight != 1) {
+ extraWeights.MultiplyEquals(m_phraseBoundaryFeature, weight);
+ cerr << "Set pb sparse producer weight: " << weight << endl;
+ }
+ }
+
+ m_allWeights.PlusEquals(extraWeights);
}
return true;
@@ -969,13 +994,13 @@ bool StaticData::LoadGlobalLexicalModelUnlimited()
const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-glm"));
const vector<string> &modelSpec = m_parameter->GetParam("glm-model");
- if (weight.size() != modelSpec.size()) {
+ if (weight.size() != 0 && weight.size() != modelSpec.size()) {
std::cerr << "number of sparse producer weights and model specs for the global lexical model unlimited "
"does not match (" << weight.size() << " != " << modelSpec.size() << ")" << std::endl;
return false;
}
- for (size_t i = 0; i < weight.size(); i++ ) {
+ for (size_t i = 0; i < modelSpec.size(); i++ ) {
bool ignorePunctuation = false;
bool biasFeature = false;
bool restricted = false;
@@ -1600,6 +1625,12 @@ bool StaticData::LoadDiscrimLMFeature()
bool StaticData::LoadPhraseBoundaryFeature()
{
+ const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-pb"));
+ if (weight.size() > 1) {
+ std::cerr << "only one sparse producer weight allowed for the phrase boundary feature" << std::endl;
+ return false;
+ }
+
const vector<string> &phraseBoundarySourceFactors =
m_parameter->GetParam("phrase-boundary-source-feature");
const vector<string> &phraseBoundaryTargetFactors =
@@ -1626,11 +1657,19 @@ bool StaticData::LoadPhraseBoundaryFeature()
//cerr << "source "; for (size_t i = 0; i < sourceFactors.size(); ++i) cerr << sourceFactors[i] << " "; cerr << endl;
//cerr << "target "; for (size_t i = 0; i < targetFactors.size(); ++i) cerr << targetFactors[i] << " "; cerr << endl;
m_phraseBoundaryFeature = new PhraseBoundaryFeature(sourceFactors,targetFactors);
+ if (weight.size() > 0)
+ m_phraseBoundaryFeature->SetSparseProducerWeight(weight[0]);
return true;
}
bool StaticData::LoadPhrasePairFeature()
{
+ const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-pp"));
+ if (weight.size() > 1) {
+ std::cerr << "only one sparse producer weight allowed for the phrase pair feature" << std::endl;
+ return false;
+ }
+
const vector<string> &phrasePairFactors =
m_parameter->GetParam("phrase-pair-feature");
if (phrasePairFactors.size() == 0) return true;
@@ -1646,6 +1685,8 @@ bool StaticData::LoadPhrasePairFeature()
size_t sourceFactorId = Scan<FactorType>(tokens[0]);
size_t targetFactorId = Scan<FactorType>(tokens[1]);
m_phrasePairFeature = new PhrasePairFeature(sourceFactorId, targetFactorId);
+ if (weight.size() > 0)
+ m_phrasePairFeature->SetSparseProducerWeight(weight[0]);
return true;
}
@@ -1737,6 +1778,12 @@ bool StaticData::LoadSourceWordDeletionFeature()
bool StaticData::LoadWordTranslationFeature()
{
+ const vector<float> &weight = Scan<float>(m_parameter->GetParam("weight-wt"));
+ if (weight.size() > 1) {
+ std::cerr << "only one sparse producer weight allowed for the word translation feature" << std::endl;
+ return false;
+ }
+
const vector<string> &parameters = m_parameter->GetParam("word-translation-feature");
if (parameters.empty())
return true;
@@ -1773,6 +1820,8 @@ bool StaticData::LoadWordTranslationFeature()
m_wordTranslationFeature = new WordTranslationFeature(factorIdSource, factorIdTarget, simple,
sourceTrigger, targetTrigger);
+ if (weight.size() > 0)
+ m_wordTranslationFeature->SetSparseProducerWeight(weight[0]);
// load word list for restricted feature set
if (tokens.size() == 6) {
diff --git a/moses/src/WordTranslationFeature.h b/moses/src/WordTranslationFeature.h
index 44e2a84b1..adb455ea7 100644
--- a/moses/src/WordTranslationFeature.h
+++ b/moses/src/WordTranslationFeature.h
@@ -42,12 +42,13 @@ private:
bool m_simple;
bool m_sourceContext;
bool m_targetContext;
-
+ float m_sparseProducerWeight;
+
public:
WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
bool simple, bool sourceContext, bool targetContext):
// StatelessFeatureFunction("wt", ScoreProducer::unlimited),
- StatefulFeatureFunction("wt", ScoreProducer::unlimited),
+ StatefulFeatureFunction("wt", ScoreProducer::unlimited), m_sparseProducerWeight(1),
m_factorTypeSource(factorTypeSource),
m_factorTypeTarget(factorTypeTarget),
m_simple(simple),
@@ -85,6 +86,9 @@ public:
// basic properties
std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; }
size_t GetNumInputScores() const { return 0; }
+
+ void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
+ float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};
}