Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
author: chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> 2009-01-21 01:16:31 +0300
committer: chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> 2009-01-21 01:16:31 +0300
commit: 59f5a5e9bd0e1cbb0cd0ebc9239348ff1219721b (patch)
tree: 30c5cf11c4d5c1cf39874804df7753c077d1e89c /moses
parent: 99b263439a1d41fe042f94b66dd9e741235c82e5 (diff)
LDC: Made priors configurable from ini file.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/chardmeier-ldc@1988 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/LexicalDistortionCost.cpp 36
-rw-r--r-- moses/src/LexicalDistortionCost.h 41
-rw-r--r-- moses/src/Parameter.cpp 1
-rw-r--r-- moses/src/StaticData.cpp 22
4 files changed, 76 insertions, 24 deletions
diff --git a/moses/src/LexicalDistortionCost.cpp b/moses/src/LexicalDistortionCost.cpp
index cd7749702..3dd877711 100644
--- a/moses/src/LexicalDistortionCost.cpp
+++ b/moses/src/LexicalDistortionCost.cpp
@@ -8,6 +8,7 @@
#include "WordsRange.h"
#include "TranslationOption.h"
+/******** LexicalDistortionCost: static member functions ************/
LexicalDistortionCost *LexicalDistortionCost::CreateModel(const std::string &modelType,
const std::string &filePath,
@@ -25,6 +26,8 @@ LexicalDistortionCost *LexicalDistortionCost::CreateModel(const std::string &mod
}
}
+/******** LexicalDistortionCost ************/
+
LexicalDistortionCost::LexicalDistortionCost(const std::string &filePath,
Direction direction,
Condition condition,
@@ -47,7 +50,10 @@ LexicalDistortionCost::~LexicalDistortionCost()
const std::vector<float> LexicalDistortionCost::GetDistortionParameters(const Phrase &src, const Phrase &tgt) const
{
std::vector<float> params = m_distortionTable->GetScore(src, tgt, Phrase(Output));
- if(params.size() == 0) return GetDefaultDistortion();
+
+ // if we can't find the phrase, return zeroes - the prior will be used
+ if(params.size() == 0)
+ return std::vector<float>(GetNumParameterSets() * GetNumParametersPerDirection(), .0f);
assert(params.size() == GetNumParameterSets() * GetNumParametersPerDirection());
@@ -88,12 +94,14 @@ std::vector<float> LexicalDistortionCost::GetProb(const Phrase &src, const Phras
return GetDistortionParameters(src, tgt);
}
+/******** LDCBetaBinomial ************/
+
float LDCBetaBinomial::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr,
const std::vector<float> *params) const
{
- // float p = px + 2.23;
- float p = params->at(0) + 2;
- float q = params->at(1) + 2;
+ const std::vector<float> &prior = GetPrior();
+ float p = params->at(0) + prior[0];
+ float q = params->at(1) + prior[1];
int x = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
if(x < -m_distortionRange) x = -m_distortionRange;
@@ -141,20 +149,24 @@ float LDCBetaBinomial::beta_binomial(float p, float q, int x) const
return FloorScore((float) score);
}
-std::vector<float> LDCBetaBinomial::GetDefaultDistortion() const
+std::vector<float> LDCBetaBinomial::GetDefaultPrior() const
{
return std::vector<float>(4, 12);
}
+/******** LDCBetaGeometric ************/
+
float LDCBetaGeometric::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr,
const std::vector<float> *params) const
{
- float p_dir = params->at(0);
- float q_dir = params->at(1);
- float p_pos = params->at(2);
- float q_pos = params->at(3);
- float p_neg = params->at(4);
- float q_neg = params->at(5);
+ const std::vector<float> &prior = GetPrior();
+
+ float p_dir = params->at(0) + prior[0];
+ float q_dir = params->at(1) + prior[1];
+ float p_pos = params->at(2) + prior[2];
+ float q_pos = params->at(3) + prior[3];
+ float p_neg = params->at(4) + prior[4];
+ float q_neg = params->at(5) + prior[5];
int distance = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
@@ -188,7 +200,7 @@ float LDCBetaGeometric::beta_geometric(float p, float q, int x) const
return result;
}
-std::vector<float> LDCBetaGeometric::GetDefaultDistortion() const
+std::vector<float> LDCBetaGeometric::GetDefaultPrior() const
{
return std::vector<float>(12, 2);
}
diff --git a/moses/src/LexicalDistortionCost.h b/moses/src/LexicalDistortionCost.h
index 48dc4bec2..cceacae46 100644
--- a/moses/src/LexicalDistortionCost.h
+++ b/moses/src/LexicalDistortionCost.h
@@ -40,6 +40,11 @@ class LexicalDistortionCost : public ScoreProducer {
virtual std::string GetScoreProducerDescription() const = 0;
+ size_t GetNumPriorParameters() const {
+ // only one set of parameters, no matter how many directions!
+ return GetNumParametersPerDirection();
+ }
+
// number of parameters per direction
size_t GetNumParametersPerDirection() const {
return m_numParametersPerDirection;
@@ -53,15 +58,28 @@ class LexicalDistortionCost : public ScoreProducer {
std::vector<float> GetProb(const Phrase &src, const Phrase &tgt) const;
+ std::vector<float> GetPrior() const {
+ if(m_prior.size() > 0)
+ return m_prior;
+ else
+ return GetDefaultPrior();
+ }
+
+ void SetPrior(std::vector<float> prior) {
+ assert(prior.size() == GetNumParametersPerDirection());
+ m_prior = prior;
+ }
+
protected:
virtual float CalculateDistortionScore( const WordsRange &prev,
const WordsRange &curr,
const std::vector<float> *parameters) const = 0;
- virtual std::vector<float> GetDefaultDistortion() const = 0;
+ virtual std::vector<float> GetDefaultPrior() const = 0;
std::string m_modelFileName;
+ std::vector<float> m_prior;
private:
const std::vector<float> GetDistortionParameters(const Phrase &src, const Phrase &tgt) const;
@@ -69,7 +87,6 @@ class LexicalDistortionCost : public ScoreProducer {
Direction m_direction;
Condition m_condition;
std::vector<FactorType> m_srcfactors, m_tgtfactors;
- std::vector<float> m_defaultDistortion;
size_t m_numParametersPerDirection;
};
@@ -77,12 +94,14 @@ class LDCBetaBinomial : public LexicalDistortionCost {
friend class LexicalDistortionCost;
protected:
+ static const size_t NUM_PARAMETERS = 2;
+
LDCBetaBinomial(const std::string &filePath,
Direction direction,
Condition condition,
std::vector< FactorType >& f_factors,
std::vector< FactorType >& e_factors)
- : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, 2),
+ : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS),
m_distortionRange(6) {
VERBOSE(1, "Created beta-binomial lexical distortion cost model\n");
}
@@ -91,17 +110,12 @@ class LDCBetaBinomial : public LexicalDistortionCost {
virtual std::string GetScoreProducerDescription() const {
return "Beta-Binomial lexical distortion cost model, file=" + m_modelFileName;
};
- virtual size_t GetNumParameters() const {
- return 2;
- }
-
protected:
virtual float CalculateDistortionScore( const WordsRange &prev,
const WordsRange &curr,
const std::vector<float> *parameters) const;
- virtual std::vector<float> GetDefaultDistortion() const;
-
+ virtual std::vector<float> GetDefaultPrior() const;
private:
float beta_binomial(float p, float q, int x) const;
int m_distortionRange;
@@ -111,12 +125,14 @@ class LDCBetaGeometric : public LexicalDistortionCost {
friend class LexicalDistortionCost;
protected:
+ static const size_t NUM_PARAMETERS = 6;
+
LDCBetaGeometric(const std::string &filePath,
Direction direction,
Condition condition,
std::vector< FactorType >& f_factors,
std::vector< FactorType >& e_factors)
- : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, 6) {
+ : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS) {
VERBOSE(1, "Created beta-geometric lexical distortion cost model\n");
}
@@ -124,17 +140,18 @@ class LDCBetaGeometric : public LexicalDistortionCost {
virtual std::string GetScoreProducerDescription() const {
return "Beta-Geometric lexical distortion cost model, file=" + m_modelFileName;
};
+/*
virtual size_t GetNumParameters() const {
return 6;
}
+*/
protected:
virtual float CalculateDistortionScore( const WordsRange &prev,
const WordsRange &curr,
const std::vector<float> *parameters) const;
- virtual std::vector<float> GetDefaultDistortion() const;
-
+ virtual std::vector<float> GetDefaultPrior() const;
private:
float beta_geometric(float p, float q, int x) const;
};
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index e6827014c..049f71dc8 100644
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -77,6 +77,7 @@ Parameter::Parameter()
AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
AddParam("lexical-distortion-cost", "source factors (0 if table independent of source), target factors, type, located of the lexicalised distortion cost models");
+ AddParam("ldc-priors", "prior parameters for lexical distortion cost models");
AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index d12b0b2b8..b5863cd16 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -540,6 +540,15 @@ bool StaticData::LoadLexicalDistortion() {
}
size_t weightIdx = 0;
+ std::vector<float> priors;
+ if(m_parameter->isParamSpecified("ldc-priors")) {
+ std::vector<std::string> priorStr = m_parameter->GetParam("ldc-priors");
+ for(size_t j = 0; j < priorStr.size(); ++j) {
+ priors.push_back(Scan<float>(priorStr[j]));
+ }
+ }
+ size_t priorIdx = 0;
+
for(size_t i = 0; i < fileStr.size(); ++i) {
vector<FactorType> input,output;
vector<string> spec = Tokenize<string>(fileStr[i], " ");
@@ -606,6 +615,19 @@ bool StaticData::LoadLexicalDistortion() {
cur_weights.push_back(weights[weightIdx]);
}
+ if(priors.size() > 0) {
+ std::vector<float> cur_prior;
+ for(size_t i = 0; i < newmodel->GetNumPriorParameters(); i++, priorIdx++) {
+ if(priorIdx >= priors.size()) {
+ UserMessage::Add("Insufficient number of prior parameters for lexical distortion models.");
+ delete newmodel;
+ return false;
+ }
+ cur_prior.push_back(priors[priorIdx]);
+ }
+ newmodel->SetPrior(cur_prior);
+ }
+
m_ldcModels.push_back(newmodel);
m_scoreIndexManager.AddScoreProducer(newmodel);
SetWeightsForScoreProducer(newmodel, cur_weights);