Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> 2009-03-16 16:11:34 +0300
committer chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> 2009-03-16 16:11:34 +0300
commit 12a5e0a2d05e0c4bfbace5c48dd80ca161c7c960 (patch)
tree 45da737f37103c55aff6b5412e2bc9cdca2e0d3c
parent 59f5a5e9bd0e1cbb0cd0ebc9239348ff1219721b (diff)
3-way Beta-Geometric and Dirichlet-Multinomial LDC models. (branch: chardmeier-ldc)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/chardmeier-ldc@2247 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r-- moses/src/LexicalDistortionCost.cpp 110
-rw-r--r-- moses/src/LexicalDistortionCost.h 69
2 files changed, 170 insertions, 9 deletions
diff --git a/moses/src/LexicalDistortionCost.cpp b/moses/src/LexicalDistortionCost.cpp
index 3dd877711..bbfe5c076 100644
--- a/moses/src/LexicalDistortionCost.cpp
+++ b/moses/src/LexicalDistortionCost.cpp
@@ -20,6 +20,10 @@ LexicalDistortionCost *LexicalDistortionCost::CreateModel(const std::string &mod
return new LDCBetaBinomial(filePath, direction, PhrasePair, f_factors, e_factors);
else if(modelType == "beta-geometric-phrase")
return new LDCBetaGeometric(filePath, direction, PhrasePair, f_factors, e_factors);
+ else if(modelType == "3way-beta-geometric-phrase")
+ return new LDC3BetaGeometric(filePath, direction, PhrasePair, f_factors, e_factors);
+ else if(modelType == "dirichlet-multinomial")
+ return new LDCDirichletMultinomial(filePath, direction, PhrasePair, f_factors, e_factors);
else {
UserMessage::Add("Lexical distortion model type not implemented: " + modelType);
return NULL;
@@ -204,3 +208,109 @@ std::vector<float> LDCBetaGeometric::GetDefaultPrior() const
{
return std::vector<float>(12, 2);
}
+
+/******** LDC3BetaGeometric ************/
+
+/**
+ * Scores one reordering step under the 3-way beta-geometric model.
+ *
+ * The distortion distance computed from prev and curr selects one of three
+ * directions (negative, none, positive). The score is the log of that
+ * direction's normalised pseudo-count (parameters 0-2, each plus its prior).
+ * For a non-zero distance the beta_geometric() term for |distance| - 1 is
+ * added, using parameters 3/4 for positive and 5/6 for negative distances,
+ * again each offset by the matching prior entry.
+ *
+ * \param prev   first range handed to ComputeDistortionDistance()
+ * \param curr   second range handed to ComputeDistortionDistance()
+ * \param params model parameters; indices 0-6 are read
+ * \return the score after FloorScore()
+ */
+float LDC3BetaGeometric::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr,
+    const std::vector<float> *params) const
+{
+  const std::vector<float> &prior = GetPrior();
+
+  // Direction pseudo-counts: stored parameter plus prior.
+  const float negCount = params->at(0) + prior[0];
+  const float noneCount = params->at(1) + prior[1];
+  const float posCount = params->at(2) + prior[2];
+
+  // Log of the normaliser shared by all three direction probabilities.
+  const float logNorm = log(negCount + noneCount + posCount);
+
+  const float logNeg = log(negCount) - logNorm;
+  const float logNone = log(noneCount) - logNorm;
+  const float logPos = log(posCount) - logNorm;
+
+  // Arguments passed to beta_geometric() for each non-zero direction.
+  const float posP = params->at(3) + prior[3];
+  const float posQ = params->at(4) + prior[4];
+  const float negP = params->at(5) + prior[5];
+  const float negQ = params->at(6) + prior[6];
+
+  const int distance = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
+
+  float score = .0f;
+
+  if(distance > 0) {
+    score += logPos;
+    // A jump of exactly one position maps to x = 0.
+    score += beta_geometric(posP, posQ, distance - 1);
+  } else if(distance < 0) {
+    score += logNeg;
+    score += beta_geometric(negP, negQ, -distance - 1);
+  } else {
+    // No jump: only the direction term contributes.
+    score = logNone;
+  }
+
+  assert(finite(score));
+  return FloorScore(score);
+}
+
+/**
+ * Default prior for the seven parameters of the 3-way beta-geometric model.
+ *
+ * Entries 0-2 are the values added to the negative / none / positive
+ * direction parameters; entries 3-6 are added to the four parameters that
+ * are passed on to beta_geometric().
+ *
+ * The vector must start out empty: constructing it with an initial size of 7
+ * and then calling push_back() seven times yields 14 entries whose first
+ * seven are zero, so the values below would never be read at indices 0-6.
+ *
+ * \return a vector of exactly seven prior values
+ */
+std::vector<float> LDC3BetaGeometric::GetDefaultPrior() const
+{
+  std::vector<float> prior;
+  prior.reserve(7);
+
+  // Direction priors (indices 0-2).
+  prior.push_back(.015);
+  prior.push_back(.06);
+  prior.push_back(.023);
+
+  // Beta-geometric priors (indices 3-6).
+  prior.push_back(2);
+  prior.push_back(2);
+  prior.push_back(2);
+  prior.push_back(2);
+
+  return prior;
+}
+
+/******** LDCDirichletMultinomial ************/
+
+/**
+ * Scores one reordering step under the Dirichlet-multinomial model.
+ *
+ * The distortion distance is shifted so that distance 0 falls on the middle
+ * of the NUM_PARAMETERS bins and is clamped to the first / last bin. The
+ * score is that bin's parameter plus prior, divided by the sum of parameter
+ * plus prior over all bins.
+ *
+ * NOTE(review): the value returned here is a plain ratio, whereas
+ * LDC3BetaGeometric sums logarithms - confirm which domain callers expect.
+ *
+ * \param prev   first range handed to ComputeDistortionDistance()
+ * \param curr   second range handed to ComputeDistortionDistance()
+ * \param params model parameters; NUM_PARAMETERS entries are read
+ * \return the score after FloorScore()
+ */
+float LDCDirichletMultinomial::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr,
+    const std::vector<float> *params) const
+{
+  const std::vector<float> &prior = GetPrior();
+
+  // Bin that represents a distortion distance of zero, and the last valid bin.
+  const int centre = (NUM_PARAMETERS - 1) / 2;
+  const int last = NUM_PARAMETERS - 1;
+
+  const int distance = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
+
+  // Distances beyond either end share the outermost bin on that side.
+  int bin = distance + centre;
+  if(bin < 0)
+    bin = 0;
+  else if(bin > last)
+    bin = last;
+
+  // Normaliser: total of parameter plus prior across every bin.
+  float mass = .0f;
+  for(size_t k = 0; k != NUM_PARAMETERS; ++k)
+    mass += params->at(k) + prior[k];
+
+  const float score = (params->at(bin) + prior[bin]) / mass;
+
+  assert(finite(score));
+  return FloorScore(score);
+}
+
+/**
+ * Default prior for the Dirichlet-multinomial model.
+ *
+ * The middle bin receives 1.0 and every step away from it, in either
+ * direction, halves the value, giving a symmetric prior over all
+ * NUM_PARAMETERS bins.
+ *
+ * \return a vector of NUM_PARAMETERS prior values
+ */
+std::vector<float> LDCDirichletMultinomial::GetDefaultPrior() const
+{
+  const int centre = (NUM_PARAMETERS - 1) / 2;
+
+  std::vector<float> prior(NUM_PARAMETERS);
+
+  float weight = 1.0f;
+  prior[centre] = weight;
+
+  // Walk outwards from the centre, filling both sides symmetrically.
+  for(int left = centre - 1, right = centre + 1; left >= 0; --left, ++right) {
+    weight *= .5;
+    prior[right] = prior[left] = weight;
+  }
+
+  return prior;
+}
+
+
diff --git a/moses/src/LexicalDistortionCost.h b/moses/src/LexicalDistortionCost.h
index cceacae46..d2463a93b 100644
--- a/moses/src/LexicalDistortionCost.h
+++ b/moses/src/LexicalDistortionCost.h
@@ -131,20 +131,16 @@ class LDCBetaGeometric : public LexicalDistortionCost {
Direction direction,
Condition condition,
std::vector< FactorType >& f_factors,
- std::vector< FactorType >& e_factors)
- : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS) {
- VERBOSE(1, "Created beta-geometric lexical distortion cost model\n");
+ std::vector< FactorType >& e_factors,
+ size_t numParameters = NUM_PARAMETERS)
+ : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, numParameters) {
+ VERBOSE(1, "Created beta-geometric lexical distortion cost model or subclass\n");
}
public:
virtual std::string GetScoreProducerDescription() const {
return "Beta-Geometric lexical distortion cost model, file=" + m_modelFileName;
};
-/*
- virtual size_t GetNumParameters() const {
- return 6;
- }
-*/
protected:
virtual float CalculateDistortionScore( const WordsRange &prev,
@@ -152,6 +148,61 @@ class LDCBetaGeometric : public LexicalDistortionCost {
const std::vector<float> *parameters) const;
virtual std::vector<float> GetDefaultPrior() const;
- private:
+
float beta_geometric(float p, float q, int x) const;
};
+
+/**
+ * Lexical distortion cost model with a three-way direction choice
+ * (negative / none / positive) on top of the beta-geometric model.
+ * It reuses LDCBetaGeometric and widens the parameter count to seven.
+ * Construction is restricted to LexicalDistortionCost, which is a friend.
+ */
+class LDC3BetaGeometric : public LDCBetaGeometric {
+  friend class LexicalDistortionCost;
+
+ public:
+  /** Human-readable description that includes the model file name. */
+  virtual std::string GetScoreProducerDescription() const {
+    return "3-way Beta-Geometric lexical distortion cost model, file=" + m_modelFileName;
+  }
+
+ protected:
+  /** Number of parameters per entry: 3 direction values + 4 beta-geometric values. */
+  static const size_t NUM_PARAMETERS = 7;
+
+  /** Forwards all arguments to LDCBetaGeometric together with this class's parameter count. */
+  LDC3BetaGeometric(const std::string &filePath,
+                    Direction direction,
+                    Condition condition,
+                    std::vector< FactorType >& f_factors,
+                    std::vector< FactorType >& e_factors)
+    : LDCBetaGeometric(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS)
+  {
+    VERBOSE(1, "Created 3-way beta-geometric lexical distortion cost model\n");
+  }
+
+  /** Score for moving from prev to curr given one parameter vector. */
+  virtual float CalculateDistortionScore(const WordsRange &prev,
+                                         const WordsRange &curr,
+                                         const std::vector<float> *parameters) const;
+
+  /** Prior values supplied by this model by default. */
+  virtual std::vector<float> GetDefaultPrior() const;
+};
+
+/**
+ * Lexical distortion cost model that keeps one parameter per distortion
+ * distance bin. With 23 bins the middle one stands for distance 0 and the
+ * remaining bins cover distances -11 to +11; larger jumps are clamped to the
+ * outermost bins by CalculateDistortionScore().
+ * Construction is restricted to LexicalDistortionCost, which is a friend.
+ */
+class LDCDirichletMultinomial : public LexicalDistortionCost {
+  friend class LexicalDistortionCost;
+
+ public:
+  /** Human-readable description that includes the model file name. */
+  virtual std::string GetScoreProducerDescription() const {
+    return "Dirichlet-Multinomial lexical distortion cost model, file=" + m_modelFileName;
+  }
+
+ protected:
+  /** Number of distance bins, and therefore parameters, per entry. */
+  static const size_t NUM_PARAMETERS = 23;
+
+  /** Forwards all arguments to LexicalDistortionCost together with this class's parameter count. */
+  LDCDirichletMultinomial(const std::string &filePath,
+                          Direction direction,
+                          Condition condition,
+                          std::vector< FactorType >& f_factors,
+                          std::vector< FactorType >& e_factors)
+    : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS)
+  {
+    VERBOSE(1, "Created Dirichlet-Multinomial lexical distortion cost model\n");
+  }
+
+  /** Score for moving from prev to curr given one parameter vector. */
+  virtual float CalculateDistortionScore(const WordsRange &prev,
+                                         const WordsRange &curr,
+                                         const std::vector<float> *parameters) const;
+
+  /** Prior values supplied by this model by default. */
+  virtual std::vector<float> GetDefaultPrior() const;
+};