diff options
author | chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> | 2009-01-21 01:16:31 +0300 |
---|---|---|
committer | chardmeier <chardmeier@1f5c12ca-751b-0410-a591-d2e778427230> | 2009-01-21 01:16:31 +0300 |
commit | 59f5a5e9bd0e1cbb0cd0ebc9239348ff1219721b (patch) | |
tree | 30c5cf11c4d5c1cf39874804df7753c077d1e89c /moses/src | |
parent | 99b263439a1d41fe042f94b66dd9e741235c82e5 (diff) |
LDC: Made priors configurable from ini file.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/chardmeier-ldc@1988 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses/src')
-rw-r--r-- | moses/src/LexicalDistortionCost.cpp | 36 | ||||
-rw-r--r-- | moses/src/LexicalDistortionCost.h | 41 | ||||
-rw-r--r-- | moses/src/Parameter.cpp | 1 | ||||
-rw-r--r-- | moses/src/StaticData.cpp | 22 |
4 files changed, 76 insertions, 24 deletions
diff --git a/moses/src/LexicalDistortionCost.cpp b/moses/src/LexicalDistortionCost.cpp index cd7749702..3dd877711 100644 --- a/moses/src/LexicalDistortionCost.cpp +++ b/moses/src/LexicalDistortionCost.cpp @@ -8,6 +8,7 @@ #include "WordsRange.h" #include "TranslationOption.h" +/******** LexicalDistortionCost: static member functions ************/ LexicalDistortionCost *LexicalDistortionCost::CreateModel(const std::string &modelType, const std::string &filePath, @@ -25,6 +26,8 @@ LexicalDistortionCost *LexicalDistortionCost::CreateModel(const std::string &mod } } +/******** LexicalDistortionCost ************/ + LexicalDistortionCost::LexicalDistortionCost(const std::string &filePath, Direction direction, Condition condition, @@ -47,7 +50,10 @@ LexicalDistortionCost::~LexicalDistortionCost() const std::vector<float> LexicalDistortionCost::GetDistortionParameters(const Phrase &src, const Phrase &tgt) const { std::vector<float> params = m_distortionTable->GetScore(src, tgt, Phrase(Output)); - if(params.size() == 0) return GetDefaultDistortion(); + + // if we can't find find the phrase, return zeroes - the prior will be used + if(params.size() == 0) + return std::vector<float>(GetNumParameterSets() * GetNumParametersPerDirection(), .0f); assert(params.size() == GetNumParameterSets() * GetNumParametersPerDirection()); @@ -88,12 +94,14 @@ std::vector<float> LexicalDistortionCost::GetProb(const Phrase &src, const Phras return GetDistortionParameters(src, tgt); } +/******** LDCBetaBinomial ************/ + float LDCBetaBinomial::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const std::vector<float> *params) const { - // float p = px + 2.23; - float p = params->at(0) + 2; - float q = params->at(1) + 2; + const std::vector<float> &prior = GetPrior(); + float p = params->at(0) + prior[0]; + float q = params->at(1) + prior[1]; int x = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr); if(x < -m_distortionRange) x = -m_distortionRange; @@ -141,20 +149,24 @@ float LDCBetaBinomial::beta_binomial(float p, float q, int x) const return FloorScore((float) score); } -std::vector<float> LDCBetaBinomial::GetDefaultDistortion() const +std::vector<float> LDCBetaBinomial::GetDefaultPrior() const { return std::vector<float>(4, 12); } +/******** LDCBetaGeometric ************/ + float LDCBetaGeometric::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const std::vector<float> *params) const { - float p_dir = params->at(0); - float q_dir = params->at(1); - float p_pos = params->at(2); - float q_pos = params->at(3); - float p_neg = params->at(4); - float q_neg = params->at(5); + const std::vector<float> &prior = GetPrior(); + + float p_dir = params->at(0) + prior[0]; + float q_dir = params->at(1) + prior[1]; + float p_pos = params->at(2) + prior[2]; + float q_pos = params->at(3) + prior[3]; + float p_neg = params->at(4) + prior[4]; + float q_neg = params->at(5) + prior[5]; int distance = StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr); @@ -188,7 +200,7 @@ float LDCBetaGeometric::beta_geometric(float p, float q, int x) const return result; } -std::vector<float> LDCBetaGeometric::GetDefaultDistortion() const +std::vector<float> LDCBetaGeometric::GetDefaultPrior() const { return std::vector<float>(12, 2); } diff --git a/moses/src/LexicalDistortionCost.h b/moses/src/LexicalDistortionCost.h index 48dc4bec2..cceacae46 100644 --- a/moses/src/LexicalDistortionCost.h +++ b/moses/src/LexicalDistortionCost.h @@ -40,6 +40,11 @@ class LexicalDistortionCost : public ScoreProducer { virtual std::string GetScoreProducerDescription() const = 0; + size_t GetNumPriorParameters() const { + // only one set of parameters, no matter how many directions! + return GetNumParametersPerDirection(); + } + // number of parameters per direction size_t GetNumParametersPerDirection() const { return m_numParametersPerDirection; @@ -53,15 +58,28 @@ class LexicalDistortionCost : public ScoreProducer { std::vector<float> GetProb(const Phrase &src, const Phrase &tgt) const; + std::vector<float> GetPrior() const { + if(m_prior.size() > 0) + return m_prior; + else + return GetDefaultPrior(); + } + + void SetPrior(std::vector<float> prior) { + assert(prior.size() == GetNumParametersPerDirection()); + m_prior = prior; + } + protected: virtual float CalculateDistortionScore( const WordsRange &prev, const WordsRange &curr, const std::vector<float> *parameters) const = 0; - virtual std::vector<float> GetDefaultDistortion() const = 0; + virtual std::vector<float> GetDefaultPrior() const = 0; std::string m_modelFileName; + std::vector<float> m_prior; private: const std::vector<float> GetDistortionParameters(const Phrase &src, const Phrase &tgt) const; @@ -69,7 +87,6 @@ class LexicalDistortionCost : public ScoreProducer { Direction m_direction; Condition m_condition; std::vector<FactorType> m_srcfactors, m_tgtfactors; - std::vector<float> m_defaultDistortion; size_t m_numParametersPerDirection; }; @@ -77,12 +94,14 @@ class LDCBetaBinomial : public LexicalDistortionCost { friend class LexicalDistortionCost; protected: + static const size_t NUM_PARAMETERS = 2; + LDCBetaBinomial(const std::string &filePath, Direction direction, Condition condition, std::vector< FactorType >& f_factors, std::vector< FactorType >& e_factors) - : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, 2), + : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS), m_distortionRange(6) { VERBOSE(1, "Created beta-binomial lexical distortion cost model\n"); } @@ -91,17 +110,12 @@ class LDCBetaBinomial : public LexicalDistortionCost { virtual std::string GetScoreProducerDescription() const { return "Beta-Binomial lexical distortion cost model, file=" + m_modelFileName; }; - virtual size_t GetNumParameters() const { - return 2; - } - protected: virtual float CalculateDistortionScore( const WordsRange &prev, const WordsRange &curr, const std::vector<float> *parameters) const; - virtual std::vector<float> GetDefaultDistortion() const; - + virtual std::vector<float> GetDefaultPrior() const; private: float beta_binomial(float p, float q, int x) const; int m_distortionRange; @@ -111,12 +125,14 @@ class LDCBetaGeometric : public LexicalDistortionCost { friend class LexicalDistortionCost; protected: + static const size_t NUM_PARAMETERS = 6; + LDCBetaGeometric(const std::string &filePath, Direction direction, Condition condition, std::vector< FactorType >& f_factors, std::vector< FactorType >& e_factors) - : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, 6) { + : LexicalDistortionCost(filePath, direction, condition, f_factors, e_factors, NUM_PARAMETERS) { VERBOSE(1, "Created beta-geometric lexical distortion cost model\n"); } @@ -124,17 +140,18 @@ class LDCBetaGeometric : public LexicalDistortionCost { virtual std::string GetScoreProducerDescription() const { return "Beta-Geometric lexical distortion cost model, file=" + m_modelFileName; }; +/* virtual size_t GetNumParameters() const { return 6; } +*/ protected: virtual float CalculateDistortionScore( const WordsRange &prev, const WordsRange &curr, const std::vector<float> *parameters) const; - virtual std::vector<float> GetDefaultDistortion() const; - + virtual std::vector<float> GetDefaultPrior() const; private: float beta_geometric(float p, float q, int x) const; }; diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp index e6827014c..049f71dc8 100644 --- a/moses/src/Parameter.cpp +++ b/moses/src/Parameter.cpp @@ -77,6 +77,7 @@ Parameter::Parameter() AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables"); AddParam("distortion", "configurations for each factorized/lexicalized reordering model."); AddParam("lexical-distortion-cost", "source factors (0 if table independent of source), target factors, type, located of the lexicalised distortion cost models"); + AddParam("ldc-priors", "prior parameters for lexical distortion cost models"); AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'"); AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation"); AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)"); diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index d12b0b2b8..b5863cd16 100644 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -540,6 +540,15 @@ bool StaticData::LoadLexicalDistortion() { } size_t weightIdx = 0; + std::vector<float> priors; + if(m_parameter->isParamSpecified("ldc-priors")) { + std::vector<std::string> priorStr = m_parameter->GetParam("ldc-priors"); + for(size_t j = 0; j < priorStr.size(); ++j) { + priors.push_back(Scan<float>(priorStr[j])); + } + } + size_t priorIdx = 0; + for(size_t i = 0; i < fileStr.size(); ++i) { vector<FactorType> input,output; vector<string> spec = Tokenize<string>(fileStr[i], " "); @@ -606,6 +615,19 @@ bool StaticData::LoadLexicalDistortion() { cur_weights.push_back(weights[weightIdx]); } + if(priors.size() > 0) { + std::vector<float> cur_prior; + for(size_t i = 0; i < newmodel->GetNumPriorParameters(); i++, priorIdx++) { + if(priorIdx >= priors.size()) { + UserMessage::Add("Insufficient number of prior parameters for lexical distortion models."); + delete newmodel; + return false; + } + cur_prior.push_back(priors[priorIdx]); + } + newmodel->SetPrior(cur_prior); + } + m_ldcModels.push_back(newmodel); m_scoreIndexManager.AddScoreProducer(newmodel); SetWeightsForScoreProducer(newmodel, cur_weights); |