Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorevahasler <evahasler@1f5c12ca-751b-0410-a591-d2e778427230>2008-11-02 22:04:29 +0300
committerevahasler <evahasler@1f5c12ca-751b-0410-a591-d2e778427230>2008-11-02 22:04:29 +0300
commitd594392e3d215616126d4a76e3ffe632e7f4ffc7 (patch)
treed6f26e49fa5aa882ee79b8b4e49e7824cfac8aef
parent42eb8992fc11e0e595c6f566fc5bd6098a6331bb (diff)
- block recombination when two hypotheses have different source word range and at least one is following a gap in the source sentenceeva_maxent
- change maxent scoring: subtract optimistic score for left movements when actual score is available git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/eva_maxent@1928 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r--moses/src/HypothesisStackNormal.cpp27
-rw-r--r--moses/src/MaxentReordering.cpp81
-rw-r--r--moses/src/ScoreComponentCollection.h2
-rw-r--r--moses/src/StaticData.cpp3
4 files changed, 82 insertions, 31 deletions
diff --git a/moses/src/HypothesisStackNormal.cpp b/moses/src/HypothesisStackNormal.cpp
index 4b69e07a1..e2cd0d5b9 100644
--- a/moses/src/HypothesisStackNormal.cpp
+++ b/moses/src/HypothesisStackNormal.cpp
@@ -99,6 +99,33 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
iterator &iterExisting = addRet.first;
Hypothesis *hypoExisting = *iterExisting;
assert(iterExisting != m_hypos.end());
+
+
+ const WordsRange currSourceWordsRange = hypo->GetCurrSourceWordsRange();
+ const WordsRange existSourceWordsRange = hypoExisting->GetCurrSourceWordsRange();
+
+ /**
+ * if the two hypotheses differ in range and one of them follows a gap, do not recombine! May be needed for maxent scoring
+ */
+
+ if( StaticData::Instance().UseMaxentReordering()){
+ if( existSourceWordsRange.GetStartPos() != 0 && !(hypoExisting->GetWordsBitmap().GetValue( existSourceWordsRange.GetStartPos()-1)) ){
+ if(currSourceWordsRange.GetNumWordsCovered() != existSourceWordsRange.GetNumWordsCovered()){
+ IFVERBOSE(2){
+ std::cerr << "Words ranges are different and existing hypo is following a gap --> do not recombine hypotheses..\n";
+ }
+ return true;
+ }
+ }
+ else if( currSourceWordsRange.GetStartPos() != 0 && !(hypo->GetWordsBitmap().GetValue( currSourceWordsRange.GetStartPos()-1)) ){
+ if(currSourceWordsRange.GetNumWordsCovered() != existSourceWordsRange.GetNumWordsCovered()){
+ IFVERBOSE(2){
+ std::cerr << "Words ranges are different and new hypo is following a gap --> do not recombine hypotheses..\n";
+ }
+ return true;
+ }
+ }
+ }
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
diff --git a/moses/src/MaxentReordering.cpp b/moses/src/MaxentReordering.cpp
index e502a83bf..baa291fd1 100644
--- a/moses/src/MaxentReordering.cpp
+++ b/moses/src/MaxentReordering.cpp
@@ -79,12 +79,8 @@ std::vector<float> MaxentReordering::CalcScore(Hypothesis* hypothesis) const {
OrientationType orientation_curr = orientations[0];
OrientationType orientation_next = orientations[1];
- assert(orientation_curr != orientation_next);
- assert(orientation_curr <= 5 && (orientation_next == 2 || orientation_next == 3 || orientation_next == 5) );
- IFVERBOSE(2){
- std::cerr << "Maxent orientation type (curr): " << orientation_curr << "\n";
- std::cerr << "Maxent orientation type (next): " << orientation_next << "\n";
- }
+ assert(orientation_curr == 0 || orientation_curr == 1 || orientation_curr == 4);
+ assert(orientation_next == 2 || orientation_next == 3 || orientation_next == 5);
// grab data for current hypothesis
const ScoreComponentCollection &reorderingScoreColl_curr =
@@ -104,17 +100,22 @@ std::vector<float> MaxentReordering::CalcScore(Hypothesis* hypothesis) const {
assert(values_next.size() == GetNumOrientationTypes());
nextHypoExists = true;
}
+
+ IFVERBOSE(2){
+ std::cerr << "Maxent orientation type (curr, ID = " << hypothesis->GetId() << "): " << orientation_curr << "\n";
+ if(nextHypoExists)
+ std::cerr << "Maxent orientation type (next, ID = " << nextHypo->GetId() << "): " << orientation_next << "\n";
+ else
+ std::cerr << "Maxent orientation type (next, no ID): " << orientation_next << "\n";
+ }
//add score
float value_curr = 0.0, value_next = 0.0;
IFVERBOSE(2){
std::cerr << "Curr scores: " << values_curr[0] << " " << values_curr[1] << " " << values_curr[2] << " " << values_curr[3] << "\n";
}
- if(orientation_curr < 4){
- value_curr = values_curr[orientation_curr];
- }
- else if(orientation_curr == 4){
- // optimistic guess: use better one of the values for LEFT and LEFT_PLUS
+ if(orientation_curr == 4){
+ // optimistic guess: use better one of the values for LEFT and LEFT_PLUS
if(values_curr[2] >= values_curr[3]){
value_curr = values_curr[2];
orientation_curr = 2;
@@ -123,23 +124,54 @@ std::vector<float> MaxentReordering::CalcScore(Hypothesis* hypothesis) const {
value_curr = values_curr[3];
orientation_curr = 3;
}
+
+ }
+ else{
+ value_curr = values_curr[orientation_curr];
}
IFVERBOSE(2){
std::cerr << "Maxent value (curr): " << value_curr << "\n";
}
- if(orientation_curr < 5)
- score[orientation_curr] = value_curr;
+ // assign score of current hypothesis
+ score[orientation_curr] = value_curr;
if(nextHypoExists){
if(orientation_next != 5){
- value_next = values_next[orientation_next];
+// float diff = 0.0;
+ // If the actual score is the worse of the LEFT and LEFT_PLUS scores, add difference of scores.
+ // In this case the optimistic guess has added the better score already.
+ // Otherwise do nothing, because the optimistic score was correct
+
+// // TODO: only problem: If the optimistic guess was better than the actual score, it was added
+// // with a different weight because of the different position (e.g. position 2 instead of 3).
+ if(orientation_next == 2)
+ if(values_next[2] < values_next[3]){
+// // add difference of score, optimistic guess has added the better score already
+// diff = values_next[2] - values_next[3];
+ // subtract optimistic score values_next[3] and add actual score values_next[2]
+ score[3] -= values_next[3];
+ // add score instead of assigning in case the current hypothesis uses the same slot
+ score[2] += values_next[2];
+ }
+ if(orientation_next == 3)
+ if(values_next[3] < values_next[2]){
+// // add difference of score, optimistic guess has added the better score already
+// diff = values_next[3] - values_next[2];
+ // subtract optimistic score values_next[2] and add actual score values_next[3]
+ score[2] -= values_next[2];
+ // add score instead of assigning in case the current hypothesis uses the same slot
+ score[3] += values_next[3];
+ }
+
+// value_next = diff;
+// // add score instead of assigning in case the current hypothesis uses the same slot
+// score[orientation_next] += value_next;
}
IFVERBOSE(2){
std::cerr << "Next scores: " << values_next[0] << " " << values_next[1] << " " << values_next[2] << " " << values_next[3] << "\n";
std::cerr << "Maxent value (next): " << value_next << "\n";
}
- if(orientation_next < 5)
- score[orientation_next] = value_next;
+
}
IFVERBOSE(2){
for(int i=0; i< score.size(); i++){
@@ -310,22 +342,15 @@ std::vector< MaxentReordering::OrientationType> MaxentOrientationReordering::Get
// CASE 4: Previous source word is not yet translated -> undefined left movement -> LEFT_undef
IFVERBOSE(2){
std::cerr << "Previous word is not translated yet.. \n";
- }
- // if next word is already translated, use ONLY the probability of this well-defined left jump
- // otherwise fallback to LEFT_undef and NONE
+ std::cerr << "fallback: jump LEFT_undef\n";
+ }
+ orientations.push_back(LEFT_undef);
+
if( (currHypothesis->GetWordsBitmap().GetSize() > currSourceWordsRange.GetEndPos()+1) &&
(currHypothesis->GetWordsBitmap().GetValue( currSourceWordsRange.GetEndPos()+1 )) ){
- orientations.push_back(NONE);
- IFVERBOSE(2){
- std::cerr << "no jump w.r.t. previous words..\n";
- }
orientations.push_back( ReEvaluateWithNextPhraseInSource(currHypothesis, currSourceWordsRange) );
}
else{
- IFVERBOSE(2){
- std::cerr << "fallback: jump LEFT_undef\n";
- }
- orientations.push_back(LEFT_undef);
orientations.push_back(NONE);
}
return orientations;
@@ -336,7 +361,7 @@ std::vector< MaxentReordering::OrientationType> MaxentOrientationReordering::Get
MaxentReordering::OrientationType MaxentOrientationReordering::ReEvaluateWithNextPhraseInSource(Hypothesis* currHypothesis, const WordsRange currSourceWordsRange) const {
// Word to the right of current phrase is already translated, TODO: what to do?
IFVERBOSE(2){
- std::cerr << "Following word is already translated: backwards evaluation?\n";
+ std::cerr << "Following word is already translated: backwards evaluation\n";
}
const Hypothesis *next = currHypothesis->GetHypoContainingPosition( currSourceWordsRange.GetEndPos()+1 );
assert( next != NULL);
diff --git a/moses/src/ScoreComponentCollection.h b/moses/src/ScoreComponentCollection.h
index e4b408e18..e7cc6ac15 100644
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@@ -118,7 +118,7 @@ public:
void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
{
- assert(scores.size() == sp->GetNumScoreComponents());
+ assert(scores.size() == sp->GetNumScoreComponents());
size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
for (std::vector<float>::const_iterator vi = scores.begin();
vi != scores.end(); ++vi)
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index e18875752..709debfc2 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -603,8 +603,7 @@ bool StaticData::LoadMaxentReorderingModel()
//spec[1] = name // lexical reordering
//spec[2] = num weights // lexical reordering
//spec[3] = fileName
- //spec[4] = condition for maxent (a or b)
- //spec[5] = number of maxent outcomes
+ //spec[4] = num weights maxent
//decode data into these
vector<FactorType> input,output;