Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthias Huck <huck@i6.informatik.rwth-aachen.de>, 2014-12-03 23:04:26 +0300
committer: Matthias Huck <huck@i6.informatik.rwth-aachen.de>, 2014-12-03 23:04:26 +0300
commit: 24a8a6a51104c8518fcb8d252f83ff580986c2db (patch)
tree: 1fc5f912f7623dda8d1a1f08fb0e9acfff32cba8
parent: 5bbd30ec12e4bcb88e29246e4fce71cc33528d44 (diff)
PhraseOrientationFeature
-rw-r--r-- moses/FF/FeatureFunction.cpp | 4
-rw-r--r-- moses/FF/FeatureFunction.h | 1
-rw-r--r-- moses/FF/PhraseOrientationFeature.cpp | 616
-rw-r--r-- moses/FF/PhraseOrientationFeature.h | 159
-rw-r--r-- moses/PP/OrientationPhraseProperty.cpp | 6
-rw-r--r-- moses/PP/OrientationPhraseProperty.h | 16
-rw-r--r-- moses/StaticData.cpp | 2
-rw-r--r-- moses/Util.h | 5
-rw-r--r-- phrase-extract/ExtractionPhrasePair.cpp | 10
-rw-r--r-- phrase-extract/extract-ghkm/ExtractGHKM.cpp | 10
-rw-r--r-- phrase-extract/extract-ghkm/PhraseOrientation.cpp | 210
-rw-r--r-- phrase-extract/extract-ghkm/PhraseOrientation.h | 41
12 files changed, 846 insertions, 234 deletions
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index 5d4e0f91e..4b5faa91e 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -45,6 +45,7 @@ void FeatureFunction::CallChangeSource(InputType *&input)
FeatureFunction::
FeatureFunction(const std::string& line)
: m_tuneable(true)
+ , m_verbosity(1)
, m_numScoreComponents(1)
{
Initialize(line);
@@ -54,6 +55,7 @@ FeatureFunction::
FeatureFunction(size_t numScoreComponents,
const std::string& line)
: m_tuneable(true)
+ , m_verbosity(0)
, m_numScoreComponents(numScoreComponents)
{
Initialize(line);
@@ -115,6 +117,8 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
+ } else if (key == "verbosity") {
+ m_verbosity = Scan<size_t>(value);
} else if (key == "filterable") { //ignore
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index b30815e05..115797228 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -34,6 +34,7 @@ protected:
std::string m_description, m_argLine;
std::vector<std::vector<std::string> > m_args;
bool m_tuneable;
+ size_t m_verbosity;
size_t m_numScoreComponents;
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index 0f6d8bcb1..4e2a8c637 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -1,4 +1,13 @@
-#include <vector>
+//
+// REFERENCE
+// ---------
+// When using this feature, please cite:
+//
+// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+//
+
#include "PhraseOrientationFeature.h"
#include "moses/InputFileStream.h"
#include "moses/ScoreComponentCollection.h"
@@ -10,82 +19,83 @@
#include "moses/PP/OrientationPhraseProperty.h"
#include "phrase-extract/extract-ghkm/Alignment.h"
-using namespace std;
namespace Moses
{
PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
- : StatelessFeatureFunction(8, line)
+ : StatefulFeatureFunction(6, line)
+ , m_glueTargetLHSStr("Q")
+ , m_glueTargetLHS(true)
+ , m_offsetR2LScores(0)
{
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
- VERBOSE(1, " Done.");
+ FactorCollection &fc = FactorCollection::Instance();
+ const Factor *factor = fc.AddFactor(m_glueTargetLHSStr, true);
+ m_glueTargetLHS.SetFactor(0, factor);
+ m_offsetR2LScores = m_numScoreComponents / 2;
+ VERBOSE(1, " Done." << std::endl);
}
void PhraseOrientationFeature::SetParameter(const std::string& key, const std::string& value)
{
- if (key == "tuneable") {
+ if (key == "tuneable")
+ {
m_tuneable = Scan<bool>(value);
- } else {
- StatelessFeatureFunction::SetParameter(key, value);
+ }
+ else if (key == "glueTargetLHS")
+ {
+ m_glueTargetLHSStr = value;
+ }
+ else
+ {
+ StatefulFeatureFunction::SetParameter(key, value);
}
}
-void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source
- , const TargetPhrase &targetPhrase
- , ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const
-{
- targetPhrase.SetRuleSource(source);
-}
-
-void PhraseOrientationFeature::EvaluateWhenApplied(
+FFState* PhraseOrientationFeature::EvaluateWhenApplied(
const ChartHypothesis& hypo,
+ int featureID, // used to index the state in the previous hypotheses
ScoreComponentCollection* accumulator) const
{
// Dense scores
- std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 8
+ std::vector<float> newScores(m_numScoreComponents,0);
+
+ // State: ignored wrt. recombination; used to propagate orientation probabilities in case of boundary non-terminals
+ PhraseOrientationFeatureState *state = new PhraseOrientationFeatureState();
// Read Orientation property
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
+ const Word &currTarPhrLHS = currTarPhr.GetTargetLHS();
const Phrase *currSrcPhr = currTarPhr.GetRuleSource();
// const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
// bool isGlueGrammarRule = false;
- std::map<size_t,size_t> alignMap;
- alignMap.insert(
- currTarPhr.GetAlignTerm().begin(),
- currTarPhr.GetAlignTerm().end());
- alignMap.insert(
- currTarPhr.GetAlignNonTerm().begin(),
- currTarPhr.GetAlignNonTerm().end());
+ FEATUREVERBOSE(2, *currSrcPhr << std::endl);
+ FEATUREVERBOSE(2, currTarPhr << std::endl);
Moses::GHKM::Alignment alignment;
std::vector<int> alignmentNTs(currTarPhr.GetSize(),-1); // TODO: can be smaller (number of right-hand side non-terminals)
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin();
- it!=currTarPhr.GetAlignTerm().end(); ++it) {
+ it!=currTarPhr.GetAlignTerm().end(); ++it)
+ {
alignment.push_back(std::make_pair(it->first, it->second));
-// std::cerr << "alignTerm " << it->first << " " << it->second << std::endl;
+ FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl);
}
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
- it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
+ it!=currTarPhr.GetAlignNonTerm().end(); ++it)
+ {
alignment.push_back(std::make_pair(it->first, it->second));
alignmentNTs[it->second] = it->first;
-// std::cerr << "alignNonTerm " << it->first << " " << it->second << std::endl;
+ FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl);
}
// Initialize phrase orientation scoring object
- Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), alignment);
- // TODO: Efficiency! This should be precomputed.
-
-// std::cerr << *currSrcPhr << std::endl;
-// std::cerr << currTarPhr << std::endl;
-// std::cerr << currSrcPhr->GetSize() << std::endl;
-// std::cerr << currTarPhr.GetSize() << std::endl;
+ Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), alignment); // TODO: Efficiency! This should be precomputed.
// Get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
@@ -95,94 +105,376 @@ void PhraseOrientationFeature::EvaluateWhenApplied(
size_t nonTerminalNumber = 0;
- for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
+ for (size_t targetIndex=0; targetIndex<currTarPhr.GetSize(); ++targetIndex)
+ {
// consult rule for either word or non-terminal
- const Word &word = currTarPhr.GetWord(phrasePos);
- if ( word.IsNonTerminal() ) {
+ const Word &word = currTarPhr.GetWord(targetIndex);
+ if ( word.IsNonTerminal() )
+ {
+
+ int sourceIndex = alignmentNTs[targetIndex];
+ FEATUREVERBOSE(2, "Scoring nonTerminalNumber= " << nonTerminalNumber << " targetIndex= " << targetIndex << " sourceIndex= " << sourceIndex << std::endl);
+
// non-terminal: consult subderivation
- size_t nonTermIndex = nonTermIndexMap[phrasePos];
+ size_t nonTermIndex = nonTermIndexMap[targetIndex];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
- if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
+ if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation"))
+ {
const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
-// std::cerr << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono();
-// std::cerr << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap();
-// std::cerr << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright();
-// std::cerr << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft();
-// std::cerr << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono();
-// std::cerr << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap();
-// std::cerr << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright();
-// std::cerr << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft();
-// std::cerr << std::endl;
-
- Moses::GHKM::REO_POS l2rOrientation=Moses::GHKM::UNKNOWN, r2lOrientation=Moses::GHKM::UNKNOWN;
- int sourceIndex = alignmentNTs[phrasePos];
-// std::cerr << "targetIndex " << phrasePos << " sourceIndex " << sourceIndex << std::endl;
- l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::L2R);
- r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::R2L);
-
-// std::cerr << "l2rOrientation ";
- switch(l2rOrientation) {
- case Moses::GHKM::LEFT:
+ FEATUREVERBOSE(5, "orientationPhraseProperty: "
+ << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono()
+ << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap()
+ << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright()
+ << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft()
+ << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono()
+ << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap()
+ << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright()
+ << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft()
+ << std::endl);
+
+ const PhraseOrientationFeatureState* prevState =
+ static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
+
+
+ // LEFT-TO-RIGHT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
+
+ IFFEATUREVERBOSE(2)
+ {
+ FEATUREVERBOSE(2, "l2rOrientation ");
+ switch (l2rOrientation)
+ {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
+ }
+ }
+
+ bool delayedScoringL2R = false;
+
+ if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) // and not glue rule
+ {
+ // delay left-to-right scoring
+
+ FEATUREVERBOSE(3, "Left boundary");
+ if (targetIndex != 0) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=0)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < ((int)currSrcPhr->GetSize())-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned)
+ {
+ // discontinuous
+ l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ }
+ else
+ {
+ FEATUREVERBOSE(3, "Delaying left-to-right scoring" << std::endl);
+
+ delayedScoringL2R = true;
+ std::bitset<3> possibleFutureOrientationsL2R(0x7);
+ possibleFutureOrientationsL2R[0] = !previousSourceSpanIsAligned;
+ possibleFutureOrientationsL2R[1] = !followingSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
+ std::vector<float> scoresL2R;
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap()) );
+ scoresL2R.push_back( std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()) );
+ std::vector<float> weightedScoresL2R;
+ for ( size_t i=0; i<3;++i )
+ {
+ weightedScoresL2R.push_back( weightsVector[i] * scoresL2R[i] );
+ }
+
+ size_t heuristicScoreIndex = 0;
+ for (size_t i=1; i<3; ++i)
+ {
+ if (possibleFutureOrientationsL2R[i])
+ {
+ if (weightedScoresL2R[i] > weightedScoresL2R[heuristicScoreIndex])
+ {
+ heuristicScoreIndex = i;
+ }
+ }
+ }
+
+ IFFEATUREVERBOSE(5)
+ {
+ FEATUREVERBOSE(5, "Heuristic score computation (L2R): "
+ << "heuristicScoreIndex= " << heuristicScoreIndex);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightsVector[" << i << "]= " << weightsVector[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " scoresL2R[" << i << "]= " << scoresL2R[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightedScoresL2R[" << i << "]= " << weightedScoresL2R[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " possibleFutureOrientationsL2R[" << i << "]= " << possibleFutureOrientationsL2R[i]);
+ if ( possibleFutureOrientationsL2R == 0x7 )
+ {
+ FEATUREVERBOSE2(5, " (all orientations possible)");
+ }
+ FEATUREVERBOSE2(5, std::endl);
+ }
+
+ newScores[heuristicScoreIndex] += scoresL2R[heuristicScoreIndex];
+ state->SetLeftBoundaryL2R(scoresL2R, heuristicScoreIndex, possibleFutureOrientationsL2R, nonTermIndex);
+
+ if ( (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) == 0x4 )
+ {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: L2R discontinuous orientation "
+ << possibleFutureOrientationsL2R << " & " << prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsL2R & prevState->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations)
+ << std::endl);
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
+ state->m_leftBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
+ }
+ }
+ }
+
+ if (!delayedScoringL2R)
+ {
+ switch (l2rOrientation)
+ {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
newScores[0] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono());
-// std::cerr << "mono" << std::endl;
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x1, newScores);
break;
- case Moses::GHKM::RIGHT:
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
newScores[1] += std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
-// std::cerr << "swap" << std::endl;
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x2, newScores);
break;
- case Moses::GHKM::DRIGHT:
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
-// std::cerr << "dright" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
- case Moses::GHKM::DLEFT:
- newScores[3] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDleft());
-// std::cerr << "dleft" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
- case Moses::GHKM::UNKNOWN:
- // modelType == Moses::GHKM::REO_MSLR
- newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
-// std::cerr << "unknown->dright" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
+ // if sub-derivation has left-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
default:
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
+ }
+ }
+
+
+ // RIGHT-TO-LEFT DIRECTION
+
+ Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
+
+ IFFEATUREVERBOSE(2)
+ {
+ FEATUREVERBOSE(2, "r2lOrientation ");
+ switch (r2lOrientation)
+ {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ FEATUREVERBOSE2(2, "mono" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ FEATUREVERBOSE2(2, "swap" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ FEATUREVERBOSE2(2, "dleft" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ FEATUREVERBOSE2(2, "dright" << std::endl);
+ break;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
+ break;
+ default:
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Unsupported orientation type.");
+ break;
+ }
}
-// std::cerr << "r2lOrientation ";
- switch(r2lOrientation) {
- case Moses::GHKM::LEFT:
- newScores[4] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
-// std::cerr << "mono" << std::endl;
+ bool delayedScoringR2L = false;
+
+ if ( ((targetIndex == currTarPhr.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,currTarPhr.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
+ && (currTarPhrLHS != m_glueTargetLHS) ) // and not glue rule
+ {
+ // delay right-to-left scoring
+
+ FEATUREVERBOSE(3, "Right boundary");
+ if (targetIndex != currTarPhr.GetSize()-1) {
+ FEATUREVERBOSE2(3, " (with targetIndex!=currTarPhr.GetSize()-1)");
+ }
+ FEATUREVERBOSE2(3, std::endl);
+
+ bool previousSourceSpanIsAligned = ( (sourceIndex > 0) && phraseOrientation.SourceSpanIsAligned(0,sourceIndex-1) );
+ bool followingSourceSpanIsAligned = ( (sourceIndex < ((int)currSrcPhr->GetSize())-1) && phraseOrientation.SourceSpanIsAligned(sourceIndex,currSrcPhr->GetSize()-1) );
+
+ FEATUREVERBOSE(4, "previousSourceSpanIsAligned = " << previousSourceSpanIsAligned << std::endl);
+ FEATUREVERBOSE(4, "followingSourceSpanIsAligned = " << followingSourceSpanIsAligned << std::endl;);
+
+ if (previousSourceSpanIsAligned && followingSourceSpanIsAligned)
+ {
+ // discontinuous
+ r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
+ }
+ else
+ {
+ FEATUREVERBOSE(3, "Delaying right-to-left scoring" << std::endl);
+
+ delayedScoringR2L = true;
+ std::bitset<3> possibleFutureOrientationsR2L(0x7);
+ possibleFutureOrientationsR2L[0] = !followingSourceSpanIsAligned;
+ possibleFutureOrientationsR2L[1] = !previousSourceSpanIsAligned;
+
+ // add heuristic scores
+
+ std::vector<float> weightsVector = StaticData::Instance().GetAllWeights().GetScoresForProducer(this);
+ std::vector<float> scoresR2L;
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono()) );
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap()) );
+ scoresR2L.push_back( std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()) );
+ std::vector<float> weightedScoresR2L;
+ for ( size_t i=0; i<3;++i )
+ {
+ weightedScoresR2L.push_back( weightsVector[m_offsetR2LScores+i] * scoresR2L[i] );
+ }
+
+ size_t heuristicScoreIndex = 0;
+ for (size_t i=1; i<3; ++i)
+ {
+ if (possibleFutureOrientationsR2L[i])
+ {
+ if (weightedScoresR2L[i] > weightedScoresR2L[heuristicScoreIndex])
+ {
+ heuristicScoreIndex = i;
+ }
+ }
+ }
+
+ IFFEATUREVERBOSE(5)
+ {
+ FEATUREVERBOSE(5, "Heuristic score computation (R2L): "
+ << "heuristicScoreIndex= " << heuristicScoreIndex);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightsVector[" << m_offsetR2LScores+i << "]= " << weightsVector[m_offsetR2LScores+i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " scoresR2L[" << i << "]= " << scoresR2L[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " weightedScoresR2L[" << i << "]= " << weightedScoresR2L[i]);
+ for (size_t i=0; i<3; ++i)
+ FEATUREVERBOSE2(5, " possibleFutureOrientationsR2L[" << i << "]= " << possibleFutureOrientationsR2L[i]);
+ if ( possibleFutureOrientationsR2L == 0x7 )
+ {
+ FEATUREVERBOSE2(5, " (all orientations possible)");
+ }
+ FEATUREVERBOSE2(5, std::endl);
+ }
+
+ newScores[m_offsetR2LScores+heuristicScoreIndex] += scoresR2L[heuristicScoreIndex];
+ state->SetRightBoundaryR2L(scoresR2L, heuristicScoreIndex, possibleFutureOrientationsR2L, nonTermIndex);
+
+ if ( (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) == 0x4 )
+ {
+ // recursive: discontinuous orientation
+ FEATUREVERBOSE(5, "previous state: R2L discontinuous orientation "
+ << possibleFutureOrientationsR2L << " & " << prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations
+ << " = " << (possibleFutureOrientationsR2L & prevState->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations)
+ << std::endl);
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
+ state->m_rightBoundaryRecursionGuard = true; // prevent subderivation from being scored recursively multiple times
+ }
+ }
+ }
+
+ if (!delayedScoringR2L)
+ {
+ switch (r2lOrientation)
+ {
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
+ newScores[m_offsetR2LScores+0] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x1, newScores);
break;
- case Moses::GHKM::RIGHT:
- newScores[5] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
-// std::cerr << "swap" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
+ newScores[m_offsetR2LScores+1] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x2, newScores);
break;
- case Moses::GHKM::DRIGHT:
- newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
-// std::cerr << "dright" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
- case Moses::GHKM::DLEFT:
- newScores[7] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDleft());
-// std::cerr << "dleft" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
- case Moses::GHKM::UNKNOWN:
- // modelType == Moses::GHKM::REO_MSLR
- newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
-// std::cerr << "unknown->dright" << std::endl;
+ case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
+ // modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
+ newScores[m_offsetR2LScores+2] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
+ // if sub-derivation has right-boundary non-terminal:
+ // add recursive actual score of boundary non-terminal from subderivation
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, 0x4, newScores);
break;
default:
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
+ }
}
-
- // TODO: Handle degenerate cases (boundary non-terminals)
-
- } else {
+ }
+ else
+ {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing Orientation property. "
@@ -194,6 +486,142 @@ void PhraseOrientationFeature::EvaluateWhenApplied(
}
accumulator->PlusEquals(this, newScores);
+
+ return state;
+}
+
+void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
+ const ChartHypothesis *hypo,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const
+{
+ if (state->m_leftBoundaryIsSet)
+ {
+ // subtract heuristic score from subderivation
+ newScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] -= state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex];
+
+ // add actual score
+ std::bitset<3> recursiveOrientation = orientation;
+ if ( (orientation == 0x4) || (orientation == 0x0) )
+ {
+ // discontinuous
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ }
+ else
+ {
+ recursiveOrientation &= state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
+ if ( recursiveOrientation == 0x1 )
+ {
+ // monotone
+ newScores[0] += state->GetLeftBoundaryL2RScoreMono();
+ }
+ else if ( recursiveOrientation == 0x2 )
+ {
+ // swap
+ newScores[1] += state->GetLeftBoundaryL2RScoreSwap();
+ }
+ else if ( recursiveOrientation == 0x4 )
+ {
+ // discontinuous
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ }
+ else if ( recursiveOrientation == 0x0 )
+ {
+ // discontinuous
+ newScores[2] += state->GetLeftBoundaryL2RScoreDiscontinuous();
+ }
+ else
+ {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Error in recursive scoring.");
+ }
+ }
+
+ FEATUREVERBOSE(6, "Left boundary recursion: " << orientation << " & " << state->m_leftBoundaryNonTerminalL2RPossibleFutureOrientations << " = " << recursiveOrientation
+ << " --- Subtracted heuristic score: " << state->m_leftBoundaryNonTerminalL2RScores[state->m_leftBoundaryNonTerminalL2RHeuristicScoreIndex] << std::endl);
+
+ if (!state->m_leftBoundaryRecursionGuard)
+ {
+ // recursive call
+ const ChartHypothesis *prevHypo = hypo->GetPrevHypo(state->m_leftBoundaryNonTerminalIndex);
+ const PhraseOrientationFeatureState* prevState =
+ static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
+
+ LeftBoundaryL2RScoreRecursive(featureID, prevHypo, prevState, recursiveOrientation, newScores);
+ }
+ else
+ {
+ FEATUREVERBOSE(6, "m_leftBoundaryRecursionGuard" << std::endl);
+ }
+ }
+}
+
+void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
+ const ChartHypothesis *hypo,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const
+{
+ if (state->m_rightBoundaryIsSet)
+ {
+ // subtract heuristic score from subderivation
+ newScores[m_offsetR2LScores+state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] -= state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex];
+
+ // add actual score
+ std::bitset<3> recursiveOrientation = orientation;
+ if ( (orientation == 0x4) || (orientation == 0x0) )
+ {
+ // discontinuous
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ }
+ else
+ {
+ recursiveOrientation &= state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
+ if ( recursiveOrientation == 0x1 )
+ {
+ // monotone
+ newScores[m_offsetR2LScores+0] += state->GetRightBoundaryR2LScoreMono();
+ }
+ else if ( recursiveOrientation == 0x2 )
+ {
+ // swap
+ newScores[m_offsetR2LScores+1] += state->GetRightBoundaryR2LScoreSwap();
+ }
+ else if ( recursiveOrientation == 0x4 )
+ {
+ // discontinuous
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ }
+ else if ( recursiveOrientation == 0x0 )
+ {
+ // discontinuous
+ newScores[m_offsetR2LScores+2] += state->GetRightBoundaryR2LScoreDiscontinuous();
+ }
+ else
+ {
+ UTIL_THROW2(GetScoreProducerDescription()
+ << ": Error in recursive scoring.");
+ }
+ }
+
+ FEATUREVERBOSE(6, "Right boundary recursion: " << orientation << " & " << state->m_rightBoundaryNonTerminalR2LPossibleFutureOrientations << " = " << recursiveOrientation
+ << " --- Subtracted heuristic score: " << state->m_rightBoundaryNonTerminalR2LScores[state->m_rightBoundaryNonTerminalR2LHeuristicScoreIndex] << std::endl);
+
+ if (!state->m_rightBoundaryRecursionGuard)
+ {
+ // recursive call
+ const ChartHypothesis *prevHypo = hypo->GetPrevHypo(state->m_rightBoundaryNonTerminalIndex);
+ const PhraseOrientationFeatureState* prevState =
+ static_cast<const PhraseOrientationFeatureState*>(prevHypo->GetFFState(featureID));
+
+ RightBoundaryR2LScoreRecursive(featureID, prevHypo, prevState, recursiveOrientation, newScores);
+ }
+ else
+ {
+ FEATUREVERBOSE(6, "m_rightBoundaryRecursionGuard" << std::endl);
+ }
+ }
}
diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h
index a367bc58d..e56e394a2 100644
--- a/moses/FF/PhraseOrientationFeature.h
+++ b/moses/FF/PhraseOrientationFeature.h
@@ -1,18 +1,134 @@
+//
+// REFERENCE
+// ---------
+// When using this feature, please cite:
+//
+// Matthias Huck, Joern Wuebker, Felix Rietig, and Hermann Ney.
+// A Phrase Orientation Model for Hierarchical Machine Translation.
+// In ACL 2013 Eighth Workshop on Statistical Machine Translation (WMT 2013), pages 452-463, Sofia, Bulgaria, August 2013.
+//
+
#pragma once
+#include <bitset>
#include <string>
-#include "StatelessFeatureFunction.h"
+#include <vector>
+#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include "moses/Factor.h"
#include "phrase-extract/extract-ghkm/PhraseOrientation.h"
+
namespace Moses
{
+// Feature state carried by chart hypotheses for the phrase orientation feature.
+//
+// Two boundaries are tracked independently: the left boundary for the
+// left-to-right (L2R) direction and the right boundary for the right-to-left
+// (R2L) direction. For each boundary the state stores
+//   - three scores, indexed 0 = mono, 1 = swap, 2 = discontinuous,
+//   - the index of the score referred to as the "heuristic" score,
+//   - a 3-bit mask of orientations that are still possible (the scorer
+//     intersects it with the orientation under consideration; 0x2 selects
+//     swap and 0x4 discontinuous, presumably 0x1 selects mono),
+//   - the index of the non-terminal used to fetch the previous hypothesis,
+//   - a recursion guard flag and an "is set" flag.
+// PhraseOrientationFeature is a friend and reads the members directly.
+class PhraseOrientationFeatureState : public FFState
+{
+public:
+
+  friend class PhraseOrientationFeature;
+
+  // Creates a state with zeroed scores, all three orientations possible and
+  // neither boundary set. The index members are initialised to 0 as well so
+  // that they never hold indeterminate values (they are used as vector
+  // indices by the scorer).
+  PhraseOrientationFeatureState()
+    : m_leftBoundaryNonTerminalL2RScores(3,0)
+    , m_rightBoundaryNonTerminalR2LScores(3,0)
+    , m_leftBoundaryNonTerminalL2RHeuristicScoreIndex(0)
+    , m_rightBoundaryNonTerminalR2LHeuristicScoreIndex(0)
+    , m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)
+    , m_rightBoundaryNonTerminalR2LPossibleFutureOrientations(0x7)
+    , m_leftBoundaryNonTerminalIndex(0)
+    , m_rightBoundaryNonTerminalIndex(0)
+    , m_leftBoundaryRecursionGuard(false)
+    , m_rightBoundaryRecursionGuard(false)
+    , m_leftBoundaryIsSet(false)
+    , m_rightBoundaryIsSet(false)
+  {}
+
+  // Stores the left-boundary (L2R) information and marks the boundary as set.
+  // Only the first three entries of `scores` are read, so the vector must
+  // hold at least three elements.
+  void SetLeftBoundaryL2R(const std::vector<float> &scores,
+                          size_t heuristicScoreIndex,
+                          const std::bitset<3> &possibleFutureOrientations,
+                          size_t nonTerminalIndex)
+  {
+    for (size_t i=0; i<3; ++i)
+    {
+      m_leftBoundaryNonTerminalL2RScores[i] = scores[i];
+    }
+    m_leftBoundaryNonTerminalL2RPossibleFutureOrientations = possibleFutureOrientations;
+    m_leftBoundaryNonTerminalL2RHeuristicScoreIndex = heuristicScoreIndex;
+    m_leftBoundaryNonTerminalIndex = nonTerminalIndex;
+    m_leftBoundaryIsSet = true;
+  }
+
+  // Stores the right-boundary (R2L) information and marks the boundary as set.
+  // Only the first three entries of `scores` are read, so the vector must
+  // hold at least three elements.
+  void SetRightBoundaryR2L(const std::vector<float> &scores,
+                           size_t heuristicScoreIndex,
+                           const std::bitset<3> &possibleFutureOrientations,
+                           size_t nonTerminalIndex)
+  {
+    for (size_t i=0; i<3; ++i)
+    {
+      m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
+    }
+    m_rightBoundaryNonTerminalR2LPossibleFutureOrientations = possibleFutureOrientations;
+    m_rightBoundaryNonTerminalR2LHeuristicScoreIndex = heuristicScoreIndex;
+    m_rightBoundaryNonTerminalIndex = nonTerminalIndex;
+    m_rightBoundaryIsSet = true;
+  }
+
+
+  // Left-boundary L2R score for the monotone orientation.
+  float GetLeftBoundaryL2RScoreMono() const
+  {
+    return m_leftBoundaryNonTerminalL2RScores[0];
+  }
+
+  // Left-boundary L2R score for the swap orientation.
+  float GetLeftBoundaryL2RScoreSwap() const
+  {
+    return m_leftBoundaryNonTerminalL2RScores[1];
+  }
+
+  // Left-boundary L2R score for the discontinuous orientation.
+  float GetLeftBoundaryL2RScoreDiscontinuous() const
+  {
+    return m_leftBoundaryNonTerminalL2RScores[2];
+  }
+
+
+  // Right-boundary R2L score for the monotone orientation.
+  float GetRightBoundaryR2LScoreMono() const
+  {
+    return m_rightBoundaryNonTerminalR2LScores[0];
+  }
+
+  // Right-boundary R2L score for the swap orientation.
+  float GetRightBoundaryR2LScoreSwap() const
+  {
+    return m_rightBoundaryNonTerminalR2LScores[1];
+  }
+
+  // Right-boundary R2L score for the discontinuous orientation.
+  float GetRightBoundaryR2LScoreDiscontinuous() const
+  {
+    return m_rightBoundaryNonTerminalR2LScores[2];
+  }
+
+
+  // Always reports equality, whatever `other` contains.
+  // NOTE(review): presumably this means the state never keeps two hypotheses
+  // apart during recombination - confirm that this is intended.
+  int Compare(const FFState& other) const { return 0; }
+
+private:
+
+  // Scores per orientation: [0] mono, [1] swap, [2] discontinuous.
+  std::vector<float> m_leftBoundaryNonTerminalL2RScores;
+  std::vector<float> m_rightBoundaryNonTerminalR2LScores;
+
+  // Index into the score vectors of the heuristically chosen score.
+  size_t m_leftBoundaryNonTerminalL2RHeuristicScoreIndex;
+  size_t m_rightBoundaryNonTerminalR2LHeuristicScoreIndex;
+
+  // Orientations that are still possible for the boundary non-terminal.
+  std::bitset<3> m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
+  std::bitset<3> m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
+
+  // Index of the boundary non-terminal (passed to GetPrevHypo by the scorer).
+  size_t m_leftBoundaryNonTerminalIndex;
+  size_t m_rightBoundaryNonTerminalIndex;
+  // When true, the scorer does not recurse into the previous hypothesis.
+  bool m_leftBoundaryRecursionGuard;
+  bool m_rightBoundaryRecursionGuard;
+  // True once the corresponding Set...Boundary method has been called.
+  bool m_leftBoundaryIsSet;
+  bool m_rightBoundaryIsSet;
+};
-class PhraseOrientationFeature : public StatelessFeatureFunction
+
+
+class PhraseOrientationFeature : public StatefulFeatureFunction
{
public:
+
PhraseOrientationFeature(const std::string &line);
~PhraseOrientationFeature() {
@@ -22,12 +138,19 @@ public:
return true;
}
+ virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+ return new PhraseOrientationFeatureState();
+ }
+
void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
- , ScoreComponentCollection &estimatedFutureScore) const;
+ , ScoreComponentCollection &estimatedFutureScore) const
+ {
+ targetPhrase.SetRuleSource(source);
+ };
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
@@ -37,15 +160,37 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{};
- void EvaluateWhenApplied(
+ FFState* EvaluateWhenApplied(
const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
- {};
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const
+ {
+ return new PhraseOrientationFeatureState();
+ };
- void EvaluateWhenApplied(
+ FFState* EvaluateWhenApplied(
const ChartHypothesis& cur_hypo,
+ int featureID, // used to index the state in the previous hypotheses
ScoreComponentCollection* accumulator) const;
+protected:
+
+ void LeftBoundaryL2RScoreRecursive(int featureID,
+ const ChartHypothesis *hypo,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const;
+
+ void RightBoundaryR2LScoreRecursive(int featureID,
+ const ChartHypothesis *hypo,
+ const PhraseOrientationFeatureState *state,
+ const std::bitset<3> orientation,
+ std::vector<float>& newScores) const;
+
+ std::string m_glueTargetLHSStr;
+ Word m_glueTargetLHS;
+ size_t m_offsetR2LScores;
+
};
diff --git a/moses/PP/OrientationPhraseProperty.cpp b/moses/PP/OrientationPhraseProperty.cpp
index 653a1bf3b..1722a5383 100644
--- a/moses/PP/OrientationPhraseProperty.cpp
+++ b/moses/PP/OrientationPhraseProperty.cpp
@@ -8,13 +8,13 @@ namespace Moses
void OrientationPhraseProperty::ProcessValue(const std::string &value)
{
// bidirectional MSLR phrase orientation with 2x4 orientation classes:
- // mono swap dright dleft
+ // mono swap dleft dright
std::istringstream tokenizer(value);
try {
- if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDrightProbability >> m_l2rDleftProbability
- >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDrightProbability >> m_r2lDleftProbability)) {
+ if (! (tokenizer >> m_l2rMonoProbability >> m_l2rSwapProbability >> m_l2rDleftProbability >> m_l2rDrightProbability
+ >> m_r2lMonoProbability >> m_r2lSwapProbability >> m_r2lDleftProbability >> m_r2lDrightProbability)) {
UTIL_THROW2("OrientationPhraseProperty: Not able to read value. Flawed property?");
}
} catch (const std::exception &e) {
diff --git a/moses/PP/OrientationPhraseProperty.h b/moses/PP/OrientationPhraseProperty.h
index 32c6ff208..f6344062c 100644
--- a/moses/PP/OrientationPhraseProperty.h
+++ b/moses/PP/OrientationPhraseProperty.h
@@ -24,12 +24,16 @@ public:
return m_l2rSwapProbability;
};
+ double GetLeftToRightProbabilityDleft() const {
+ return m_l2rDleftProbability;
+ };
+
double GetLeftToRightProbabilityDright() const {
return m_l2rDrightProbability;
};
- double GetLeftToRightProbabilityDleft() const {
- return m_l2rDleftProbability;
+ double GetLeftToRightProbabilityDiscontinuous() const {
+ return m_l2rDleftProbability + m_l2rDrightProbability;
};
@@ -41,12 +45,16 @@ public:
return m_r2lSwapProbability;
};
+ double GetRightToLeftProbabilityDleft() const {
+ return m_r2lDleftProbability;
+ };
+
double GetRightToLeftProbabilityDright() const {
return m_r2lDrightProbability;
};
- double GetRightToLeftProbabilityDleft() const {
- return m_r2lDleftProbability;
+ double GetRightToLeftProbabilityDiscontinuous() const {
+ return m_r2lDleftProbability + m_r2lDrightProbability;
};
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 0b5adaba8..49ec0ef99 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -63,8 +63,8 @@ StaticData::StaticData()
,m_lmEnableOOVFeature(false)
,m_isAlwaysCreateDirectTranslationOption(false)
,m_currentWeightSetting("default")
- ,m_treeStructure(NULL)
,m_useS2TDecoder(false)
+ ,m_treeStructure(NULL)
{
m_xmlBrackets.first="<";
m_xmlBrackets.second=">";
diff --git a/moses/Util.h b/moses/Util.h
index 4d2ccea10..ca34fcfb0 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -59,8 +59,11 @@ namespace Moses
#define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
#define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
-#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } }
+#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] "); TRACE_ERR(str); } }
#define HERE __FILE__ << ":" << __LINE__
+#define FEATUREVERBOSE(level,str) { if (m_verbosity >= level) { TRACE_ERR("[" << GetScoreProducerDescription() << "] "); FEATUREVERBOSE2(level,str); } }
+#define FEATUREVERBOSE2(level,str) { if (m_verbosity >= level) { TRACE_ERR(str); } }
+#define IFFEATUREVERBOSE(level) if (m_verbosity >= level)
#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index ccf0fc275..b281a05b0 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -469,7 +469,7 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
double smoothingFactor,
std::ostream &out) const
{
- assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
+ assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dleft dright
const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
@@ -507,10 +507,10 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
if (!l2rOrientationClass.compare("swap")) {
l2rOrientationClassId = 1;
}
- if (!l2rOrientationClass.compare("dright")) {
+ if (!l2rOrientationClass.compare("dleft")) {
l2rOrientationClassId = 2;
}
- if (!l2rOrientationClass.compare("dleft")) {
+ if (!l2rOrientationClass.compare("dright")) {
l2rOrientationClassId = 3;
}
if (l2rOrientationClassId == -1) {
@@ -525,10 +525,10 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
if (!r2lOrientationClass.compare("swap")) {
r2lOrientationClassId = 1;
}
- if (!r2lOrientationClass.compare("dright")) {
+ if (!r2lOrientationClass.compare("dleft")) {
r2lOrientationClassId = 2;
}
- if (!r2lOrientationClass.compare("dleft")) {
+ if (!r2lOrientationClass.compare("dright")) {
r2lOrientationClassId = 3;
}
if (r2lOrientationClassId == -1) {
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index 70d08e41a..7c210541d 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -264,12 +264,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
const std::vector<const Subgraph *> &rules = (*p)->GetRules();
- REO_POS l2rOrientation=UNKNOWN, r2lOrientation=UNKNOWN;
+ Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN, r2lOrientation=Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN;
if (options.phraseOrientation && !rules.empty()) {
int sourceSpanBegin = *((*p)->GetSpan().begin());
int sourceSpanEnd = *((*p)->GetSpan().rbegin());
- l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,L2R);
- r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,R2L);
+ l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
+ r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
// std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
// std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
}
@@ -304,8 +304,8 @@ int ExtractGHKM::Main(int argc, char *argv[])
fwdExtractStream << " ";
phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
fwdExtractStream << "}}";
- phraseOrientation.IncrementPriorCount(L2R,l2rOrientation,1);
- phraseOrientation.IncrementPriorCount(R2L,r2lOrientation,1);
+ phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_L2R,l2rOrientation,1);
+ phraseOrientation.IncrementPriorCount(Moses::GHKM::PhraseOrientation::REO_DIR_R2L,r2lOrientation,1);
}
fwdExtractStream << std::endl;
invExtractStream << std::endl;
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
index aa843c3c1..5a8452f42 100644
--- a/phrase-extract/extract-ghkm/PhraseOrientation.cpp
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
@@ -22,6 +22,7 @@
#include <iostream>
#include <sstream>
#include <limits>
+#include <cassert>
#include <boost/assign/list_of.hpp>
@@ -100,13 +101,15 @@ PhraseOrientation::PhraseOrientation(int sourceSize,
}
}
+ m_minAndMaxAlignedToTargetSpan[ std::pair<int,int>(startE,endE) ] = std::pair<int,int>(minF,maxF);
+
if (maxF >= 0) { // aligned to any source words at all
- // check if source words are aligned to out of bound target words
+ // check if source words are aligned to out of bounds target words
bool out_of_bounds = false;
for (int fi=minF; fi<=maxF && !out_of_bounds; ++fi)
if (usedF[fi]>0) {
- // cout << "ouf of bounds: " << fi << "\n";
+ // cout << "out of bounds: " << fi << "\n";
out_of_bounds = true;
}
@@ -175,7 +178,7 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int en
// << std::endl;
return GetOrientationInfoString(startF, startE, endF, endE, direction);
} else {
- std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+ std::cerr << "PhraseOrientation::GetOrientationInfoString(): Error: not able to determine phrase orientation" << std::endl;
std::exit(1);
}
}
@@ -183,46 +186,33 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int en
const std::string PhraseOrientation::GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction) const
{
- REO_POS hierPrevOrient=UNKNOWN, hierNextOrient=UNKNOWN;
-
- bool connectedLeftTopP = IsAligned( startF-1, startE-1 );
- bool connectedRightTopP = IsAligned( endF+1, startE-1 );
- bool connectedLeftTopN = IsAligned( endF+1, endE+1 );
- bool connectedRightTopN = IsAligned( startF-1, endE+1 );
-
- if ( direction == L2R || direction == BIDIR )
- hierPrevOrient = GetOrientHierModel(REO_MSLR,
- connectedLeftTopP, connectedRightTopP,
- startF, endF, startE, endE, m_countF-1, 0, 1,
- &ge, &lt,
- m_bottomRight, m_bottomLeft);
-
- if ( direction == R2L || direction == BIDIR )
- hierNextOrient = GetOrientHierModel(REO_MSLR,
- connectedLeftTopN, connectedRightTopN,
- endF, startF, endE, startE, 0, m_countF-1, -1,
- &lt, &ge,
- m_bottomLeft, m_bottomRight);
+ REO_CLASS hierPrevOrient=REO_CLASS_UNKNOWN, hierNextOrient=REO_CLASS_UNKNOWN;
+
+ if ( direction == REO_DIR_L2R || direction == REO_DIR_BIDIR )
+ hierPrevOrient = GetOrientationInfo(startF, startE, endF, endE, REO_DIR_L2R);
+
+ if ( direction == REO_DIR_R2L || direction == REO_DIR_BIDIR )
+ hierNextOrient = GetOrientationInfo(startF, startE, endF, endE, REO_DIR_R2L);
switch (direction) {
- case L2R:
- return GetOrientationString(hierPrevOrient, REO_MSLR);
+ case REO_DIR_L2R:
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR);
break;
- case R2L:
- return GetOrientationString(hierNextOrient, REO_MSLR);
+ case REO_DIR_R2L:
+ return GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
break;
- case BIDIR:
- return GetOrientationString(hierPrevOrient, REO_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MSLR);
+ case REO_DIR_BIDIR:
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
break;
default:
- return GetOrientationString(hierPrevOrient, REO_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MSLR);
+ return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
break;
}
return "PhraseOrientationERROR";
}
-REO_POS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
+PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
{
boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
= m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
@@ -238,86 +228,114 @@ REO_POS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR dire
// << std::endl;
return GetOrientationInfo(startF, startE, endF, endE, direction);
} else {
- std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+ std::cerr << "PhraseOrientation::GetOrientationInfo(): Error: not able to determine phrase orientation" << std::endl;
std::exit(1);
}
}
-REO_POS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const
+PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const
{
- if ( direction != L2R && direction != R2L ) {
- std::cerr << "PhraseOrientation::GetOrientationInfo(): direction should be either L2R or R2L" << std::endl;
+ if ( direction != REO_DIR_L2R && direction != REO_DIR_R2L ) {
+ std::cerr << "PhraseOrientation::GetOrientationInfo(): Error: direction should be either L2R or R2L" << std::endl;
std::exit(1);
}
- bool connectedLeftTopP = IsAligned( startF-1, startE-1 );
- bool connectedRightTopP = IsAligned( endF+1, startE-1 );
- bool connectedLeftTopN = IsAligned( endF+1, endE+1 );
- bool connectedRightTopN = IsAligned( startF-1, endE+1 );
-
- if ( direction == L2R )
- return GetOrientHierModel(REO_MSLR,
- connectedLeftTopP, connectedRightTopP,
- startF, endF, startE, endE, m_countF-1, 0, 1,
- &ge, &lt,
+ if ( direction == REO_DIR_L2R )
+ return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
+ startF, endF, startE, endE, m_countF-1, 0, 0, 1,
+ &ge, &le,
m_bottomRight, m_bottomLeft);
- if ( direction == R2L )
- return GetOrientHierModel(REO_MSLR,
- connectedLeftTopN, connectedRightTopN,
- endF, startF, endE, startE, 0, m_countF-1, -1,
- &lt, &ge,
- m_bottomLeft, m_bottomRight);
+ if ( direction == REO_DIR_R2L )
+ return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
+ endF, startF, endE, startE, 0, m_countF-1, m_countE-1, -1,
+ &le, &ge,
+ m_topLeft, m_topRight);
- return UNKNOWN;
+ return REO_CLASS_UNKNOWN;
}
// to be called with countF-1 instead of countF
-REO_POS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
- bool connectedLeftTop, bool connectedRightTop,
- int startF, int endF, int startE, int endE, int countF, int zero, int unit,
- bool (*ge)(int, int), bool (*lt)(int, int),
+PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
+ int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
+ bool (*ge)(int, int), bool (*le)(int, int),
const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
{
+ bool leftSourceSpanIsAligned = ( (startF != zeroF) && SourceSpanIsAligned(zeroF,startF-unit) );
+ bool topTargetSpanIsAligned = ( (startE != zeroE) && TargetSpanIsAligned(zeroE,startE-unit) );
+
+ if (!topTargetSpanIsAligned && !leftSourceSpanIsAligned)
+ return REO_CLASS_LEFT;
+
HSentenceVertices::const_iterator it;
- if ((connectedLeftTop && !connectedRightTop) ||
+ if (//(connectedLeftTop && !connectedRightTop) ||
((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
it->second.find(startF-unit) != it->second.end()))
- return LEFT;
+ return REO_CLASS_LEFT;
- if (modelType == REO_MONO)
- return UNKNOWN;
+ if (modelType == REO_MODEL_TYPE_MONO)
+ return REO_CLASS_UNKNOWN;
- if ((!connectedLeftTop && connectedRightTop) ||
+ if (//(!connectedLeftTop && connectedRightTop) ||
((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
it->second.find(endF + unit) != it->second.end()))
- return RIGHT;
+ return REO_CLASS_RIGHT;
- if (modelType == REO_MSD)
- return UNKNOWN;
+ if (modelType == REO_MODEL_TYPE_MSD)
+ return REO_CLASS_UNKNOWN;
- connectedLeftTop = false;
- for (int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
- if ((connectedLeftTop = ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
- it->second.find(indexF) != it->second.end())))
- return DRIGHT;
+ for (int indexF=startF-2*unit; (*ge)(indexF, zeroF); indexF=indexF-unit)
+ {
+ if ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+ it->second.find(indexF) != it->second.end())
+ return REO_CLASS_DLEFT;
}
- connectedRightTop = false;
- for (int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
- if ((connectedRightTop = ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
- it->second.find(indexF) != it->second.end())))
- return DLEFT;
+ for (int indexF=endF+2*unit; (*le)(indexF, countF); indexF=indexF+unit)
+ {
+ if ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+ it->second.find(indexF) != it->second.end())
+ return REO_CLASS_DRIGHT;
}
- return UNKNOWN;
+ return REO_CLASS_UNKNOWN;
+}
+
+// Reports whether the source span delimited by index1 and index2 is aligned,
+// as decided by SpanIsAligned() on m_minAndMaxAlignedToSourceSpan.
+bool PhraseOrientation::SourceSpanIsAligned(int index1, int index2) const
+{
+  const bool sourceSpanHasAlignment = SpanIsAligned(index1, index2, m_minAndMaxAlignedToSourceSpan);
+  return sourceSpanHasAlignment;
 }
+// Reports whether the target span delimited by index1 and index2 is aligned,
+// as decided by SpanIsAligned() on m_minAndMaxAlignedToTargetSpan.
+bool PhraseOrientation::TargetSpanIsAligned(int index1, int index2) const
+{
+  const bool targetSpanHasAlignment = SpanIsAligned(index1, index2, m_minAndMaxAlignedToTargetSpan);
+  return targetSpanHasAlignment;
+}
-const std::string PhraseOrientation::GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType)
+// Looks up the span (min(index1,index2), max(index1,index2)) in
+// minAndMaxAligned and reports whether it is aligned.
+//
+// Returns false when the stored minimum equals std::numeric_limits<int>::max()
+// (presumably the value the minimum keeps when no alignment point was seen -
+// confirm against the code that fills the map) and true otherwise.
+// If the span has no entry in the map, an error is written to std::cerr and
+// the process exits with status 1.
+bool PhraseOrientation::SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const
+{
+  // The indices may arrive in either order; the map is keyed by (lower, upper).
+  const std::pair<int,int> span(std::min(index1,index2), std::max(index1,index2));
+
+  boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator itMinAndMaxAligned =
+    minAndMaxAligned.find(span);
+
+  if (itMinAndMaxAligned == minAndMaxAligned.end())
+  {
+    // This helper serves both source and target lookups, so the message names
+    // this function and the offending span rather than one particular caller.
+    std::cerr << "PhraseOrientation::SpanIsAligned(): Error: no alignment information for span ("
+              << span.first << "," << span.second << ")" << std::endl;
+    std::exit(1);
+  }
+
+  return itMinAndMaxAligned->second.first != std::numeric_limits<int>::max();
+}
+
+
+const std::string PhraseOrientation::GetOrientationString(const REO_CLASS orient, const REO_MODEL_TYPE modelType)
{
std::ostringstream oss;
WriteOrientation(oss, orient, modelType);
@@ -325,31 +343,31 @@ const std::string PhraseOrientation::GetOrientationString(const REO_POS orient,
}
-void PhraseOrientation::WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType)
+void PhraseOrientation::WriteOrientation(std::ostream& out, const REO_CLASS orient, const REO_MODEL_TYPE modelType)
{
switch(orient) {
- case LEFT:
+ case REO_CLASS_LEFT:
out << "mono";
break;
- case RIGHT:
+ case REO_CLASS_RIGHT:
out << "swap";
break;
- case DRIGHT:
- out << "dright";
- break;
- case DLEFT:
+ case REO_CLASS_DLEFT:
out << "dleft";
break;
- case UNKNOWN:
+ case REO_CLASS_DRIGHT:
+ out << "dright";
+ break;
+ case REO_CLASS_UNKNOWN:
switch(modelType) {
- case REO_MONO:
+ case REO_MODEL_TYPE_MONO:
out << "nomono";
break;
- case REO_MSD:
+ case REO_MODEL_TYPE_MSD:
out << "other";
break;
- case REO_MSLR:
- out << "dright";
+ case REO_MODEL_TYPE_MSLR:
+ out << "dleft";
break;
}
break;
@@ -379,12 +397,12 @@ bool PhraseOrientation::IsAligned(int fi, int ei) const
}
-void PhraseOrientation::IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment)
+void PhraseOrientation::IncrementPriorCount(REO_DIR direction, REO_CLASS orient, float increment)
{
- assert(direction==L2R || direction==R2L);
- if (direction == L2R) {
+ assert(direction==REO_DIR_L2R || direction==REO_DIR_R2L);
+ if (direction == REO_DIR_L2R) {
m_l2rOrientationPriorCounts[orient] += increment;
- } else if (direction == R2L) {
+ } else if (direction == REO_DIR_R2L) {
m_r2lOrientationPriorCounts[orient] += increment;
}
}
@@ -394,11 +412,11 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE
{
std::map<std::string,float> l2rOrientationPriorCountsMap;
std::map<std::string,float> r2lOrientationPriorCountsMap;
- for (int orient=0; orient<=UNKNOWN; ++orient) {
- l2rOrientationPriorCountsMap[GetOrientationString((REO_POS)orient, modelType)] += m_l2rOrientationPriorCounts[orient];
+ for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
+ l2rOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_l2rOrientationPriorCounts[orient];
}
- for (int orient=0; orient<=UNKNOWN; ++orient) {
- r2lOrientationPriorCountsMap[GetOrientationString((REO_POS)orient, modelType)] += m_r2lOrientationPriorCounts[orient];
+ for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
+ r2lOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_r2lOrientationPriorCounts[orient];
}
for (std::map<std::string,float>::const_iterator l2rOrientationPriorCountsMapIt = l2rOrientationPriorCountsMap.begin();
l2rOrientationPriorCountsMapIt != l2rOrientationPriorCountsMap.end(); ++l2rOrientationPriorCountsMapIt) {
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/extract-ghkm/PhraseOrientation.h
index 8ef05987f..313c1f3df 100644
--- a/phrase-extract/extract-ghkm/PhraseOrientation.h
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.h
@@ -33,10 +33,6 @@ namespace Moses
namespace GHKM
{
-enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
-enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
-enum REO_DIR {L2R, R2L, BIDIR};
-
// The key of the map is the English index and the value is a set of the source ones
typedef std::map <int, std::set<int> > HSentenceVertices;
@@ -45,18 +41,25 @@ class PhraseOrientation
{
public:
- PhraseOrientation(int sourceSize,
- int targetSize,
- const Alignment &alignment);
+ enum REO_MODEL_TYPE {REO_MODEL_TYPE_MSD, REO_MODEL_TYPE_MSLR, REO_MODEL_TYPE_MONO};
+ enum REO_CLASS {REO_CLASS_LEFT, REO_CLASS_RIGHT, REO_CLASS_DLEFT, REO_CLASS_DRIGHT, REO_CLASS_UNKNOWN};
+ enum REO_DIR {REO_DIR_L2R, REO_DIR_R2L, REO_DIR_BIDIR};
+
- REO_POS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
- REO_POS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
- const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=BIDIR) const;
- const std::string GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction=BIDIR) const;
- static const std::string GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
- static void WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
- void IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment);
- static void WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType=REO_MSLR);
+ PhraseOrientation(int sourceSize,
+ int targetSize,
+ const Alignment &alignment);
+
+ REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
+ REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
+ const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=REO_DIR_BIDIR) const;
+ const std::string GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction=REO_DIR_BIDIR) const;
+ static const std::string GetOrientationString(const REO_CLASS orient, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
+ static void WriteOrientation(std::ostream& out, const REO_CLASS orient, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
+ void IncrementPriorCount(REO_DIR direction, REO_CLASS orient, float increment);
+ static void WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
+ bool SourceSpanIsAligned(int index1, int index2) const;
+ bool TargetSpanIsAligned(int index1, int index2) const;
private:
@@ -68,12 +71,13 @@ private:
HSentenceVertices & bottomRight,
int startF, int startE, int endF, int endE);
- REO_POS GetOrientHierModel(REO_MODEL_TYPE modelType,
- bool connectedLeftTop, bool connectedRightTop,
- int startF, int endF, int startE, int endE, int countF, int zero, int unit,
+ REO_CLASS GetOrientHierModel(REO_MODEL_TYPE modelType,
+ int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
bool (*ge)(int, int), bool (*lt)(int, int),
const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;
+ bool SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const;
+
bool IsAligned(int fi, int ei) const;
static bool ge(int first, int second) { return first >= second; };
@@ -91,6 +95,7 @@ private:
HSentenceVertices m_bottomRight;
boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToSourceSpan;
+ boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToTargetSpan;
static std::vector<float> m_l2rOrientationPriorCounts;
static std::vector<float> m_r2lOrientationPriorCounts;