towards using aligned source words in context features

author: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz> 2016-03-24 17:07:48 +0300
committer: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz> 2016-03-24 17:07:48 +0300
commit: 25363528759959d9eb19a3bfff184de8a90e3817 (patch)
tree: 260a89c750032a0477f0b37bcf266ea5b7b71204 /moses/FF
parent: 4f807c47c81605de29bd91a482fb57dc913881a5 (diff)
2 files changed, 37 insertions, 2 deletions
diff --git a/moses/FF/VW/VW.cpp b/moses/FF/VW/VW.cpp
index 061a8536c..5111ff8ed 100644
--- a/moses/FF/VW/VW.cpp
+++ b/moses/FF/VW/VW.cpp
@@ -15,6 +15,7 @@
 #include "moses/StaticData.h"
 #include "moses/Phrase.h"
 #include "moses/AlignmentInfo.h"
+#include "moses/AlignmentInfoCollection.h"
 #include "moses/Word.h"
 #include "moses/FactorCollection.h"
 
@@ -118,9 +119,9 @@ FFState* VW::EvaluateWhenApplied(
 
       const Phrase &targetContext = prevVWState.GetPhrase();
       Discriminative::FeatureVector contextVector;
-      AlignmentInfo alignInfo("");
+      const AlignmentInfo *alignInfo = TransformAlignmentInfo(curHypo, targetContext.GetSize());
       for(size_t i = 0; i < contextFeatures.size(); ++i)
-        (*contextFeatures[i])(input, targetContext, alignInfo, classifier, contextVector);
+        (*contextFeatures[i])(input, targetContext, *alignInfo, classifier, contextVector);
 
       contextFeaturesCache[contextHash] = contextVector;
       VERBOSE(3, "VW :: context cache miss\n");
@@ -449,6 +450,35 @@ void VW::InitializeForInput(ttasksptr const& ttask) {
   targetSent.SetConstraints(source.GetSize());
 }
 
+/*************************************************************************************
+ * private methods
+ ************************************************************************************/
+
+const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const {
+  std::set<std::pair<size_t, size_t> > alignmentPoints;
+  const Hypothesis *contextHypo = curHypo.GetPrevHypo();
+  int idxInContext = contextSize - 1;
+  int processedWordsInHypo = 0;
+  while (idxInContext >= 0 && contextHypo) {
+    int idxInHypo = contextHypo->GetCurrTargetLength() - 1 - processedWordsInHypo;
+    if (idxInHypo >= 0) {
+      const AlignmentInfo &hypoAlign = contextHypo->GetCurrTargetPhrase().GetAlignTerm();
+      std::set<size_t> alignedToTgt = hypoAlign.GetAlignmentsForTarget(idxInHypo);
+      size_t srcOffset = contextHypo->GetCurrSourceWordsRange().GetStartPos();
+      BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
+        alignmentPoints.insert(std::make_pair(srcOffset + srcIdx, idxInContext));
+      }
+      processedWordsInHypo++;
+      idxInContext--;
+    } else {
+      processedWordsInHypo = 0;
+      contextHypo = contextHypo->GetPrevHypo();
+    }
+  }
+
+  return AlignmentInfoCollection::Instance().Add(alignmentPoints);
+}
+
 std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const {
 
   //std::cerr << topt.GetSourceWordsRange() << std::endl;
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index 57fb66782..ea17188f7 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -148,6 +148,11 @@ private:
     return key;
   }
 
+  // used in decoding to transform the global word alignment information into
+  // context-phrase internal alignment information (i.e., with target indices correspoding
+  // to positions in contextPhrase)
+  const AlignmentInfo *TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const;
+
   // At training time, determine whether a translation option is correct for the current target sentence
   // based on word alignment. This is a bit complicated because we need to handle various corner-cases
   // where some word(s) on phrase borders are unaligned.
author	Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>	2016-03-24 17:07:48 +0300
committer	Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>	2016-03-24 17:07:48 +0300
commit	25363528759959d9eb19a3bfff184de8a90e3817 (patch)
tree	260a89c750032a0477f0b37bcf266ea5b7b71204 /moses/FF
parent	4f807c47c81605de29bd91a482fb57dc913881a5 (diff)