Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses/FF
diff options
context:
space:
mode:
author Michael Denkowski <mdenkows@amazon.com> 2016-08-11 10:21:51 +0300
committer Michael Denkowski <mdenkows@amazon.com> 2016-08-12 13:05:12 +0300
commit 3aedc0bf68dfd204a5ae88bcf9d3e0da7c4028d5 (patch)
tree e07d34536ccbcb509d956cee55a0255b6a496dba /moses/FF
parent ae1e51d81ad450f7ee497386eea16ebe3792f68b (diff)
Standalone phrase distance feature
(Uses input coordinates populated by XML input and target phrase coordinates populated by phrase dictionary implementation)
Diffstat (limited to 'moses/FF')
-rw-r--r-- moses/FF/Factory.cpp 2
-rw-r--r-- moses/FF/PhraseDistanceFeature.cpp 123
-rw-r--r-- moses/FF/PhraseDistanceFeature.h 57
3 files changed, 182 insertions, 0 deletions
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 87dafdf8f..a41b8cb2c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -30,6 +30,7 @@
#include "moses/FF/TargetBigramFeature.h"
#include "moses/FF/TargetNgramFeature.h"
#include "moses/FF/PhraseBoundaryFeature.h"
+#include "moses/FF/PhraseDistanceFeature.h"
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/RulePairUnlexicalizedSource.h"
#include "moses/FF/PhraseLengthFeature.h"
@@ -252,6 +253,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SourceWordDeletionFeature);
MOSES_FNAME(TargetWordInsertionFeature);
MOSES_FNAME(PhraseBoundaryFeature);
+ MOSES_FNAME(PhraseDistanceFeature);
MOSES_FNAME(PhraseLengthFeature);
MOSES_FNAME(WordTranslationFeature);
MOSES_FNAME(TargetBigramFeature);
diff --git a/moses/FF/PhraseDistanceFeature.cpp b/moses/FF/PhraseDistanceFeature.cpp
new file mode 100644
index 000000000..b399ccc6c
--- /dev/null
+++ b/moses/FF/PhraseDistanceFeature.cpp
@@ -0,0 +1,123 @@
+#include "PhraseDistanceFeature.h"
+
+#include <algorithm>
+#include <cmath>
+#include <map>
+#include <vector>
+#include <boost/foreach.hpp>
+#include "moses/InputType.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/StaticData.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+namespace Moses
+{
+/**
+ * Build the feature from its configuration line.
+ *
+ * The base class is told this feature produces 2 score components.  Members
+ * start out as: Euclidean distance measure, empty space name, space ID 0;
+ * ReadParameters() is then invoked to process the configuration line.
+ *
+ * The initializers are listed in the same order the members are declared in
+ * the class (m_measure, m_space, m_spaceID), which is the order in which C++
+ * actually initializes them.
+ *
+ * @param line  feature configuration line, forwarded to the base class
+ */
+PhraseDistanceFeature::PhraseDistanceFeature(const string &line)
+  : StatelessFeatureFunction(2, line)
+  , m_measure(EuclideanDistance)
+  , m_space("")
+  , m_spaceID(0)
+{
+  ReadParameters();
+}
+
+/**
+ * Score a target phrase by distance in the coordinate space m_spaceID.
+ *
+ * Two scores are assigned to scoreBreakdown for this feature:
+ *   scores[0]  log of the average distance from each target phrase
+ *              coordinate vector to the input's coordinate vector
+ *   scores[1]  log of the average distance from each target phrase
+ *              coordinate vector to the centroid of those vectors
+ * Distances use the measure selected in m_measure.  Averages are clamped to
+ * at least Moses::FLOAT_EPSILON before the log is taken.
+ *
+ * If the input has no coordinates for the space, or the target phrase has no
+ * (or unusable) coordinates, the problem is reported through TRACE_ERR and
+ * all scores are left at zero.
+ *
+ * @param input           sentence-level input; its m_coordMap is consulted
+ * @param inputPath       unused
+ * @param targetPhrase    phrase whose coordinate list is read
+ * @param stackVec        unused
+ * @param scoreBreakdown  receives the scores via Assign()
+ * @param estimatedScores unused
+ */
+void PhraseDistanceFeature::EvaluateWithSourceContext(const InputType &input
+    , const InputPath &inputPath
+    , const TargetPhrase &targetPhrase
+    , const StackVec *stackVec
+    , ScoreComponentCollection &scoreBreakdown
+    , ScoreComponentCollection *estimatedScores) const
+{
+  vector<float> scores(m_numScoreComponents, 0);
+
+  // Input coord.  The map pointer is checked before it is dereferenced, and
+  // the iterator is only compared against end() of a map that exists.
+  vector<float> const* inputCoord = NULL;
+  if (input.m_coordMap) {
+    map<size_t const, vector<float> >::const_iterator ii = input.m_coordMap->find(m_spaceID);
+    if (ii != input.m_coordMap->end()) {
+      inputCoord = &ii->second;
+    }
+  }
+  if (inputCoord == NULL) {
+    TRACE_ERR("No coordinates for space " << m_space << " on input (specify with coord XML tag)" << endl);
+    TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
+  }
+
+  // Target phrase coord.  An empty list is treated like a missing one, since
+  // averaging over zero instances would divide by zero and yield NaN scores.
+  vector<SPTR<vector<float> > > const* tpCoord = targetPhrase.GetCoordList(m_spaceID);
+  if (tpCoord == NULL || tpCoord->empty()) {
+    TRACE_ERR("No coordinates for space " << m_space << " on target phrase (PhraseDictionary implementation needs to set)" << endl);
+    TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
+    tpCoord = NULL;
+  }
+
+  // Every target phrase instance must provide at least as many dimensions as
+  // the input, otherwise the loops below would read out of bounds.
+  bool usable = (inputCoord != NULL && tpCoord != NULL);
+  if (usable) {
+    BOOST_FOREACH(SPTR<vector<float> > const& coord, *tpCoord) {
+      if (!coord || coord->size() < inputCoord->size()) {
+        TRACE_ERR("Malformed coordinates for space " << m_space << " on target phrase (missing vector or too few dimensions)" << endl);
+        TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
+        usable = false;
+        break;
+      }
+    }
+  }
+
+  // Compute scores
+  if (usable) {
+    size_t const dim = inputCoord->size();
+    float const numInstances = static_cast<float>(tpCoord->size());
+
+    // Centroid of target phrase instances (from phrase extraction)
+    vector<float> centroid(dim, 0);
+    BOOST_FOREACH(SPTR<vector<float> > const& coord, *tpCoord) {
+      for (size_t i = 0; i < dim; ++i) {
+        centroid[i] += (*coord)[i];
+      }
+    }
+    for (size_t i = 0; i < dim; ++i) {
+      centroid[i] /= numInstances;
+    }
+
+    // Average distance from the target phrase instances to (1) the input and
+    // (2) the target phrase centroid
+    float inputDistance = 0;
+    float centroidDistance = 0;
+    if (m_measure == EuclideanDistance) {
+      BOOST_FOREACH(SPTR<vector<float> > const& coord, *tpCoord) {
+        float pointInputDistance = 0;
+        float pointCentroidDistance = 0;
+        for (size_t i = 0; i < dim; ++i) {
+          float const toInput = (*inputCoord)[i] - (*coord)[i];
+          float const toCentroid = centroid[i] - (*coord)[i];
+          pointInputDistance += toInput * toInput;
+          pointCentroidDistance += toCentroid * toCentroid;
+        }
+        inputDistance += std::sqrt(pointInputDistance);
+        centroidDistance += std::sqrt(pointCentroidDistance);
+      }
+    } else if (m_measure == TotalVariationDistance) {
+      BOOST_FOREACH(SPTR<vector<float> > const& coord, *tpCoord) {
+        float pointInputDistance = 0;
+        float pointCentroidDistance = 0;
+        for (size_t i = 0; i < dim; ++i) {
+          // std::fabs keeps the computation in floating point; an unqualified
+          // abs() may bind to the integer overload and truncate.
+          pointInputDistance += std::fabs((*inputCoord)[i] - (*coord)[i]);
+          pointCentroidDistance += std::fabs(centroid[i] - (*coord)[i]);
+        }
+        inputDistance += pointInputDistance / 2;
+        centroidDistance += pointCentroidDistance / 2;
+      }
+    }
+    inputDistance /= numInstances;
+    centroidDistance /= numInstances;
+
+    // Log transform scores, max with float epsilon to avoid domain error
+    scores[0] = std::log(std::max(inputDistance, Moses::FLOAT_EPSILON));
+    scores[1] = std::log(std::max(centroidDistance, Moses::FLOAT_EPSILON));
+  }
+
+  // Set scores
+  scoreBreakdown.Assign(this, scores);
+}
+
+/**
+ * Apply a single key/value option from the feature configuration.
+ *
+ *   space    stores the space name and resolves its numeric ID through
+ *            StaticData::MapCoordSpace
+ *   measure  "euc" selects EuclideanDistance, "var" selects
+ *            TotalVariationDistance; any other value raises via UTIL_THROW2
+ *
+ * Every other key is forwarded to StatelessFeatureFunction::SetParameter.
+ *
+ * @param key    option name
+ * @param value  option value
+ */
+void PhraseDistanceFeature::SetParameter(const string& key, const string& value)
+{
+  if (key == "space") {
+    m_space = value;
+    m_spaceID = StaticData::InstanceNonConst().MapCoordSpace(m_space);
+    return;
+  }
+
+  if (key != "measure") {
+    // Not one of ours: let the base class deal with it
+    StatelessFeatureFunction::SetParameter(key, value);
+    return;
+  }
+
+  // key == "measure"
+  if (value == "euc") {
+    m_measure = EuclideanDistance;
+  } else if (value == "var") {
+    m_measure = TotalVariationDistance;
+  } else {
+    UTIL_THROW2("Unknown measure " << value << ", choices: euc var");
+  }
+}
+
+} // namespace
diff --git a/moses/FF/PhraseDistanceFeature.h b/moses/FF/PhraseDistanceFeature.h
new file mode 100644
index 000000000..b411289cb
--- /dev/null
+++ b/moses/FF/PhraseDistanceFeature.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "StatelessFeatureFunction.h"
+
+namespace Moses
+{
+
+/**
+ * Stateless feature function whose only non-trivial hook is
+ * EvaluateWithSourceContext(); all other evaluation hooks declared here are
+ * intentionally empty.  It is configured through SetParameter().
+ */
+class PhraseDistanceFeature : public StatelessFeatureFunction
+{
+private:
+  /// Distance measures this feature can be configured with.
+  enum Measure {
+    EuclideanDistance,
+    TotalVariationDistance
+  };
+
+public:
+  /// @param line  feature configuration line
+  PhraseDistanceFeature(const std::string &line);
+
+  /// Always returns true, whatever the factor mask.
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }
+
+  /// No-op.
+  virtual void EvaluateInIsolation(const Phrase &source,
+                                   const TargetPhrase &targetPhrase,
+                                   ScoreComponentCollection &scoreBreakdown,
+                                   ScoreComponentCollection &estimatedScores) const {
+  }
+
+  /// No-op.
+  void EvaluateWhenApplied(const Hypothesis& hypo,
+                           ScoreComponentCollection* accumulator) const {
+  }
+
+  /// No-op.
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
+                           ScoreComponentCollection* accumulator) const {
+  }
+
+  /// No-op.
+  void EvaluateWhenApplied(const Syntax::SHyperedge &hyperedge,
+                           ScoreComponentCollection* accumulator) const {
+  }
+
+  /// Defined out of line; this is where the feature's scores are produced.
+  void EvaluateWithSourceContext(const InputType &input,
+                                 const InputPath &inputPath,
+                                 const TargetPhrase &targetPhrase,
+                                 const StackVec *stackVec,
+                                 ScoreComponentCollection &scoreBreakdown,
+                                 ScoreComponentCollection *estimatedScores = NULL) const;
+
+  /// No-op.
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input,
+      const TranslationOptionList &translationOptionList) const {
+  }
+
+  /// Handles one configuration key/value pair (defined out of line).
+  void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+  Measure m_measure;    ///< selected distance measure
+  std::string m_space;  ///< name of the coordinate space
+  size_t m_spaceID;     ///< numeric ID of the coordinate space
+};
+
+} //namespace