Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses/FF
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2016-06-01 19:36:43 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2016-06-01 19:36:43 +0300
commit: 73ffe51bd8b49d3f8ddc593979ee595061ef51a7 (patch)
tree: d83643705539886d0d802d8e84e1b59b4c165d6b /moses/FF
parent: 33932a0fc98c158e63818b7482d517b77b155f93 (diff)
Added Grammatical Error Correction specific scorer (M^2) and features
Diffstat (limited to 'moses/FF')
-rw-r--r-- moses/FF/CorrectionPattern.cpp | 363
-rw-r--r-- moses/FF/CorrectionPattern.h | 73
-rw-r--r-- moses/FF/Diffs.h | 139
-rw-r--r-- moses/FF/EditOps.cpp | 119
-rw-r--r-- moses/FF/EditOps.h | 64
-rw-r--r-- moses/FF/Factory.cpp | 6
6 files changed, 764 insertions, 0 deletions
diff --git a/moses/FF/CorrectionPattern.cpp b/moses/FF/CorrectionPattern.cpp
new file mode 100644
index 000000000..04a62b0ec
--- /dev/null
+++ b/moses/FF/CorrectionPattern.cpp
@@ -0,0 +1,363 @@
+#include <sstream>
+#include "CorrectionPattern.h"
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+#include "moses/InputPath.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TranslationOption.h"
+#include "util/string_piece_hash.hh"
+#include "util/exception.hh"
+
+#include <functional>
+#include <algorithm>
+
+#include <boost/foreach.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "Diffs.h"
+
+namespace Moses
+{
+
+using namespace std;
+
+/**
+ * Appends one run of matched characters to both sides of a generalized pattern.
+ *
+ * Runs of three or more characters are abstracted: the source side receives the
+ * regex capture group "(\w{3,})" and the target side the back-reference
+ * "\<count>", after which `count` is incremented. Shorter runs are copied
+ * literally to both sides. `suffix` is appended to both sides in either case.
+ *
+ * The back-reference number is formatted through a stream. The previous code
+ * used sprintf() on the buffer returned by std::string::c_str() of a
+ * one-character string, which is undefined behaviour and could not represent
+ * group numbers of 10 or more.
+ */
+static void AppendMatch(const std::string &match,
+                        const std::string &suffix,
+                        int &count,
+                        std::vector<std::string> &sourceList,
+                        std::vector<std::string> &targetList)
+{
+  if(match.size() >= 3) {
+    std::ostringstream backReference;
+    backReference << "\\" << count;
+    sourceList.push_back("(\\w{3,})" + suffix);
+    targetList.push_back(backReference.str() + suffix);
+    ++count;
+  } else {
+    sourceList.push_back(match + suffix);
+    targetList.push_back(match + suffix);
+  }
+}
+
+/**
+ * Builds a substitution feature name of the form sub(«source»,«target»).
+ *
+ * @param s1      source side; a '+' character is treated as a word separator
+ *                (CreateSinglePattern joins tokens with "+")
+ * @param s2      target side, same convention
+ * @param general if false, both sides are emitted literally with every "+"
+ *                replaced by "·"; if true, a character-level diff of s1 and
+ *                s2 is computed and matched runs of at least three characters
+ *                are replaced by a capture group / back-reference pair
+ * @return the feature name
+ */
+std::string MakePair(const std::string &s1, const std::string &s2, bool general) {
+  std::vector<std::string> sourceList;
+  std::vector<std::string> targetList;
+
+  if(general) {
+    const Diffs diffs = CreateDiff(s1, s2);
+
+    size_t i = 0, j = 0;      // read positions in s1 and s2
+    char lastType = 'm';
+
+    std::string source, target; // pending deleted / inserted characters
+    std::string match;          // pending run of matched characters
+    int count = 1;              // number of the next capture group
+
+    BOOST_FOREACH(Diff type, diffs) {
+      if(type == 'm') {
+        // A match ends the preceding edited region, if there was one.
+        if(lastType != 'm') {
+          sourceList.push_back(source);
+          targetList.push_back(target);
+        }
+        source.clear();
+        target.clear();
+
+        if(s1[i] == '+') {
+          // Matched word separator: flush the run together with the separator.
+          AppendMatch(match, "·", count, sourceList, targetList);
+          match.clear();
+        } else {
+          match.push_back(s1[i]);
+        }
+
+        i++;
+        j++;
+      } else if(type == 'd') {
+        if(s1[i] == '+')
+          source += "·";
+        else
+          source.push_back(s1[i]);
+        i++;
+      } else if(type == 'i') {
+        if(s2[j] == '+')
+          target += "·";
+        else
+          target.push_back(s2[j]);
+        j++;
+      }
+
+      // An edit operation ends the current matched run; it is emitted before
+      // the edited region, which is only pushed at the next match or the end.
+      if(type != 'm' && !match.empty()) {
+        AppendMatch(match, "", count, sourceList, targetList);
+        match.clear();
+      }
+
+      lastType = type;
+    }
+
+    // Flush whatever is still pending after the last diff operation.
+    if(lastType != 'm') {
+      sourceList.push_back(source);
+      targetList.push_back(target);
+    }
+    if(!match.empty())
+      AppendMatch(match, "", count, sourceList, targetList);
+  } else {
+    std::string cs1 = s1;
+    std::string cs2 = s2;
+    boost::replace_all(cs1, "+", "·");
+    boost::replace_all(cs2, "+", "·");
+
+    sourceList.push_back(cs1);
+    targetList.push_back(cs2);
+  }
+
+  std::stringstream out;
+  out << "sub(«";
+  out << boost::join(sourceList, "");
+  out << "»,«";
+  out << boost::join(targetList, "");
+  out << "»)";
+
+  return out.str();
+}
+
+/**
+ * Turns one edited region into a feature name.
+ *
+ * @param s1 source tokens of the region (may be empty)
+ * @param s2 target tokens of the region (may be empty)
+ * @return "ins(«…»)" when s1 is empty, "del(«…»)" when only s2 is empty,
+ *         otherwise the result of MakePair() on the two sides
+ *
+ * The `typename` keyword used previously is dropped: this is not a template,
+ * and `typename` outside a template is ill-formed before C++11.
+ */
+std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const {
+  if(s1.empty())
+    return "ins(«" + boost::join(s2, "·") + "»)";
+
+  if(s2.empty())
+    return "del(«" + boost::join(s1, "·") + "»)";
+
+  // Tokens are joined with '+', which MakePair() treats as the word separator.
+  const std::string v1 = boost::join(s1, "+");
+  const std::string v2 = boost::join(s2, "+");
+  return MakePair(v1, v2, m_general);
+}
+
+/**
+ * Collects context feature fragments of growing length around an edited region.
+ *
+ * @param pos         position just past the region, relative to the start of
+ *                    the input path's word range
+ * @param len         number of source tokens in the region
+ * @param window      largest context length to produce
+ * @param input       input sentence (cast to Sentence)
+ * @param inputPath   supplies the word range whose start position is the offset
+ * @param factorTypes factors used to render context words
+ * @param isRight     true for right-hand context, false for left-hand context
+ * @return for each length 1..window one string "left(«…»)_" or "_right(«…»)";
+ *         positions outside the sentence are rendered as "<s>" / "</s>", and
+ *         a context whose two outermost words are both the boundary marker is
+ *         skipped
+ */
+std::vector<std::string> GetContext(size_t pos,
+                                    size_t len,
+                                    size_t window,
+                                    const InputType &input,
+                                    const InputPath &inputPath,
+                                    const std::vector<FactorType>& factorTypes,
+                                    bool isRight) {
+
+  const Sentence& sentence = static_cast<const Sentence&>(input);
+  const Range& range = inputPath.GetWordsRange();
+
+  // First word to the left of the region and first word to its right.
+  int leftPos = range.GetStartPos() + pos - len - 1;
+  int rightPos = range.GetStartPos() + pos;
+
+  const std::string boundary = isRight ? "</s>" : "<s>";
+
+  std::vector<std::string> contexts;
+
+  for(int length = 1; length <= (int)window; ++length) {
+    // Words are gathered walking away from the edited region.
+    std::vector<std::string> words;
+    for(int offset = 0; offset < length; offset++) {
+      bool inside;
+      int wordPos;
+      if(isRight) {
+        wordPos = rightPos + offset;
+        inside = wordPos < (int)sentence.GetSize();
+      } else {
+        wordPos = leftPos - offset;
+        inside = wordPos >= 0;
+      }
+
+      if(inside)
+        words.push_back(sentence.GetWord(wordPos).GetString(factorTypes, false));
+      else
+        words.push_back(boundary);
+    }
+
+    // Never emit more than one boundary marker in a row.
+    if(words.back() == boundary && words.size() >= 2 && words[words.size()-2] == boundary)
+      continue;
+
+    if(isRight) {
+      contexts.push_back("_right(«" + boost::join(words, "·") + "»)");
+    } else {
+      // Left context was collected right-to-left; restore sentence order.
+      std::reverse(words.begin(), words.end());
+      contexts.push_back("left(«" + boost::join(words, "·") + "»)_");
+    }
+  }
+  return contexts;
+}
+
+/**
+ * Appends `pattern` to `patternList` and, when `window` > 0, also every
+ * combination of the pattern with its left contexts, its right contexts, and
+ * each left/right pair, as produced by GetContext().
+ */
+static void AppendPatternWithContexts(const std::string &pattern,
+                                      size_t pos,
+                                      size_t len,
+                                      size_t window,
+                                      const InputType &input,
+                                      const InputPath &inputPath,
+                                      const std::vector<FactorType> &factorTypes,
+                                      std::vector<std::string> &patternList)
+{
+  patternList.push_back(pattern);
+  if(window == 0)
+    return;
+
+  const std::vector<std::string> leftContexts
+    = GetContext(pos, len, window, input, inputPath, factorTypes, false);
+  const std::vector<std::string> rightContexts
+    = GetContext(pos, len, window, input, inputPath, factorTypes, true);
+
+  BOOST_FOREACH(const std::string &left, leftContexts)
+    patternList.push_back(left + pattern);
+
+  BOOST_FOREACH(const std::string &right, rightContexts)
+    patternList.push_back(pattern + right);
+
+  BOOST_FOREACH(const std::string &left, leftContexts)
+    BOOST_FOREACH(const std::string &right, rightContexts)
+      patternList.push_back(left + pattern + right);
+}
+
+/**
+ * Computes all correction-pattern feature names for a source/target phrase pair.
+ *
+ * The token sequences are diffed; every maximal run of deletions/insertions
+ * becomes one pattern (see CreateSinglePattern), optionally combined with
+ * surrounding context words when m_context > 0.
+ *
+ * @param s1        source tokens
+ * @param s2        target tokens
+ * @param input     input sentence, used for context lookup
+ * @param inputPath input path of the source phrase, used for context lookup
+ * @return list of feature names (empty when the sequences are identical)
+ */
+std::vector<std::string>
+CorrectionPattern::CreatePattern(const Tokens &s1,
+                                 const Tokens &s2,
+                                 const InputType &input,
+                                 const InputPath &inputPath) const {
+
+  const Diffs diffs = CreateDiff(s1, s2);
+  size_t i = 0, j = 0;   // read positions in s1 and s2
+  char lastType = 'm';
+  std::vector<std::string> patternList;
+  Tokens source, target; // tokens of the edited region being collected
+
+  BOOST_FOREACH(Diff type, diffs) {
+    if(type == 'm') {
+      // A match closes the edited region that precedes it.
+      if(lastType != 'm') {
+        AppendPatternWithContexts(CreateSinglePattern(source, target),
+                                  i, source.size(), m_context,
+                                  input, inputPath, m_contextFactors, patternList);
+      }
+      source.clear();
+      target.clear();
+      // CreateDiff() emits 'm' only for equal tokens, so the former
+      // "s1[i] != s2[j]" branch here could never run and has been removed.
+      i++;
+      j++;
+    } else if(type == 'd') {
+      source.push_back(s1[i]);
+      i++;
+    } else if(type == 'i') {
+      target.push_back(s2[j]);
+      j++;
+    }
+    lastType = type;
+  }
+
+  // The phrase pair may end inside an edited region.
+  if(lastType != 'm') {
+    AppendPatternWithContexts(CreateSinglePattern(source, target),
+                              i, source.size(), m_context,
+                              input, inputPath, m_contextFactors, patternList);
+  }
+
+  return patternList;
+}
+
+// Constructs the feature from its configuration line. Defaults: factor 0 for
+// both pattern and context words, no generalization, no context window.
+// ReadParameters() is then called, presumably dispatching the key=value
+// arguments of `line` to SetParameter() - confirm in the base class.
+CorrectionPattern::CorrectionPattern(const std::string &line)
+  : StatelessFeatureFunction(0, line)
+  , m_factors(1, 0)
+  , m_general(false)
+  , m_context(0)
+  , m_contextFactors(1, 0)
+{
+  std::cerr << "Initializing correction pattern feature.." << std::endl;
+  ReadParameters();
+}
+
+// Handles one key/value argument. Recognised keys: "factor",
+// "context-factor", "general" and "context"; anything else is passed on to
+// StatelessFeatureFunction::SetParameter().
+void CorrectionPattern::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "factor") {
+    m_factors = std::vector<FactorType>(1, Scan<FactorType>(value));
+    return;
+  }
+  if (key == "context-factor") {
+    m_contextFactors = std::vector<FactorType>(1, Scan<FactorType>(value));
+    return;
+  }
+  if (key == "general") {
+    m_general = Scan<bool>(value);
+    return;
+  }
+  if (key == "context") {
+    m_context = Scan<size_t>(value);
+    return;
+  }
+  StatelessFeatureFunction::SetParameter(key, value);
+}
+
+// Scores a target phrase given the full input: forwards to ComputeFeatures(),
+// which adds the pattern features to scoreBreakdown. stackVec and
+// estimatedFutureScore are not used.
+void CorrectionPattern::EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore) const
+{
+ ComputeFeatures(input, inputPath, targetPhrase, &scoreBreakdown);
+}
+
+// Renders the source phrase of inputPath and the target phrase as token lists
+// (using m_factors), derives the correction patterns for the pair and adds 1
+// to the accumulator for every pattern name produced.
+void CorrectionPattern::ComputeFeatures(
+  const InputType &input,
+  const InputPath &inputPath,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
+{
+  const Phrase &source = inputPath.GetPhrase();
+
+  Tokens sourceTokens;
+  for(size_t pos = 0; pos != source.GetSize(); ++pos) {
+    sourceTokens.push_back(source.GetWord(pos).GetString(m_factors, false));
+  }
+
+  Tokens targetTokens;
+  for(size_t pos = 0; pos != target.GetSize(); ++pos) {
+    targetTokens.push_back(target.GetWord(pos).GetString(m_factors, false));
+  }
+
+  const std::vector<std::string> patternList
+    = CreatePattern(sourceTokens, targetTokens, input, inputPath);
+
+  for(std::vector<std::string>::const_iterator pattern = patternList.begin();
+      pattern != patternList.end(); ++pattern) {
+    accumulator->PlusEquals(this, *pattern, 1);
+  }
+
+  /* Debug output, disabled:
+  BOOST_FOREACH(std::string w, sourceTokens)
+    std::cerr << w << " ";
+  std::cerr << std::endl;
+  BOOST_FOREACH(std::string w, targetTokens)
+    std::cerr << w << " ";
+  std::cerr << std::endl;
+  BOOST_FOREACH(std::string w, patternList)
+    std::cerr << w << " ";
+  std::cerr << std::endl << std::endl;
+  */
+}
+
+// Returns true only if every pattern factor and every context factor is set
+// in the given factor mask.
+bool CorrectionPattern::IsUseable(const FactorMask &mask) const
+{
+  for(size_t idx = 0; idx < m_factors.size(); ++idx) {
+    if(!mask[m_factors[idx]])
+      return false;
+  }
+  for(size_t idx = 0; idx < m_contextFactors.size(); ++idx) {
+    if(!mask[m_contextFactors[idx]])
+      return false;
+  }
+  return true;
+}
+
+}
diff --git a/moses/FF/CorrectionPattern.h b/moses/FF/CorrectionPattern.h
new file mode 100644
index 000000000..63ca125c6
--- /dev/null
+++ b/moses/FF/CorrectionPattern.h
@@ -0,0 +1,73 @@
+#ifndef moses_CorrectionPattern_h
+#define moses_CorrectionPattern_h
+
+#include <string>
+#include <boost/unordered_set.hpp>
+
+#include "StatelessFeatureFunction.h"
+#include "moses/FactorCollection.h"
+#include "moses/AlignmentInfo.h"
+
+namespace Moses
+{
+
+typedef std::vector<std::string> Tokens;
+
+/** Stateless feature that fires named features describing how a target phrase
+ * differs from its source phrase: insertions, deletions and substitutions
+ * found by diffing the two token sequences, optionally combined with
+ * neighbouring words of the input sentence.
+ */
+class CorrectionPattern : public StatelessFeatureFunction
+{
+private:
+ // Factors used to render source/target words (set by "factor").
+ std::vector<FactorType> m_factors;
+ // Whether substitutions are generalized at character level (set by "general").
+ bool m_general;
+ // Maximum number of context words on each side; 0 disables context (set by "context").
+ size_t m_context;
+ // Factors used to render context words (set by "context-factor").
+ std::vector<FactorType> m_contextFactors;
+
+public:
+ CorrectionPattern(const std::string &line);
+
+ // True if all configured factors are present in the mask.
+ bool IsUseable(const FactorMask &mask) const;
+
+ // Intentionally empty: this feature needs the input sentence and is computed
+ // in EvaluateWithSourceContext().
+ void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+ {}
+
+ // Adds the pattern features for targetPhrase to scoreBreakdown.
+ virtual void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+ // Intentionally empty.
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+ {}
+
+ // Intentionally empty (phrase-based decoding).
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const
+ {}
+ // Intentionally empty (chart decoding).
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const
+ {}
+
+ // Computes the patterns for the phrase pair and adds 1 per pattern to accumulator.
+ void ComputeFeatures(const InputType &input,
+ const InputPath &inputPath,
+ const TargetPhrase& targetPhrase,
+ ScoreComponentCollection* accumulator) const;
+
+ // Handles the keys "factor", "context-factor", "general" and "context".
+ void SetParameter(const std::string& key, const std::string& value);
+
+ // Returns all feature names for the token pair (s1 = source, s2 = target).
+ std::vector<std::string> CreatePattern(const Tokens &s1,
+ const Tokens &s2,
+ const InputType &input,
+ const InputPath &inputPath) const;
+
+ // Returns the feature name for a single edited region: ins(..), del(..) or sub(..).
+ std::string CreateSinglePattern(const Tokens &s1, const Tokens &s2) const;
+
+};
+
+}
+
+#endif // moses_CorrectionPattern_h
diff --git a/moses/FF/Diffs.h b/moses/FF/Diffs.h
new file mode 100644
index 000000000..bf0a7cefc
--- /dev/null
+++ b/moses/FF/Diffs.h
@@ -0,0 +1,139 @@
+#ifndef moses_Diffs_h
+#define moses_Diffs_h
+
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <vector>
+
+namespace Moses
+{
+
+// One edit operation: 'm' (match), 'i' (insert from s2) or 'd' (delete from s1).
+typedef char Diff;
+typedef std::vector<Diff> Diffs;
+
+/**
+ * Back-traces the LCS table `c` from cell (i, j) and appends the resulting
+ * edit operations to `diffs` in left-to-right order.
+ *
+ * @param c     LCS length table over the trimmed sequences, (rows x columns)
+ *              = (trimmed length of s1 + 1) x (trimmed length of s2 + 1)
+ * @param s1    first sequence
+ * @param s2    second sequence
+ * @param start offset of the trimmed region inside both sequences
+ * @param i     current row (number of s1 elements still to process)
+ * @param j     current column (number of s2 elements still to process)
+ * @param diffs output; operations are pushed after the recursive call returns
+ * @param pred  equality predicate on elements
+ */
+template <class Sequence, class Pred>
+void CreateDiffRec(size_t** c,
+                   const Sequence &s1,
+                   const Sequence &s2,
+                   size_t start,
+                   size_t i,
+                   size_t j,
+                   Diffs& diffs,
+                   Pred pred) {
+  if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
+    CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
+    diffs.push_back(Diff('m'));
+  }
+  else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
+    // Ties go to insertion, so within an edited region deletions are
+    // emitted before insertions.
+    CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
+    diffs.push_back(Diff('i'));
+  }
+  else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
+    CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
+    diffs.push_back(Diff('d'));
+  }
+}
+
+/**
+ * Computes an LCS-based edit script that turns s1 into s2.
+ *
+ * The common prefix and suffix are matched directly; the dynamic-programming
+ * table is only built for the part in between.
+ *
+ * @param s1   first sequence (indexable, with size())
+ * @param s2   second sequence
+ * @param pred equality predicate on elements
+ * @return sequence of 'm', 'd' and 'i' operations
+ *
+ * The table lives in std::vector storage so it is released even if `pred` or
+ * a push_back throws; the previous new[]/delete[] version leaked in that case.
+ */
+template <class Sequence, class Pred>
+Diffs CreateDiff(const Sequence& s1,
+                 const Sequence& s2,
+                 Pred pred) {
+
+  Diffs diffs;
+
+  const size_t n = s2.size();
+
+  int start = 0;
+  int m_end = s1.size() - 1;
+  int n_end = s2.size() - 1;
+
+  // Common prefix: emitted immediately.
+  while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
+    diffs.push_back(Diff('m'));
+    start++;
+  }
+  // Common suffix: only trimmed here, emitted at the very end.
+  while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) {
+    m_end--;
+    n_end--;
+  }
+
+  const size_t m_new = m_end - start + 1;
+  const size_t n_new = n_end - start + 1;
+
+  // Zero-initialised (m_new + 1) x (n_new + 1) table in one contiguous buffer;
+  // `rows` provides the size_t** view expected by CreateDiffRec().
+  const size_t width = n_new + 1;
+  std::vector<size_t> table((m_new + 1) * width, 0);
+  std::vector<size_t*> rows(m_new + 1);
+  for(size_t i = 0; i <= m_new; ++i)
+    rows[i] = &table[i * width];
+  size_t** c = &rows[0];
+
+  for(size_t i = 1; i <= m_new; ++i) {
+    for(size_t j = 1; j <= n_new; ++j) {
+      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
+        c[i][j] = c[i-1][j-1] + 1;
+      else
+        c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];
+    }
+  }
+
+  CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);
+
+  // Common suffix.
+  for (size_t i = n_end + 1; i < n; ++i)
+    diffs.push_back(Diff('m'));
+
+  return diffs;
+}
+
+/** Convenience overload comparing elements with operator==. */
+template <class Sequence>
+Diffs CreateDiff(const Sequence& s1, const Sequence& s2) {
+  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
+}
+
+/**
+ * Diffs s1 against s2 and adds edit statistics to `stats`.
+ *
+ * `sig` selects, position by position, which statistic is added to the
+ * corresponding element of `stats`:
+ *   'm' matches, 'd' deletions, 'i' insertions, 's' substitutions,
+ *   'l' all edits (d + i + s),
+ *   'r' log of the match ratio 1 - edits / (edits + matches); when that ratio
+ *       is not positive, log(1 / (edits + matches + 1)) is added instead.
+ * Any other character adds nothing.
+ *
+ * Throws a string literal (const char*) if sig and stats differ in size.
+ */
+template <class Sequence, class Sig, class Stats>
+void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) {
+ if(sig.size() != stats.size())
+ throw "Signature size differs from score array size.";
+
+ // Counters: matches, deletions, insertions, substitutions.
+ size_t m = 0, d = 0, i = 0, s = 0;
+ Diffs diff = CreateDiff(s1, s2);
+
+ for(int j = 0; j < (int)diff.size(); ++j) {
+ if(diff[j] == 'm')
+ m++;
+ else if(diff[j] == 'd') {
+ d++;
+ // Pair deletions with the insertions that follow them: walking outwards
+ // from the d/i boundary, each (d, i) pair is recounted as one substitution.
+ int k = 0;
+ while(j - k >= 0 && j + 1 + k < (int)diff.size() &&
+ diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
+ d--;
+ s++;
+ k++;
+ }
+ // Skip the insertions that were consumed as substitutions.
+ j += k;
+ }
+ else if(diff[j] == 'i')
+ i++;
+ }
+
+ for(size_t j = 0; j < sig.size(); ++j) {
+ switch (sig[j]) {
+ case 'l': stats[j] += d + i + s; break;
+ case 'm': stats[j] += m; break;
+ case 'd': stats[j] += d; break;
+ case 'i': stats[j] += i; break;
+ case 's': stats[j] += s; break;
+ case 'r':
+ // macc stays 1 (log = 0) when both sequences are empty.
+ float macc = 1;
+ if (d + i + s + m)
+ macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
+ if(macc > 0)
+ stats[j] += log(macc);
+ else
+ stats[j] += log(1.0/(float)(d + i + s + m + 1));
+ break;
+ }
+ }
+}
+
+}
+
+#endif
diff --git a/moses/FF/EditOps.cpp b/moses/FF/EditOps.cpp
new file mode 100644
index 000000000..fdca93963
--- /dev/null
+++ b/moses/FF/EditOps.cpp
@@ -0,0 +1,119 @@
+#include <sstream>
+#include "EditOps.h"
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TranslationOption.h"
+#include "util/string_piece_hash.hh"
+#include "util/exception.hh"
+
+#include <functional>
+
+#include <boost/foreach.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "Diffs.h"
+
+namespace Moses
+{
+
+using namespace std;
+
+// Extracts the value of the "scores=" argument from a feature configuration
+// line. The first token of the line is skipped; every later token must have
+// the form key=value. Returns defaultScores when no "scores" key is present.
+std::string ParseScores(const std::string &line, const std::string& defaultScores) {
+  const std::vector<std::string> toks = Tokenize(line);
+  UTIL_THROW_IF2(toks.empty(), "Empty line");
+
+  for (std::vector<std::string>::const_iterator tok = toks.begin() + 1;
+       tok != toks.end(); ++tok) {
+    const std::vector<std::string> args = TokenizeFirstOnly(*tok, "=");
+    UTIL_THROW_IF2(args.size() != 2,
+                   "Incorrect format for feature function arg: " << *tok);
+
+    if (args.front() != "scores")
+      continue;
+    return args.back();
+  }
+  return defaultScores;
+}
+
+// Constructs the feature from its configuration line. The score signature
+// (default "dis") is parsed from the line up front because its length is
+// passed to the base-class constructor. Defaults: factor 0, token-level diff.
+EditOps::EditOps(const std::string &line)
+  : StatelessFeatureFunction(ParseScores(line, "dis").size(), line)
+  , m_factorType(0)
+  , m_chars(false)
+  , m_scores(ParseScores(line, "dis"))
+{
+  std::cerr << "Initializing EditOps feature.." << std::endl;
+  ReadParameters();
+}
+
+// Handles one key/value argument. Recognised keys: "factor", "chars" and
+// "scores"; anything else is passed on to
+// StatelessFeatureFunction::SetParameter().
+void EditOps::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "factor") {
+    m_factorType = Scan<FactorType>(value);
+    return;
+  }
+  if (key == "chars") {
+    m_chars = Scan<bool>(value);
+    return;
+  }
+  if (key == "scores") {
+    m_scores = value;
+    return;
+  }
+  StatelessFeatureFunction::SetParameter(key, value);
+}
+
+// Nothing to load: the feature has no external resources.
+void EditOps::Load()
+{ }
+
+// Scores a phrase pair on its own: forwards to ComputeFeatures(), which adds
+// the edit statistics to scoreBreakdown. estimatedFutureScore is not used.
+void EditOps::EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &target
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const
+{
+ ComputeFeatures(source, target, &scoreBreakdown);
+}
+
+// Computes the edit statistics selected by m_scores for the phrase pair and
+// adds them to the accumulator. With m_chars set, the string representations
+// of the phrases are diffed character by character; otherwise the diff is
+// over tokens, with non-terminals left out.
+void EditOps::ComputeFeatures(
+  const Phrase &source,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
+{
+  std::vector<float> ops(GetNumScoreComponents(), 0);
+
+  if(!m_chars) {
+    std::vector<std::string> sourceTokens;
+    for(size_t pos = 0; pos < source.GetSize(); ++pos) {
+      if(source.GetWord(pos).IsNonTerminal())
+        continue;
+      sourceTokens.push_back(source.GetWord(pos).GetFactor(m_factorType)->GetString().as_string());
+    }
+
+    std::vector<std::string> targetTokens;
+    for(size_t pos = 0; pos < target.GetSize(); ++pos) {
+      if(target.GetWord(pos).IsNonTerminal())
+        continue;
+      targetTokens.push_back(target.GetWord(pos).GetFactor(m_factorType)->GetString().as_string());
+    }
+
+    AddStats(sourceTokens, targetTokens, m_scores, ops);
+  }
+  else {
+    std::vector<FactorType> factors;
+    factors.push_back(m_factorType);
+
+    std::string sourceStr = source.GetStringRep(factors);
+    std::string targetStr = target.GetStringRep(factors);
+
+    AddStats(sourceStr, targetStr, m_scores, ops);
+  }
+
+  accumulator->PlusEquals(this, ops);
+}
+
+// Returns true if the configured factor is set in the given factor mask.
+bool EditOps::IsUseable(const FactorMask &mask) const
+{
+  return mask[m_factorType];
+}
+
+}
diff --git a/moses/FF/EditOps.h b/moses/FF/EditOps.h
new file mode 100644
index 000000000..b1a1cef7e
--- /dev/null
+++ b/moses/FF/EditOps.h
@@ -0,0 +1,64 @@
+#ifndef moses_EditOps_h
+#define moses_EditOps_h
+
+#include <string>
+#include <boost/unordered_set.hpp>
+
+#include "StatelessFeatureFunction.h"
+#include "moses/FactorCollection.h"
+#include "moses/AlignmentInfo.h"
+
+namespace Moses
+{
+
+typedef std::vector<std::string> Tokens;
+
+/** Calculates string edit operations that transform source phrase into target
+ * phrase using the LCS algorithm. Potentially useful for monolingual tasks
+ * like paraphrasing, summarization, correction.
+ */
+class EditOps : public StatelessFeatureFunction
+{
+private:
+ // Factor whose string is compared (set by "factor").
+ FactorType m_factorType;
+ // Diff characters instead of tokens (set by "chars").
+ bool m_chars;
+ // Score signature, one character per score component (set by "scores"); see AddStats().
+ std::string m_scores;
+
+public:
+ EditOps(const std::string &line);
+
+ // True if the configured factor is present in the mask.
+ bool IsUseable(const FactorMask &mask) const;
+
+ // No-op.
+ void Load();
+
+ // Adds the edit statistics for the phrase pair to scoreBreakdown.
+ virtual void EvaluateInIsolation(const Phrase &source
+ , const TargetPhrase &targetPhrase
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection &estimatedFutureScore) const;
+
+ // Intentionally empty: the feature only looks at the phrase pair itself.
+ void EvaluateWithSourceContext(const InputType &input
+ , const InputPath &inputPath
+ , const TargetPhrase &targetPhrase
+ , const StackVec *stackVec
+ , ScoreComponentCollection &scoreBreakdown
+ , ScoreComponentCollection *estimatedFutureScore = NULL) const
+ {}
+ // Intentionally empty (phrase-based decoding).
+ void EvaluateWhenApplied(const Hypothesis& hypo,
+ ScoreComponentCollection* accumulator) const
+ {}
+ // Intentionally empty (chart decoding).
+ void EvaluateWhenApplied(const ChartHypothesis &hypo,
+ ScoreComponentCollection* accumulator) const
+ {}
+ // Intentionally empty.
+ void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const
+ {}
+
+ // Computes the statistics selected by m_scores and adds them to accumulator.
+ void ComputeFeatures(const Phrase &source,
+ const TargetPhrase& targetPhrase,
+ ScoreComponentCollection* accumulator) const;
+ // Handles the keys "factor", "chars" and "scores".
+ void SetParameter(const std::string& key, const std::string& value);
+};
+
+}
+
+#endif // moses_EditOps_h
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 048981d04..537b43bc5 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -72,6 +72,9 @@
#include "moses/Syntax/InputWeightFF.h"
#include "moses/Syntax/RuleTableFF.h"
+#include "moses/FF/EditOps.h"
+#include "moses/FF/CorrectionPattern.h"
+
#ifdef HAVE_VW
#include "moses/FF/VW/VW.h"
#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
@@ -292,6 +295,9 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);
+ MOSES_FNAME(EditOps);
+ MOSES_FNAME(CorrectionPattern);
+
#ifdef HAVE_VW
MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureSourceBagOfWords);