Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses/FF
diff options
context:
space:
mode:
author: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>, 2016-04-04 16:22:43 +0300
committer: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>, 2016-04-04 16:22:43 +0300
commit: 90f6f4d4a7cc2bc4ab32d479763387c05122f606 (patch)
tree: e8f00dd4adf8ab6727de05f69e4ac306c67e4239 /moses/FF
parent: 4f85b605c901114cce082fde55efca6541efcfd1 (diff)
vw context bigram features; support using tag subsets as features in VW
Diffstat (limited to 'moses/FF')
-rw-r--r-- moses/FF/Factory.cpp | 6
-rw-r--r-- moses/FF/VW/VWFeatureContext.h | 29
-rw-r--r-- moses/FF/VW/VWFeatureContextBigrams.h | 40
-rw-r--r-- moses/FF/VW/VWFeatureContextBilingual.h | 6
-rw-r--r-- moses/FF/VW/VWFeatureContextWindow.h | 6
5 files changed, 73 insertions, 14 deletions
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 1f67a3aa9..dd9e5e049 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -73,8 +73,9 @@
#ifdef HAVE_VW
#include "moses/FF/VW/VW.h"
-#include "moses/FF/VW/VWFeatureContextWindow.h"
+#include "moses/FF/VW/VWFeatureContextBigrams.h"
#include "moses/FF/VW/VWFeatureContextBilingual.h"
+#include "moses/FF/VW/VWFeatureContextWindow.h"
#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
#include "moses/FF/VW/VWFeatureSourceBigrams.h"
#include "moses/FF/VW/VWFeatureSourceIndicator.h"
@@ -294,8 +295,9 @@ FeatureRegistry::FeatureRegistry()
#ifdef HAVE_VW
MOSES_FNAME(VW);
- MOSES_FNAME(VWFeatureContextWindow);
+ MOSES_FNAME(VWFeatureContextBigrams);
MOSES_FNAME(VWFeatureContextBilingual);
+ MOSES_FNAME(VWFeatureContextWindow);
MOSES_FNAME(VWFeatureSourceBagOfWords);
MOSES_FNAME(VWFeatureSourceBigrams);
MOSES_FNAME(VWFeatureSourceIndicator);
diff --git a/moses/FF/VW/VWFeatureContext.h b/moses/FF/VW/VWFeatureContext.h
index 1a19060c8..b42ddf1d3 100644
--- a/moses/FF/VW/VWFeatureContext.h
+++ b/moses/FF/VW/VWFeatureContext.h
@@ -40,7 +40,16 @@ public:
}
virtual void SetParameter(const std::string& key, const std::string& value) {
- VWFeatureBase::SetParameter(key, value);
+ if (key == "size") {
+ m_contextSize = Scan<size_t>(value);
+ } else if (key == "factor-positions") {
+ // factor positions: assuming a factor such as positional morphological tag, use this
+ // option to select only certain positions; this assumes that only a single
+ // target-side factor is defined
+ Tokenize<size_t>(m_factorPositions, value, ",");
+ } else {
+ VWFeatureBase::SetParameter(key, value);
+ }
}
size_t GetContextSize() {
@@ -55,7 +64,19 @@ protected:
// 1 = next to last word
// ...etc.
inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
- return phrase.GetWord(phrase.GetSize() - posFromEnd - 1).GetString(m_targetFactors, false);
+ const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
+ if (m_factorPositions.empty()) {
+ return word.GetString(m_targetFactors, false);
+ } else {
+ if (m_targetFactors.size() != 1)
+ UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
+ const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
+ std::string subFactor(m_factorPositions.size(), 'x'); // initialize string with correct size and placeholder chars
+ for (size_t i = 0; i < m_factorPositions.size(); i++)
+ subFactor[i] = fullFactor[m_factorPositions[i]];
+
+ return subFactor;
+ }
}
// some target-context feature functions also look at the source
@@ -80,6 +101,10 @@ protected:
// required context size
size_t m_contextSize;
+
+ // factor positions: assuming a factor such as positional morphological tag, use this
+ // option to select only certain positions
+ std::vector<size_t> m_factorPositions;
};
}
diff --git a/moses/FF/VW/VWFeatureContextBigrams.h b/moses/FF/VW/VWFeatureContextBigrams.h
new file mode 100644
index 000000000..6d6b6cca3
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextBigrams.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextBigrams : public VWFeatureContext
+{
+public:
+ VWFeatureContextBigrams(const std::string &line)
+ : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ for (size_t i = 1; i < m_contextSize; i++)
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
+ + "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureContext::SetParameter(key, value);
+ }
+
+private:
+ static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextBilingual.h b/moses/FF/VW/VWFeatureContextBilingual.h
index 1bf56c3d5..f681fcb78 100644
--- a/moses/FF/VW/VWFeatureContextBilingual.h
+++ b/moses/FF/VW/VWFeatureContextBilingual.h
@@ -35,11 +35,7 @@ public:
}
virtual void SetParameter(const std::string& key, const std::string& value) {
- if (key == "size") {
- m_contextSize = Scan<size_t>(value);
- } else {
- VWFeatureContext::SetParameter(key, value);
- }
+ VWFeatureContext::SetParameter(key, value);
}
private:
diff --git a/moses/FF/VW/VWFeatureContextWindow.h b/moses/FF/VW/VWFeatureContextWindow.h
index d2d920bb4..66c9c3ec5 100644
--- a/moses/FF/VW/VWFeatureContextWindow.h
+++ b/moses/FF/VW/VWFeatureContextWindow.h
@@ -29,11 +29,7 @@ public:
}
virtual void SetParameter(const std::string& key, const std::string& value) {
- if (key == "size") {
- m_contextSize = Scan<size_t>(value);
- } else {
- VWFeatureContext::SetParameter(key, value);
- }
+ VWFeatureContext::SetParameter(key, value);
}
private: