
github.com/moses-smt/mosesdecoder.git
author    Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>  2016-06-07 18:14:16 +0300
committer Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>  2016-06-07 18:14:16 +0300
commit    44c9e6db17118874756854b5f208f8cccf5da255 (patch)
tree      dec8b58e12cbcf642ea07e942d20a64086f6c8b3 /moses/FF
parent    defbf8d7c3f9ed5b656488fe61b46f4e759182d8 (diff)
parent    7c5bb9328caf4f23133355251b938e0714e89a7a (diff)

Merge branch 'vw_tgtcontext' of github.com:moses-smt/mosesdecoder
Diffstat (limited to 'moses/FF')
-rw-r--r--  moses/FF/Factory.cpp                          |   6
-rw-r--r--  moses/FF/VW/AlignmentConstraint.h             |  40
-rw-r--r--  moses/FF/VW/VW.cpp                            | 627
-rw-r--r--  moses/FF/VW/VW.h                              | 534
-rw-r--r--  moses/FF/VW/VWFeatureBase.cpp                 |  14
-rw-r--r--  moses/FF/VW/VWFeatureBase.h                   |  57
-rw-r--r--  moses/FF/VW/VWFeatureContext.h                | 116
-rw-r--r--  moses/FF/VW/VWFeatureContextBigrams.h         |  40
-rw-r--r--  moses/FF/VW/VWFeatureContextBilingual.h       |  45
-rw-r--r--  moses/FF/VW/VWFeatureContextWindow.h          |  39
-rw-r--r--  moses/FF/VW/VWFeatureSource.h                 |  13
-rw-r--r--  moses/FF/VW/VWFeatureSourceBagOfWords.h       |   6
-rw-r--r--  moses/FF/VW/VWFeatureSourceBigrams.h          |   6
-rw-r--r--  moses/FF/VW/VWFeatureSourceExternalFeatures.h |   6
-rw-r--r--  moses/FF/VW/VWFeatureSourceIndicator.h        |   6
-rw-r--r--  moses/FF/VW/VWFeatureSourcePhraseInternal.h   |   6
-rw-r--r--  moses/FF/VW/VWFeatureSourceSenseWindow.h      |  16
-rw-r--r--  moses/FF/VW/VWFeatureSourceWindow.h           |   8
-rw-r--r--  moses/FF/VW/VWFeatureTarget.h                 |  13
-rw-r--r--  moses/FF/VW/VWFeatureTargetBigrams.h          |   6
-rw-r--r--  moses/FF/VW/VWFeatureTargetIndicator.h        |   6
-rw-r--r--  moses/FF/VW/VWFeatureTargetPhraseInternal.h   |   6
-rw-r--r--  moses/FF/VW/VWFeatureTargetPhraseScores.h     |   6
-rw-r--r--  moses/FF/VW/VWState.cpp                       |  70
-rw-r--r--  moses/FF/VW/VWState.h                         |  54
-rw-r--r--  moses/FF/VW/VWTargetSentence.h                |  54
26 files changed, 1327 insertions(+), 473 deletions(-)
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 537b43bc5..87dafdf8f 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -77,6 +77,9 @@
#ifdef HAVE_VW
#include "moses/FF/VW/VW.h"
+#include "moses/FF/VW/VWFeatureContextBigrams.h"
+#include "moses/FF/VW/VWFeatureContextBilingual.h"
+#include "moses/FF/VW/VWFeatureContextWindow.h"
#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
#include "moses/FF/VW/VWFeatureSourceBigrams.h"
#include "moses/FF/VW/VWFeatureSourceIndicator.h"
@@ -300,6 +303,9 @@ FeatureRegistry::FeatureRegistry()
#ifdef HAVE_VW
MOSES_FNAME(VW);
+ MOSES_FNAME(VWFeatureContextBigrams);
+ MOSES_FNAME(VWFeatureContextBilingual);
+ MOSES_FNAME(VWFeatureContextWindow);
MOSES_FNAME(VWFeatureSourceBagOfWords);
MOSES_FNAME(VWFeatureSourceBigrams);
MOSES_FNAME(VWFeatureSourceIndicator);
diff --git a/moses/FF/VW/AlignmentConstraint.h b/moses/FF/VW/AlignmentConstraint.h
new file mode 100644
index 000000000..28ba7d4f3
--- /dev/null
+++ b/moses/FF/VW/AlignmentConstraint.h
@@ -0,0 +1,40 @@
+#pragma once
+
+namespace Moses
+{
+
+/**
+ * Helper class for storing alignment constraints.
+ */
+class AlignmentConstraint
+{
+public:
+ AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
+
+ AlignmentConstraint(int min, int max) : m_min(min), m_max(max) {}
+
+ /**
+ * We are aligned to point => our min cannot be larger, our max cannot be smaller.
+ */
+ void Update(int point) {
+ if (m_min > point) m_min = point;
+ if (m_max < point) m_max = point;
+ }
+
+ bool IsSet() const {
+ return m_max != -1;
+ }
+
+ int GetMin() const {
+ return m_min;
+ }
+
+ int GetMax() const {
+ return m_max;
+ }
+
+private:
+ int m_min, m_max;
+};
+
+}
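
The Update() method maintains a [min, max] envelope over aligned positions. A minimal standalone sketch (not part of the patch; the class is copied here so the example compiles on its own, and the alignment pairs are invented) of how VWTargetSentence::SetConstraints() uses it to pre-compute, for each source word, the span of target words it is aligned to:

    #include <iostream>
    #include <limits>
    #include <utility>
    #include <vector>

    // copy of Moses::AlignmentConstraint from this patch
    class AlignmentConstraint {
    public:
      AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
      void Update(int point) {
        if (m_min > point) m_min = point;
        if (m_max < point) m_max = point;
      }
      bool IsSet() const { return m_max != -1; }
      int GetMin() const { return m_min; }
      int GetMax() const { return m_max; }
    private:
      int m_min, m_max;
    };

    int main() {
      // toy word alignment: (source, target) index pairs (invented)
      std::pair<int, int> alignment[] = { std::make_pair(0, 0),
                                          std::make_pair(0, 1),
                                          std::make_pair(1, 3) };
      std::vector<AlignmentConstraint> sourceConstraints(2);
      for (size_t i = 0; i < sizeof(alignment) / sizeof(alignment[0]); i++)
        sourceConstraints[alignment[i].first].Update(alignment[i].second);

      // source word 0 is aligned to the target span [0, 1]
      std::cout << sourceConstraints[0].GetMin() << "-"
                << sourceConstraints[0].GetMax() << "\n";  // prints "0-1"
      return 0;
    }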
diff --git a/moses/FF/VW/VW.cpp b/moses/FF/VW/VW.cpp
new file mode 100644
index 000000000..dfb732c8f
--- /dev/null
+++ b/moses/FF/VW/VW.cpp
@@ -0,0 +1,627 @@
+#include <string>
+#include <map>
+#include <limits>
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/PP/CountsPhraseProperty.h"
+#include "moses/TranslationOptionList.h"
+#include "moses/TranslationOption.h"
+#include "moses/Util.h"
+#include "moses/TypeDef.h"
+#include "moses/StaticData.h"
+#include "moses/Phrase.h"
+#include "moses/AlignmentInfo.h"
+#include "moses/AlignmentInfoCollection.h"
+#include "moses/Word.h"
+#include "moses/FactorCollection.h"
+
+#include "Normalizer.h"
+#include "Classifier.h"
+#include "VWFeatureBase.h"
+#include "TabbedSentence.h"
+#include "ThreadLocalByFeatureStorage.h"
+#include "TrainingLoss.h"
+#include "VWTargetSentence.h"
+#include "VWState.h"
+#include "VW.h"
+
+namespace Moses
+{
+
+VW::VW(const std::string &line)
+ : StatefulFeatureFunction(1, line)
+ , TLSTargetSentence(this)
+ , m_train(false)
+ , m_sentenceStartWord(Word()) {
+ ReadParameters();
+ Discriminative::ClassifierFactory *classifierFactory = m_train
+ ? new Discriminative::ClassifierFactory(m_modelPath)
+ : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
+
+ m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
+
+ m_tlsFutureScores = new TLSFloatHashMap(this);
+ m_tlsComputedStateExtensions = new TLSStateExtensions(this);
+ m_tlsTranslationOptionFeatures = new TLSFeatureVectorMap(this);
+ m_tlsTargetContextFeatures = new TLSFeatureVectorMap(this);
+
+ if (! m_normalizer) {
+ VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
+ m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
+ }
+
+ if (! m_trainingLoss) {
+ VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
+ m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
+ }
+
+ // create a virtual beginning-of-sentence word with all factors replaced by <S>
+ const Factor *bosFactor = FactorCollection::Instance().AddFactor(BOS_);
+ for (size_t i = 0; i < MAX_NUM_FACTORS; i++)
+ m_sentenceStartWord.SetFactor(i, bosFactor);
+}
+
+VW::~VW() {
+ delete m_tlsClassifier;
+ delete m_normalizer;
+ // TODO: also delete m_trainingLoss and the thread-local caches allocated in the constructor
+}
+
+FFState* VW::EvaluateWhenApplied(
+ const Hypothesis& curHypo,
+ const FFState* prevState,
+ ScoreComponentCollection* accumulator) const
+{
+ VERBOSE(3, "VW :: Evaluating translation options\n");
+
+ const VWState& prevVWState = *static_cast<const VWState *>(prevState);
+
+ const std::vector<VWFeatureBase*>& contextFeatures =
+ VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
+
+ if (contextFeatures.empty()) {
+ // no target context features => we already evaluated everything in
+ // EvaluateTranslationOptionListWithSourceContext(). Nothing to do now,
+ // no state information to track.
+ return new VWState();
+ }
+
+ size_t spanStart = curHypo.GetTranslationOption().GetStartPos();
+ size_t spanEnd = curHypo.GetTranslationOption().GetEndPos();
+
+ // compute our current key
+ size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);
+
+ boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
+ = *m_tlsComputedStateExtensions->GetStored();
+
+ if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
+ // we have not computed this set of translation options yet
+ const TranslationOptionList *topts =
+ curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);
+
+ const InputType& input = curHypo.GetManager().GetSource();
+
+ Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
+
+ // extract target context features
+ size_t contextHash = prevVWState.hash();
+
+ FeatureVectorMap &contextFeaturesCache = *m_tlsTargetContextFeatures->GetStored();
+
+ FeatureVectorMap::const_iterator contextIt = contextFeaturesCache.find(contextHash);
+ if (contextIt == contextFeaturesCache.end()) {
+ // we have not extracted features for this context yet
+
+ const Phrase &targetContext = prevVWState.GetPhrase();
+ Discriminative::FeatureVector contextVector;
+ const AlignmentInfo *alignInfo = TransformAlignmentInfo(curHypo, targetContext.GetSize());
+ for(size_t i = 0; i < contextFeatures.size(); ++i)
+ (*contextFeatures[i])(input, targetContext, *alignInfo, classifier, contextVector);
+
+ contextFeaturesCache[contextHash] = contextVector;
+ VERBOSE(3, "VW :: context cache miss\n");
+ } else {
+ // context already in cache, simply put feature IDs in the classifier object
+ classifier.AddLabelIndependentFeatureVector(contextIt->second);
+ VERBOSE(3, "VW :: context cache hit\n");
+ }
+
+ std::vector<float> losses(topts->size());
+
+ for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
+ const TranslationOption *topt = topts->Get(toptIdx);
+ const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+ size_t toptHash = hash_value(*topt);
+
+ // start with pre-computed source-context-only VW scores
+ losses[toptIdx] = m_tlsFutureScores->GetStored()->find(toptHash)->second;
+
+ // add all features associated with this translation option
+ // (pre-computed when evaluated with source context)
+ const Discriminative::FeatureVector &targetFeatureVector =
+ m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
+
+ classifier.AddLabelDependentFeatureVector(targetFeatureVector);
+
+ // add classifier score with context+target features only to the total loss
+ losses[toptIdx] += classifier.Predict(MakeTargetLabel(targetPhrase));
+ }
+
+ // normalize classifier scores to get a probability distribution
+ (*m_normalizer)(losses);
+
+ // fill our cache with the results
+ FloatHashMap &toptScores = computedStateExtensions[cacheKey];
+ for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
+ const TranslationOption *topt = topts->Get(toptIdx);
+ size_t toptHash = hash_value(*topt);
+ toptScores[toptHash] = FloorScore(TransformScore(losses[toptIdx]));
+ }
+
+ VERBOSE(3, "VW :: cache miss\n");
+ } else {
+ VERBOSE(3, "VW :: cache hit\n");
+ }
+
+ // now our cache is guaranteed to contain the required score, simply look it up
+ std::vector<float> newScores(m_numScoreComponents);
+ size_t toptHash = hash_value(curHypo.GetTranslationOption());
+ newScores[0] = computedStateExtensions[cacheKey][toptHash];
+ VERBOSE(3, "VW :: adding score: " << newScores[0] << "\n");
+ accumulator->PlusEquals(this, newScores);
+
+ return new VWState(prevVWState, curHypo);
+}
+
+const FFState* VW::EmptyHypothesisState(const InputType &input) const {
+ size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
+ Phrase initialPhrase;
+ for (size_t i = 0; i < maxContextSize; i++)
+ initialPhrase.AddWord(m_sentenceStartWord);
+
+ return new VWState(initialPhrase);
+}
+
+void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const {
+ Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
+
+ if (translationOptionList.size() == 0)
+ return; // nothing to do
+
+ VERBOSE(3, "VW :: Evaluating translation options\n");
+
+ // which feature functions do we use (on the source and target side)
+ const std::vector<VWFeatureBase*>& sourceFeatures =
+ VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
+
+ const std::vector<VWFeatureBase*>& contextFeatures =
+ VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
+
+ const std::vector<VWFeatureBase*>& targetFeatures =
+ VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
+
+ size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
+
+ // only use stateful score computation when needed
+ bool haveTargetContextFeatures = ! contextFeatures.empty();
+
+ const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
+
+ if (m_train) {
+ //
+ // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
+ //
+
+ // find which topts are correct
+ std::vector<bool> correct(translationOptionList.size());
+ std::vector<int> startsAt(translationOptionList.size());
+ std::set<int> uncoveredStartingPositions;
+
+ for (size_t i = 0; i < translationOptionList.size(); i++) {
+ std::pair<bool, int> isCorrect = IsCorrectTranslationOption(* translationOptionList.Get(i));
+ correct[i] = isCorrect.first;
+ startsAt[i] = isCorrect.second;
+ if (isCorrect.first) {
+ uncoveredStartingPositions.insert(isCorrect.second);
+ }
+ }
+
+ // optionally update translation options using leave-one-out
+ std::vector<bool> keep = (m_leaveOneOut.size() > 0)
+ ? LeaveOneOut(translationOptionList, correct)
+ : std::vector<bool>(translationOptionList.size(), true);
+
+ while (! uncoveredStartingPositions.empty()) {
+ int currentStart = *uncoveredStartingPositions.begin();
+ uncoveredStartingPositions.erase(uncoveredStartingPositions.begin());
+
+ // check whether we (still) have some correct translation
+ int firstCorrect = -1;
+ for (size_t i = 0; i < translationOptionList.size(); i++) {
+ if (keep[i] && correct[i] && startsAt[i] == currentStart) {
+ firstCorrect = i;
+ break;
+ }
+ }
+
+ // do not train if there are no positive examples
+ if (firstCorrect == -1) {
+ VERBOSE(3, "VW :: skipping topt collection, no correct translation for span at current tgt start position\n");
+ continue;
+ }
+
+ // the first correct topt can be used by some loss functions
+ const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
+
+ // feature extraction *at prediction time* outputs feature hashes which can be cached;
+ // at training time, we simply store everything in this dummy vector
+ Discriminative::FeatureVector dummyVector;
+
+ // extract source side features
+ for(size_t i = 0; i < sourceFeatures.size(); ++i)
+ (*sourceFeatures[i])(input, sourceRange, classifier, dummyVector);
+
+ // build target-side context
+ Phrase targetContext;
+ for (size_t i = 0; i < maxContextSize; i++)
+ targetContext.AddWord(m_sentenceStartWord);
+
+ const Phrase *targetSent = GetStored()->m_sentence;
+
+ // word alignment info shifted by context size
+ AlignmentInfo contextAlignment = TransformAlignmentInfo(*GetStored()->m_alignment, maxContextSize, currentStart);
+
+ if (currentStart > 0)
+ targetContext.Append(targetSent->GetSubString(Range(0, currentStart - 1)));
+
+ // extract target-context features
+ for(size_t i = 0; i < contextFeatures.size(); ++i)
+ (*contextFeatures[i])(input, targetContext, contextAlignment, classifier, dummyVector);
+
+ // go over topts, extract target side features and train the classifier
+ for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+
+ // this topt was discarded by leaving one out
+ if (! keep[toptIdx])
+ continue;
+
+ // extract target-side features for each topt
+ const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
+ for(size_t i = 0; i < targetFeatures.size(); ++i)
+ (*targetFeatures[i])(input, targetPhrase, classifier, dummyVector);
+
+ bool isCorrect = correct[toptIdx] && startsAt[toptIdx] == currentStart;
+ float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, isCorrect);
+
+ // train classifier on current example
+ classifier.Train(MakeTargetLabel(targetPhrase), loss);
+ }
+ }
+ } else {
+ //
+ // predict using a trained classifier, use this in decoding (=at test time)
+ //
+
+ std::vector<float> losses(translationOptionList.size());
+
+ Discriminative::FeatureVector outFeaturesSourceNamespace;
+
+ // extract source side features
+ for(size_t i = 0; i < sourceFeatures.size(); ++i)
+ (*sourceFeatures[i])(input, sourceRange, classifier, outFeaturesSourceNamespace);
+
+ for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+ const TranslationOption *topt = translationOptionList.Get(toptIdx);
+ const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+ Discriminative::FeatureVector outFeaturesTargetNamespace;
+
+ // extract target-side features for each topt
+ for(size_t i = 0; i < targetFeatures.size(); ++i)
+ (*targetFeatures[i])(input, targetPhrase, classifier, outFeaturesTargetNamespace);
+
+ // cache the extracted target features (i.e. features associated with given topt)
+ // for future use at decoding time
+ size_t toptHash = hash_value(*topt);
+ m_tlsTranslationOptionFeatures->GetStored()->insert(
+ std::make_pair(toptHash, outFeaturesTargetNamespace));
+
+ // get classifier score
+ losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
+ }
+
+ // normalize classifier scores to get a probability distribution
+ std::vector<float> rawLosses = losses;
+ (*m_normalizer)(losses);
+
+ // update scores of topts
+ for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+ TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
+ if (! haveTargetContextFeatures) {
+ // no target context features; evaluate the FF now
+ std::vector<float> newScores(m_numScoreComponents);
+ newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
+
+ ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
+ scoreBreakDown.PlusEquals(this, newScores);
+
+ topt->UpdateScore();
+ } else {
+ // We have target context features => this is just a partial score,
+ // do not add it to the score component collection.
+ size_t toptHash = hash_value(*topt);
+
+ // Subtract the score contribution of target-only features, otherwise it would
+ // be included twice.
+ Discriminative::FeatureVector emptySource;
+ const Discriminative::FeatureVector &targetFeatureVector =
+ m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
+ classifier.AddLabelIndependentFeatureVector(emptySource);
+ classifier.AddLabelDependentFeatureVector(targetFeatureVector);
+ float targetOnlyLoss = classifier.Predict(VW_DUMMY_LABEL);
+
+ float futureScore = rawLosses[toptIdx] - targetOnlyLoss;
+ m_tlsFutureScores->GetStored()->insert(std::make_pair(toptHash, futureScore));
+ }
+ }
+ }
+}
+
+void VW::SetParameter(const std::string& key, const std::string& value) {
+ if (key == "train") {
+ m_train = Scan<bool>(value);
+ } else if (key == "path") {
+ m_modelPath = value;
+ } else if (key == "vw-options") {
+ m_vwOptions = value;
+ } else if (key == "leave-one-out-from") {
+ m_leaveOneOut = value;
+ } else if (key == "training-loss") {
+ // which type of loss to use for training
+ if (value == "basic") {
+ m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
+ } else if (value == "bleu") {
+ m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
+ } else {
+ UTIL_THROW2("Unknown training loss type:" << value);
+ }
+ } else if (key == "loss") {
+ // which normalizer to use (theoretically depends on the loss function used for training the
+ // classifier (squared/logistic/hinge/...), hence the name "loss")
+ if (value == "logistic") {
+ m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
+ } else if (value == "squared") {
+ m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
+ } else {
+ UTIL_THROW2("Unknown loss type:" << value);
+ }
+ } else {
+ StatefulFeatureFunction::SetParameter(key, value);
+ }
+}
+
+void VW::InitializeForInput(ttasksptr const& ttask) {
+ // do not keep future cost estimates across sentences!
+ m_tlsFutureScores->GetStored()->clear();
+
+ // invalidate our caches after each sentence
+ m_tlsComputedStateExtensions->GetStored()->clear();
+
+ // it is not strictly necessary to clear these caches, but we must not let
+ // them grow indefinitely; target contexts and translation options will
+ // yield identical features the next time we extract them, so clearing
+ // only costs us re-extraction
+ m_tlsTargetContextFeatures->GetStored()->clear();
+ m_tlsTranslationOptionFeatures->GetStored()->clear();
+
+ InputType const& source = *(ttask->GetSource().get());
+ // tabbed sentence is assumed only in training
+ if (! m_train)
+ return;
+
+ UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
+ "This feature function requires the TabbedSentence input type");
+
+ const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
+ UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
+ "TabbedSentence must contain target<tab>alignment");
+
+ // target sentence represented as a phrase
+ Phrase *target = new Phrase();
+ target->CreateFromString(
+ Output
+ , StaticData::Instance().options()->output.factor_order
+ , tabbedSentence.GetColumns()[0]
+ , NULL);
+
+ // word alignment between source and target sentence
+ // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
+ // sentences, not phrases
+ AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
+
+ VWTargetSentence &targetSent = *GetStored();
+ targetSent.Clear();
+ targetSent.m_sentence = target;
+ targetSent.m_alignment = alignment;
+
+ // pre-compute max- and min- aligned points for faster translation option checking
+ targetSent.SetConstraints(source.GetSize());
+}
+
+/*************************************************************************************
+ * private methods
+ ************************************************************************************/
+
+const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const {
+ std::set<std::pair<size_t, size_t> > alignmentPoints;
+ const Hypothesis *contextHypo = curHypo.GetPrevHypo();
+ int idxInContext = contextSize - 1;
+ int processedWordsInHypo = 0;
+ while (idxInContext >= 0 && contextHypo) {
+ int idxInHypo = contextHypo->GetCurrTargetLength() - 1 - processedWordsInHypo;
+ if (idxInHypo >= 0) {
+ const AlignmentInfo &hypoAlign = contextHypo->GetCurrTargetPhrase().GetAlignTerm();
+ std::set<size_t> alignedToTgt = hypoAlign.GetAlignmentsForTarget(idxInHypo);
+ size_t srcOffset = contextHypo->GetCurrSourceWordsRange().GetStartPos();
+ BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
+ alignmentPoints.insert(std::make_pair(srcOffset + srcIdx, idxInContext));
+ }
+ processedWordsInHypo++;
+ idxInContext--;
+ } else {
+ processedWordsInHypo = 0;
+ contextHypo = contextHypo->GetPrevHypo();
+ }
+ }
+
+ return AlignmentInfoCollection::Instance().Add(alignmentPoints);
+}
+
+AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const {
+ std::set<std::pair<size_t, size_t> > alignmentPoints;
+ for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) {
+ std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
+ BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
+ alignmentPoints.insert(std::make_pair(srcIdx, i + contextSize));
+ }
+ }
+ return AlignmentInfo(alignmentPoints);
+}
+
+std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const {
+
+ //std::cerr << topt.GetSourceWordsRange() << std::endl;
+
+ int sourceStart = topt.GetSourceWordsRange().GetStartPos();
+ int sourceEnd = topt.GetSourceWordsRange().GetEndPos();
+
+ const VWTargetSentence &targetSentence = *GetStored();
+
+ // [targetStart, targetEnd] spans aligned target words
+ int targetStart = targetSentence.m_sentence->GetSize();
+ int targetEnd = -1;
+
+ // get the left-most and right-most alignment point within source span
+ for(int i = sourceStart; i <= sourceEnd; ++i) {
+ if(targetSentence.m_sourceConstraints[i].IsSet()) {
+ if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
+ targetStart = targetSentence.m_sourceConstraints[i].GetMin();
+ if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
+ targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
+ }
+ }
+ // there was no alignment
+ if(targetEnd == -1)
+ return std::make_pair(false, -1);
+
+ //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
+
+ // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
+ int targetStart2 = targetStart;
+ for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
+ targetStart2 = i;
+
+ int targetEnd2 = targetEnd;
+ for(int i = targetEnd2;
+ i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
+ ++i)
+ targetEnd2 = i;
+
+ //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
+
+ const TargetPhrase &tphrase = topt.GetTargetPhrase();
+ //std::cerr << tphrase << std::endl;
+
+ // if target phrase is shorter than inner span return false
+ if(tphrase.GetSize() < targetEnd - targetStart + 1)
+ return std::make_pair(false, -1);
+
+ // if target phrase is longer than outer span return false
+ if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
+ return std::make_pair(false, -1);
+
+ // for each possible starting point
+ for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
+ bool found = true;
+ // check if the target phrase is within longer span
+ for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
+ if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
+ found = false;
+ break;
+ }
+ }
+ // return true if there was a match
+ if(found) {
+ //std::cerr << "Found" << std::endl;
+ return std::make_pair(true, tempStart);
+ }
+ }
+
+ return std::make_pair(false, -1);
+}
+
+std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
+ UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
+
+ float sourceRawCount = 0.0;
+ const float ONE = 1.0001; // slightly over 1 to guard against floating-point rounding when subtracting counts
+
+ std::vector<bool> keepOpt;
+
+ for (size_t i = 0; i < topts.size(); i++) {
+ TranslationOption *topt = *(topts.begin() + i);
+ const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+
+ // extract raw counts from phrase-table property
+ const CountsPhraseProperty *property =
+ static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
+
+ if (! property) {
+ VERBOSE(2, "VW :: Counts not found for topt! Is this an OOV?\n");
+ // keep all translation opts without updating, this is either OOV or bad usage...
+ keepOpt.assign(topts.size(), true);
+ return keepOpt;
+ }
+
+ if (sourceRawCount == 0.0) {
+ sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
+ if (sourceRawCount <= 0) {
+ // no translation options survived, source phrase was a singleton
+ keepOpt.assign(topts.size(), false);
+ return keepOpt;
+ }
+ }
+
+ float discount = correct[i] ? ONE : 0.0;
+ float target = property->GetTargetMarginal() - discount;
+ float joint = property->GetJointCount() - discount;
+ if (discount != 0.0) VERBOSE(3, "VW :: leaving one out!\n");
+
+ if (joint > 0) {
+ // topt survived leaving one out, update its scores
+ const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
+ std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
+ UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
+ scores[0] = TransformScore(joint / target); // P(f|e)
+ scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
+
+ ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
+ scoreBreakDown.Assign(feature, scores);
+ topt->UpdateScore();
+ keepOpt.push_back(true);
+ } else {
+ // they only occurred together once, discard topt
+ VERBOSE(2, "VW :: discarded topt when leaving one out\n");
+ keepOpt.push_back(false);
+ }
+ }
+
+ return keepOpt;
+}
+
+} // namespace Moses
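
Two small pieces of machinery above are worth isolating. A standalone sketch follows (the cache keying copies VW::MakeCacheKey() from VW.h below; the normalizer is a hypothetical stand-in, since Normalizer.h is not part of this diff and its exact sign convention is an assumption):

    #include <cmath>
    #include <cstddef>
    #include <iostream>
    #include <vector>
    #include <boost/functional/hash.hpp>

    // same keying scheme as VW::MakeCacheKey(): previous-state pointer + source span
    std::size_t MakeCacheKey(const void *prevState, std::size_t spanStart, std::size_t spanEnd) {
      std::size_t key = 0;
      boost::hash_combine(key, prevState);
      boost::hash_combine(key, spanStart);
      boost::hash_combine(key, spanEnd);
      return key;
    }

    // hypothetical stand-in for Discriminative::LogisticLossNormalizer:
    // turn raw classifier losses into a probability distribution
    void NormalizeLogistic(std::vector<float> &losses) {
      float sum = 0;
      for (std::size_t i = 0; i < losses.size(); i++) {
        losses[i] = std::exp(-losses[i]);
        sum += losses[i];
      }
      for (std::size_t i = 0; i < losses.size(); i++)
        losses[i] /= sum;
    }

    int main() {
      std::vector<float> losses;
      losses.push_back(0.2f);
      losses.push_back(1.5f);
      std::cout << MakeCacheKey(NULL, 3, 5) << "\n";
      NormalizeLogistic(losses);
      std::cout << losses[0] + losses[1] << "\n";  // 1: a proper distribution
      return 0;
    }

Note how EvaluateWhenApplied() uses the key: one cache entry covers the whole translation option list for a given previous-state object and source span, so repeated expansions with the same context are scored only once per sentence.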
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index da8a5cfb8..89891e725 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -3,8 +3,12 @@
#include <string>
#include <map>
#include <limits>
+#include <vector>
-#include "moses/FF/StatelessFeatureFunction.h"
+#include <boost/unordered_map.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/TranslationOptionList.h"
#include "moses/TranslationOption.h"
@@ -13,6 +17,8 @@
#include "moses/StaticData.h"
#include "moses/Phrase.h"
#include "moses/AlignmentInfo.h"
+#include "moses/Word.h"
+#include "moses/FactorCollection.h"
#include "Normalizer.h"
#include "Classifier.h"
@@ -20,119 +26,50 @@
#include "TabbedSentence.h"
#include "ThreadLocalByFeatureStorage.h"
#include "TrainingLoss.h"
+#include "VWTargetSentence.h"
+
+/*
+ * VW classifier feature. See vw/README.md for further information.
+ *
+ * TODO: say which paper to cite.
+ */
namespace Moses
{
-const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might
+// dummy class label; VW does not use the actual label, other classifiers might
+const std::string VW_DUMMY_LABEL = "1111";
-/**
- * Helper class for storing alignment constraints.
- */
-class Constraint
-{
-public:
- Constraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
+// thread-specific classifier instance
+typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
- Constraint(int min, int max) : m_min(min), m_max(max) {}
+// current target sentence, used in VW training (vwtrainer), not in decoding (prediction time)
+typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
- /**
- * We are aligned to point => our min cannot be larger, our max cannot be smaller.
- */
- void Update(int point) {
- if (m_min > point) m_min = point;
- if (m_max < point) m_max = point;
- }
+// hash table of feature vectors
+typedef boost::unordered_map<size_t, Discriminative::FeatureVector> FeatureVectorMap;
- bool IsSet() const {
- return m_max != -1;
- }
+// thread-specific feature vector hash
+typedef ThreadLocalByFeatureStorage<FeatureVectorMap> TLSFeatureVectorMap;
- int GetMin() const {
- return m_min;
- }
+// hash table of partial scores
+typedef boost::unordered_map<size_t, float> FloatHashMap;
- int GetMax() const {
- return m_max;
- }
+// thread-specific score hash table, used for caching
+typedef ThreadLocalByFeatureStorage<FloatHashMap> TLSFloatHashMap;
-private:
- int m_min, m_max;
-};
+// thread-specific hash table for caching full classifier outputs
+typedef ThreadLocalByFeatureStorage<boost::unordered_map<size_t, FloatHashMap> > TLSStateExtensions;
-/**
- * VW thread-specific data about target sentence.
+/*
+ * VW feature function. A discriminative classifier with source and target context features.
*/
-struct VWTargetSentence {
- VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
-
- void Clear() {
- if (m_sentence) delete m_sentence;
- if (m_alignment) delete m_alignment;
- }
-
- ~VWTargetSentence() {
- Clear();
- }
-
- void SetConstraints(size_t sourceSize) {
- // initialize to unconstrained
- m_sourceConstraints.assign(sourceSize, Constraint());
- m_targetConstraints.assign(m_sentence->GetSize(), Constraint());
-
- // set constraints according to alignment points
- AlignmentInfo::const_iterator it;
- for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
- int src = it->first;
- int tgt = it->second;
-
- if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
- UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
- }
-
- m_sourceConstraints[src].Update(tgt);
- m_targetConstraints[tgt].Update(src);
- }
- }
-
- Phrase *m_sentence;
- AlignmentInfo *m_alignment;
- std::vector<Constraint> m_sourceConstraints, m_targetConstraints;
-};
-
-typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
-
-typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
-
-class VW : public StatelessFeatureFunction, public TLSTargetSentence
+class VW : public StatefulFeatureFunction, public TLSTargetSentence
{
public:
- VW(const std::string &line)
- : StatelessFeatureFunction(1, line)
- , TLSTargetSentence(this)
- , m_train(false) {
- ReadParameters();
- Discriminative::ClassifierFactory *classifierFactory = m_train
- ? new Discriminative::ClassifierFactory(m_modelPath)
- : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
-
- m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
-
- if (! m_normalizer) {
- VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
- m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
- }
-
- if (! m_trainingLoss) {
- VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
- m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
- }
- }
+ VW(const std::string &line);
- virtual ~VW() {
- delete m_tlsClassifier;
- delete m_normalizer;
- }
+ virtual ~VW();
bool IsUseable(const FactorMask &mask) const {
return true;
@@ -152,335 +89,89 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
- void EvaluateTranslationOptionListWithSourceContext(const InputType &input
- , const TranslationOptionList &translationOptionList) const {
- Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
-
- if (translationOptionList.size() == 0)
- return; // nothing to do
-
- VERBOSE(2, "VW :: Evaluating translation options\n");
+ // The behavior of this method depends on whether it's called during VW
+ // training (feature extraction) by vwtrainer or during decoding (prediction
+ // time) by Moses.
+ //
+ // When predicting, it evaluates all translation options with the VW model;
+ // if no target-context features are defined, this is the final score and it
+ // is added directly to the TranslationOption score. If there are target
+ // context features, the score is a partial score and it is only stored in
+ // cache; the final score is computed based on target context in
+ // EvaluateWhenApplied().
+ //
+ // This method is also used in training by vwtrainer, in which case features
+ // are written to a file and no classifier predictions take place. Target-side
+ // context is constant at training time (we know the true target sentence),
+ // so target-context features are extracted here as well.
+ virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+ , const TranslationOptionList &translationOptionList) const;
+
+ // Evaluate VW during decoding. This is only used at prediction time (not in training).
+ // When no target-context features are defined, VW predictions were already fully calculated
+ // in EvaluateTranslationOptionListWithSourceContext() and the scores were added to the model.
+ // If there are target-context features, we compute the context-dependent part of the
+ // classifier score and combine it with the source-context only partial score which was computed
+ // in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
+ // method more efficient.
+ virtual FFState* EvaluateWhenApplied(
+ const Hypothesis& curHypo,
+ const FFState* prevState,
+ ScoreComponentCollection* accumulator) const;
+
+ virtual FFState* EvaluateWhenApplied(
+ const ChartHypothesis&,
+ int,
+ ScoreComponentCollection* accumulator) const {
+ throw std::logic_error("hierarchical/syntax not supported");
+ }
+
+ // Initial VW state; contains unaligned BOS symbols.
+ const FFState* EmptyHypothesisState(const InputType &input) const;
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+ // At prediction time, this clears our caches. At training time, we load the next sentence, its
+ // translation and word alignment.
+ virtual void InitializeForInput(ttasksptr const& ttask);
- // which feature functions do we use (on the source and target side)
- const std::vector<VWFeatureBase*>& sourceFeatures =
- VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
-
- const std::vector<VWFeatureBase*>& targetFeatures =
- VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
-
- const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
- const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath();
-
- if (m_train) {
- //
- // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
- //
-
- // find which topts are correct
- std::vector<bool> correct(translationOptionList.size());
- for (size_t i = 0; i < translationOptionList.size(); i++)
- correct[i] = IsCorrectTranslationOption(* translationOptionList.Get(i));
-
- // optionally update translation options using leave-one-out
- std::vector<bool> keep = (m_leaveOneOut.size() > 0)
- ? LeaveOneOut(translationOptionList, correct)
- : std::vector<bool>(translationOptionList.size(), true);
-
- // check whether we (still) have some correct translation
- int firstCorrect = -1;
- for (size_t i = 0; i < translationOptionList.size(); i++) {
- if (keep[i] && correct[i]) {
- firstCorrect = i;
- break;
- }
- }
-
- // do not train if there are no positive examples
- if (firstCorrect == -1) {
- VERBOSE(2, "VW :: skipping topt collection, no correct translation for span\n");
- return;
- }
-
- // the first correct topt can be used by some loss functions
- const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
-
- // extract source side features
- for(size_t i = 0; i < sourceFeatures.size(); ++i)
- (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
-
- // go over topts, extract target side features and train the classifier
- for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
-
- // this topt was discarded by leaving one out
- if (! keep[toptIdx])
- continue;
-
- // extract target-side features for each topt
- const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
- for(size_t i = 0; i < targetFeatures.size(); ++i)
- (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
-
- float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, correct[toptIdx]);
-
- // train classifier on current example
- classifier.Train(MakeTargetLabel(targetPhrase), loss);
- }
- } else {
- //
- // predict using a trained classifier, use this in decoding (=at test time)
- //
-
- std::vector<float> losses(translationOptionList.size());
-
- // extract source side features
- for(size_t i = 0; i < sourceFeatures.size(); ++i)
- (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
-
- for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
- const TranslationOption *topt = translationOptionList.Get(toptIdx);
- const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
-
- // extract target-side features for each topt
- for(size_t i = 0; i < targetFeatures.size(); ++i)
- (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
-
- // get classifier score
- losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
- }
-
- // normalize classifier scores to get a probability distribution
- (*m_normalizer)(losses);
-
- // update scores of topts
- for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
- TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
- std::vector<float> newScores(m_numScoreComponents);
- newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
-
- ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
- scoreBreakDown.PlusEquals(this, newScores);
-
- topt->UpdateScore();
- }
- }
- }
-
- void EvaluateWhenApplied(const Hypothesis& hypo,
- ScoreComponentCollection* accumulator) const {
- }
-
- void EvaluateWhenApplied(const ChartHypothesis &hypo,
- ScoreComponentCollection* accumulator) const {
+private:
+ inline std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
+ return VW_DUMMY_LABEL; // VW does not care about class labels in our setting (--csoaa_ldf mc).
}
- void SetParameter(const std::string& key, const std::string& value) {
- if (key == "train") {
- m_train = Scan<bool>(value);
- } else if (key == "path") {
- m_modelPath = value;
- } else if (key == "vw-options") {
- m_vwOptions = value;
- } else if (key == "leave-one-out-from") {
- m_leaveOneOut = value;
- } else if (key == "training-loss") {
- // which type of loss to use for training
- if (value == "basic") {
- m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
- } else if (value == "bleu") {
- m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
- } else {
- UTIL_THROW2("Unknown training loss type:" << value);
- }
- } else if (key == "loss") {
- // which normalizer to use (theoretically depends on the loss function used for training the
- // classifier (squared/logistic/hinge/...), hence the name "loss"
- if (value == "logistic") {
- m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
- } else if (value == "squared") {
- m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
- } else {
- UTIL_THROW2("Unknown loss type:" << value);
- }
- } else {
- StatelessFeatureFunction::SetParameter(key, value);
- }
+ inline size_t MakeCacheKey(const FFState *prevState, size_t spanStart, size_t spanEnd) const {
+ size_t key = 0;
+ boost::hash_combine(key, prevState);
+ boost::hash_combine(key, spanStart);
+ boost::hash_combine(key, spanEnd);
+ return key;
}
- virtual void InitializeForInput(ttasksptr const& ttask) {
- InputType const& source = *(ttask->GetSource().get());
- // tabbed sentence is assumed only in training
- if (! m_train)
- return;
-
- UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
- "This feature function requires the TabbedSentence input type");
-
- const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
- UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
- "TabbedSentence must contain target<tab>alignment");
-
- // target sentence represented as a phrase
- Phrase *target = new Phrase();
- target->CreateFromString(
- Output
- , StaticData::Instance().options()->output.factor_order
- , tabbedSentence.GetColumns()[0]
- , NULL);
-
- // word alignment between source and target sentence
- // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
- // sentences, not phrases
- AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
-
- VWTargetSentence &targetSent = *GetStored();
- targetSent.Clear();
- targetSent.m_sentence = target;
- targetSent.m_alignment = alignment;
-
- // pre-compute max- and min- aligned points for faster translation option checking
- targetSent.SetConstraints(source.GetSize());
- }
+ // used in decoding to transform the global word alignment information into
+ // context-phrase internal alignment information (i.e., with target indices correspoding
+ // to positions in contextPhrase)
+ const AlignmentInfo *TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const;
+ // used during training to extract relevant alignment points from the full sentence alignment
+ // and shift them by target context size
+ AlignmentInfo TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const;
-private:
- std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
- return VW_DUMMY_LABEL;
- }
+ // At training time, determine whether a translation option is correct for the current target sentence
+ // based on word alignment. This is a bit complicated because we need to handle various corner-cases
+ // where some word(s) on phrase borders are unaligned.
+ std::pair<bool, int> IsCorrectTranslationOption(const TranslationOption &topt) const;
- bool IsCorrectTranslationOption(const TranslationOption &topt) const {
-
- //std::cerr << topt.GetSourceWordsRange() << std::endl;
-
- int sourceStart = topt.GetSourceWordsRange().GetStartPos();
- int sourceEnd = topt.GetSourceWordsRange().GetEndPos();
-
- const VWTargetSentence &targetSentence = *GetStored();
-
- // [targetStart, targetEnd] spans aligned target words
- int targetStart = targetSentence.m_sentence->GetSize();
- int targetEnd = -1;
-
- // get the left-most and right-most alignment point within source span
- for(int i = sourceStart; i <= sourceEnd; ++i) {
- if(targetSentence.m_sourceConstraints[i].IsSet()) {
- if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
- targetStart = targetSentence.m_sourceConstraints[i].GetMin();
- if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
- targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
- }
- }
- // there was no alignment
- if(targetEnd == -1)
- return false;
-
- //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
-
- // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
- int targetStart2 = targetStart;
- for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
- targetStart2 = i;
-
- int targetEnd2 = targetEnd;
- for(int i = targetEnd2;
- i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
- ++i)
- targetEnd2 = i;
-
- //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
-
- const TargetPhrase &tphrase = topt.GetTargetPhrase();
- //std::cerr << tphrase << std::endl;
-
- // if target phrase is shorter than inner span return false
- if(tphrase.GetSize() < targetEnd - targetStart + 1)
- return false;
-
- // if target phrase is longer than outer span return false
- if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
- return false;
-
- // for each possible starting point
- for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
- bool found = true;
- // check if the target phrase is within longer span
- for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
- if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
- found = false;
- break;
- }
- }
- // return true if there was a match
- if(found) {
- //std::cerr << "Found" << std::endl;
- return true;
- }
- }
-
- return false;
- }
-
- std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
- UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
-
- float sourceRawCount = 0.0;
- const float ONE = 1.0001; // I don't understand floating point numbers
-
- std::vector<bool> keepOpt;
-
- for (size_t i = 0; i < topts.size(); i++) {
- TranslationOption *topt = *(topts.begin() + i);
- const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
-
- // extract raw counts from phrase-table property
- const CountsPhraseProperty *property =
- static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
-
- if (! property) {
- VERBOSE(1, "VW :: Counts not found for topt! Is this an OOV?\n");
- // keep all translation opts without updating, this is either OOV or bad usage...
- keepOpt.assign(topts.size(), true);
- return keepOpt;
- }
-
- if (sourceRawCount == 0.0) {
- sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
- if (sourceRawCount <= 0) {
- // no translation options survived, source phrase was a singleton
- keepOpt.assign(topts.size(), false);
- return keepOpt;
- }
- }
-
- float discount = correct[i] ? ONE : 0.0;
- float target = property->GetTargetMarginal() - discount;
- float joint = property->GetJointCount() - discount;
- if (discount != 0.0) VERBOSE(2, "VW :: leaving one out!\n");
-
- if (joint > 0) {
- // topt survived leaving one out, update its scores
- const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
- std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
- UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
- scores[0] = TransformScore(joint / target); // P(f|e)
- scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
-
- ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
- scoreBreakDown.Assign(feature, scores);
- topt->UpdateScore();
- keepOpt.push_back(true);
- } else {
- // they only occurred together once, discard topt
- VERBOSE(2, "VW :: discarded topt when leaving one out\n");
- keepOpt.push_back(false);
- }
- }
-
- return keepOpt;
- }
+ // At training time, optionally discount occurrences of phrase pairs from the current sentence; this helps
+ // prevent over-fitting.
+ std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const;
bool m_train; // false means predict
- std::string m_modelPath;
- std::string m_vwOptions;
+ std::string m_modelPath; // path to the VW model file; at training time, this is where extracted features are stored
+ std::string m_vwOptions; // options for Vowpal Wabbit
+
+ // BOS token, all factors
+ Word m_sentenceStartWord;
// calculator of training loss
TrainingLoss *m_trainingLoss = NULL;
@@ -488,9 +179,16 @@ private:
// optionally contains feature name of a phrase table where we recompute scores with leaving one out
std::string m_leaveOneOut;
+ // normalizer, typically this means softmax
Discriminative::Normalizer *m_normalizer = NULL;
+
+ // thread-specific classifier instance
TLSClassifier *m_tlsClassifier;
+
+ // caches for partial scores and feature vectors
+ TLSFloatHashMap *m_tlsFutureScores;
+ TLSStateExtensions *m_tlsComputedStateExtensions;
+ TLSFeatureVectorMap *m_tlsTranslationOptionFeatures, *m_tlsTargetContextFeatures;
};
}
-
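
The comments above describe a two-pass score decomposition. A toy arithmetic sketch (all numbers invented) of how the partial score cached by EvaluateTranslationOptionListWithSourceContext() combines with the context-dependent prediction in EvaluateWhenApplied() so that target-only features are counted exactly once:

    #include <iostream>

    int main() {
      // pass 1, source context only (per translation option):
      float rawLoss = 2.4f;         // classifier score with source + target features (invented)
      float targetOnlyLoss = 0.9f;  // score of target features alone (invented)
      float futureScore = rawLoss - targetOnlyLoss;  // cached in m_tlsFutureScores

      // pass 2, during search, once the target context is known:
      float contextLoss = 1.1f;     // score with context + target features (invented)
      float finalLoss = futureScore + contextLoss;

      std::cout << finalLoss << "\n";  // 2.6; the target-feature part appears once
      return 0;
    }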
diff --git a/moses/FF/VW/VWFeatureBase.cpp b/moses/FF/VW/VWFeatureBase.cpp
index 874544203..28d827305 100644
--- a/moses/FF/VW/VWFeatureBase.cpp
+++ b/moses/FF/VW/VWFeatureBase.cpp
@@ -2,11 +2,25 @@
#include <string>
#include "VWFeatureBase.h"
+#include "VWFeatureContext.h"
namespace Moses
{
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetContextFeatures;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
+
+std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;
+
+
+void VWFeatureBase::UpdateContextSize(const std::string &usedBy) {
+ // using the standard map behavior here: if the entry does not
+ // exist, it will be added and initialized to zero
+ size_t currentSize = s_targetContextLength[usedBy];
+ size_t newSize = static_cast<VWFeatureContext *const>(this)->GetContextSize();
+ s_targetContextLength[usedBy] = std::max(currentSize, newSize);
+}
+
}
diff --git a/moses/FF/VW/VWFeatureBase.h b/moses/FF/VW/VWFeatureBase.h
index c8bd60a81..000329860 100644
--- a/moses/FF/VW/VWFeatureBase.h
+++ b/moses/FF/VW/VWFeatureBase.h
@@ -12,11 +12,17 @@
namespace Moses
{
+enum VWFeatureType {
+ vwft_source,
+ vwft_target,
+ vwft_targetContext
+};
+
class VWFeatureBase : public StatelessFeatureFunction
{
public:
- VWFeatureBase(const std::string &line, bool isSource = true)
- : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
+ VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
+ : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
// defaults
m_sourceFactors.push_back(0);
m_targetFactors.push_back(0);
@@ -71,26 +77,47 @@ public:
return s_sourceFeatures[name];
}
+ // Return only target-context classifier features
+ static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
+ // don't throw an exception when there are no target-context features; this feature type is not mandatory
+ return s_targetContextFeatures[name];
+ }
+
// Return only target-dependent classifier features
static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
return s_targetFeatures[name];
}
+ // Required length context (maximum context size of defined target-context features)
+ static size_t GetMaximumContextSize(std::string name = "VW0") {
+ return s_targetContextLength[name]; // 0 by default
+ }
+
// Overload to process source-dependent data, create features once for every
// source sentence word range.
virtual void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const = 0;
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const = 0;
// Overload to process target-dependent features, create features once for
- // every target phrase. One source word range will have at leat one target
+ // every target phrase. One source word range will have at least one target
// phrase, but may have more.
virtual void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const = 0;
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const = 0;
+
+ // Overload to process target-context dependent features, these features are
+ // evaluated during decoding. For efficiency, features are not fed directly into
+ // the classifier object but instead output in the vector "features" and managed
+ // separately in VW.h.
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const = 0;
protected:
std::vector<FactorType> m_sourceFactors, m_targetFactors;
@@ -99,10 +126,15 @@ protected:
for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
it != m_usedBy.end(); it++) {
s_features[*it].push_back(this);
- if(m_isSource)
+
+ if(m_featureType == vwft_source) {
s_sourceFeatures[*it].push_back(this);
- else
+ } else if (m_featureType == vwft_targetContext) {
+ s_targetContextFeatures[*it].push_back(this);
+ UpdateContextSize(*it);
+ } else {
s_targetFeatures[*it].push_back(this);
+ }
}
}
@@ -112,11 +144,16 @@ private:
Tokenize(m_usedBy, usedBy, ",");
}
+ void UpdateContextSize(const std::string &usedBy);
+
std::vector<std::string> m_usedBy;
- bool m_isSource;
+ VWFeatureType m_featureType;
static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
+ static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
+
+ static std::map<std::string, size_t> s_targetContextLength;
};
}
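
UpdateRegister() now dispatches each feature instance into one of three static registries keyed by the parent classifier name, and target-context features additionally update the per-classifier maximum context size. A simplified self-contained sketch of that dispatch (Moses feature objects replaced by plain strings):

    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    enum VWFeatureType { vwft_source, vwft_target, vwft_targetContext };

    // stand-ins for the static maps in VWFeatureBase
    std::map<std::string, std::vector<std::string> > s_sourceFeatures,
        s_targetFeatures, s_targetContextFeatures;
    std::map<std::string, std::size_t> s_targetContextLength;

    void Register(const std::string &feature, const std::string &usedBy,
                  VWFeatureType type, std::size_t contextSize = 0) {
      if (type == vwft_source) {
        s_sourceFeatures[usedBy].push_back(feature);
      } else if (type == vwft_targetContext) {
        s_targetContextFeatures[usedBy].push_back(feature);
        // mirrors UpdateContextSize(): keep the maximum over all
        // target-context features registered for this classifier
        s_targetContextLength[usedBy] =
            std::max(s_targetContextLength[usedBy], contextSize);
      } else {
        s_targetFeatures[usedBy].push_back(feature);
      }
    }

    int main() {
      Register("VWFeatureSourceBagOfWords", "VW0", vwft_source);
      Register("VWFeatureContextWindow", "VW0", vwft_targetContext, 2);
      Register("VWFeatureContextBigrams", "VW0", vwft_targetContext, 3);
      // corresponds to VWFeatureBase::GetMaximumContextSize("VW0")
      std::cout << s_targetContextLength["VW0"] << "\n";  // prints 3
      return 0;
    }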
diff --git a/moses/FF/VW/VWFeatureContext.h b/moses/FF/VW/VWFeatureContext.h
new file mode 100644
index 000000000..cd7722f7c
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContext.h
@@ -0,0 +1,116 @@
+#pragma once
+
+#include <string>
+#include <boost/foreach.hpp>
+#include "VWFeatureBase.h"
+#include "moses/InputType.h"
+#include "moses/TypeDef.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+
+// Inherit from this for target-context classifier features. They will
+// automatically register with the classifier class named VW0 or one or more
+// names specified by the used-by=name1,name2,... parameter.
+//
+// The classifier gets a full list by calling
+// VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription())
+
+
+class VWFeatureContext : public VWFeatureBase
+{
+public:
+ VWFeatureContext(const std::string &line, size_t contextSize)
+ : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
+ }
+
+ // Gets its pure virtual functions from VWFeatureBase
+
+ virtual void operator()(const InputType &input
+ , const TargetPhrase &targetPhrase
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ }
+
+ virtual void operator()(const InputType &input
+ , const Range &sourceRange
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ }
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ if (key == "size") {
+ m_contextSize = Scan<size_t>(value);
+ } else if (key == "factor-positions") {
+ // factor positions: assuming a factor such as positional morphological tag, use this
+ // option to select only certain positions; this assumes that only a single
+ // target-side factor is defined
+ Tokenize<size_t>(m_factorPositions, value, ",");
+ } else {
+ VWFeatureBase::SetParameter(key, value);
+ }
+ }
+
+ size_t GetContextSize() {
+ return m_contextSize;
+ }
+
+protected:
+ // Get word with the correct subset of factors as string. Because we're target
+ // context features, we look at a limited number of words to the left of the
+ // current translation. posFromEnd is interpreted like this:
+ // 0 = last word of the hypothesis
+ // 1 = next to last word
+ // ...etc.
+ inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
+ const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
+ if (m_factorPositions.empty()) {
+ return word.GetString(m_targetFactors, false);
+ } else {
+ if (m_targetFactors.size() != 1)
+ UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
+ const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
+
+      // corner cases: at the sentence beginning/end, we don't have the correct
+      // factors set up; similarly for unknown words
+ if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
+ return fullFactor;
+
+ std::string subFactor(m_factorPositions.size(), 'x'); // initialize string with correct size and placeholder chars
+ for (size_t i = 0; i < m_factorPositions.size(); i++)
+ subFactor[i] = fullFactor[m_factorPositions[i]];
+
+ return subFactor;
+ }
+ }
+
+ // some target-context feature functions also look at the source
+ inline std::string GetSourceWord(const InputType &input, size_t pos) const {
+ return input.GetWord(pos).GetString(m_sourceFactors, false);
+ }
+
+ // get source words aligned to a particular context word
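+  // (e.g. with posFromEnd=0, if the last context word is aligned to source
+  // positions 2 and 3, this returns the source words at those positions)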
+ std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
+ , const InputType &input
+ , const AlignmentInfo &alignInfo
+ , size_t posFromEnd) const {
+ size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
+ std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
+ std::vector<std::string> out;
+ out.reserve(alignedToTarget.size());
+ BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
+ out.push_back(GetSourceWord(input, srcIdx));
+ }
+ return out;
+ }
+
+ // required context size
+ size_t m_contextSize;
+
+  // factor positions: assuming a factor with positional encoding, such as a
+  // positional morphological tag, use this option to select only certain
+  // character positions
+ std::vector<size_t> m_factorPositions;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextBigrams.h b/moses/FF/VW/VWFeatureContextBigrams.h
new file mode 100644
index 000000000..6d6b6cca3
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextBigrams.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextBigrams : public VWFeatureContext
+{
+public:
+ VWFeatureContextBigrams(const std::string &line)
+ : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ for (size_t i = 1; i < m_contextSize; i++)
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
+ + "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
+ }
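+
+  // Illustrative example: with size=3 and target context "... green ideas sleep",
+  // the loop emits "tcbigram^-2^sleep^ideas" and "tcbigram^-3^ideas^green".
+  // Note that with the default size=1 the loop body never executes, so bigram
+  // features require size to be at least 2.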
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureContext::SetParameter(key, value);
+ }
+
+private:
+ static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextBilingual.h b/moses/FF/VW/VWFeatureContextBilingual.h
new file mode 100644
index 000000000..f681fcb78
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextBilingual.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <string>
+#include <boost/foreach.hpp>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextBilingual : public VWFeatureContext
+{
+public:
+ VWFeatureContextBilingual(const std::string &line)
+ : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ for (size_t i = 0; i < m_contextSize; i++) {
+ std::string tgtWord = GetWord(contextPhrase, i);
+ std::vector<std::string> alignedTo = GetAlignedSourceWords(contextPhrase, input, alignmentInfo, i);
+ BOOST_FOREACH(const std::string &srcWord, alignedTo) {
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("tcblng^-" + SPrint(i + 1) + "^" + tgtWord + "^" + srcWord));
+ }
+ }
+ }
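+
+  // Illustrative example: if the last context word is "Haus" and it is aligned
+  // to the source word "house", the loop emits "tcblng^-1^Haus^house".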
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureContext::SetParameter(key, value);
+ }
+
+private:
+ static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextWindow.h b/moses/FF/VW/VWFeatureContextWindow.h
new file mode 100644
index 000000000..66c9c3ec5
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextWindow.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextWindow : public VWFeatureContext
+{
+public:
+ VWFeatureContextWindow(const std::string &line)
+ : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+ ReadParameters();
+
+ // Call this last
+ VWFeatureBase::UpdateRegister();
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ for (size_t i = 0; i < m_contextSize; i++)
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("tcwin^-" + SPrint(i + 1) + "^" + GetWord(contextPhrase, i)));
+ }
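+
+  // Illustrative example: with size=2 and target context "... ideas sleep",
+  // the loop emits "tcwin^-1^sleep" and "tcwin^-2^ideas".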
+
+ virtual void SetParameter(const std::string& key, const std::string& value) {
+ VWFeatureContext::SetParameter(key, value);
+ }
+
+private:
+ static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSource.h b/moses/FF/VW/VWFeatureSource.h
index 564f4a3b6..7a306b59c 100644
--- a/moses/FF/VW/VWFeatureSource.h
+++ b/moses/FF/VW/VWFeatureSource.h
@@ -19,15 +19,22 @@ class VWFeatureSource : public VWFeatureBase
{
public:
VWFeatureSource(const std::string &line)
- : VWFeatureBase(line, true) {
+ : VWFeatureBase(line, vwft_source) {
}
// Gets its pure virtual functions from VWFeatureBase
virtual void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
}
virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureSourceBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h
index 97a1cc6c3..b815b4d0e 100644
--- a/moses/FF/VW/VWFeatureSourceBagOfWords.h
+++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h
@@ -18,11 +18,11 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < input.GetSize(); i++) {
- classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)));
}
}
diff --git a/moses/FF/VW/VWFeatureSourceBigrams.h b/moses/FF/VW/VWFeatureSourceBigrams.h
index ce5430ab8..5de3ab2c3 100644
--- a/moses/FF/VW/VWFeatureSourceBigrams.h
+++ b/moses/FF/VW/VWFeatureSourceBigrams.h
@@ -18,11 +18,11 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 1; i < input.GetSize(); i++) {
- classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)));
}
}
diff --git a/moses/FF/VW/VWFeatureSourceExternalFeatures.h b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
index bacc5d231..9995ad1b2 100644
--- a/moses/FF/VW/VWFeatureSourceExternalFeatures.h
+++ b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
@@ -23,12 +23,12 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
const Features& features = *m_tls.GetStored();
for (size_t i = 0; i < features.size(); i++) {
- classifier.AddLabelIndependentFeature("srcext^" + features[i]);
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
}
}
diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h
index fda929f13..b0d43eb0f 100644
--- a/moses/FF/VW/VWFeatureSourceIndicator.h
+++ b/moses/FF/VW/VWFeatureSourceIndicator.h
@@ -20,9 +20,9 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
size_t begin = sourceRange.GetStartPos();
size_t end = sourceRange.GetEndPos() + 1;
@@ -31,7 +31,7 @@ public:
for (size_t i = 0; i < end - begin; i++)
words[i] = GetWord(input, begin + i);
- classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("sind^" + Join(" ", words)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
index 4e7f6e8d1..b346660a0 100644
--- a/moses/FF/VW/VWFeatureSourcePhraseInternal.h
+++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
@@ -20,14 +20,14 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
size_t begin = sourceRange.GetStartPos();
size_t end = sourceRange.GetEndPos() + 1;
while (begin < end) {
- classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++)));
}
}
diff --git a/moses/FF/VW/VWFeatureSourceSenseWindow.h b/moses/FF/VW/VWFeatureSourceSenseWindow.h
index 614f7ff52..e7b1e1a71 100644
--- a/moses/FF/VW/VWFeatureSourceSenseWindow.h
+++ b/moses/FF/VW/VWFeatureSourceSenseWindow.h
@@ -51,9 +51,9 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
@@ -64,24 +64,24 @@ public:
// before current phrase
for (int i = std::max(0, begin - m_size); i < begin; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
- classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
- classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob);
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob));
}
}
// within current phrase
for (int i = begin; i < end; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
- classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
- classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob);
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob));
}
}
// after current phrase
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
- classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
- classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob);
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob));
}
}
}
diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h
index 5205e4f2f..14c617586 100644
--- a/moses/FF/VW/VWFeatureSourceWindow.h
+++ b/moses/FF/VW/VWFeatureSourceWindow.h
@@ -20,19 +20,19 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
for (int i = std::max(0, begin - m_size); i < begin; i++) {
- classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i)));
}
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
- classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
+ outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i)));
}
}
diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h
index 2935b2b4e..ed936ebf3 100644
--- a/moses/FF/VW/VWFeatureTarget.h
+++ b/moses/FF/VW/VWFeatureTarget.h
@@ -17,15 +17,22 @@ class VWFeatureTarget : public VWFeatureBase
{
public:
VWFeatureTarget(const std::string &line)
- : VWFeatureBase(line, false) {
+ : VWFeatureBase(line, vwft_target) {
}
// Gets its pure virtual functions from VWFeatureBase
virtual void operator()(const InputType &input
- , const InputPath &inputPath
, const Range &sourceRange
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ }
+
+ virtual void operator()(const InputType &input
+ , const Phrase &contextPhrase
+ , const AlignmentInfo &alignmentInfo
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
}
virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureTargetBigrams.h b/moses/FF/VW/VWFeatureTargetBigrams.h
index 6f3f35270..30264dbf5 100644
--- a/moses/FF/VW/VWFeatureTargetBigrams.h
+++ b/moses/FF/VW/VWFeatureTargetBigrams.h
@@ -17,11 +17,11 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 1; i < targetPhrase.GetSize(); i++) {
- classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i));
+ outFeatures.push_back(classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i)));
}
}
diff --git a/moses/FF/VW/VWFeatureTargetIndicator.h b/moses/FF/VW/VWFeatureTargetIndicator.h
index 39d8a37a0..0195990d0 100644
--- a/moses/FF/VW/VWFeatureTargetIndicator.h
+++ b/moses/FF/VW/VWFeatureTargetIndicator.h
@@ -17,10 +17,10 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const {
- classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
+ outFeatures.push_back(classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureTargetPhraseInternal.h b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
index e376a1ed3..8a9928aaa 100644
--- a/moses/FF/VW/VWFeatureTargetPhraseInternal.h
+++ b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
@@ -17,11 +17,11 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
- classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
+ outFeatures.push_back(classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i)));
}
}
diff --git a/moses/FF/VW/VWFeatureTargetPhraseScores.h b/moses/FF/VW/VWFeatureTargetPhraseScores.h
index 5a4519fb1..6c9ab63d2 100644
--- a/moses/FF/VW/VWFeatureTargetPhraseScores.h
+++ b/moses/FF/VW/VWFeatureTargetPhraseScores.h
@@ -20,9 +20,9 @@ public:
}
void operator()(const InputType &input
- , const InputPath &inputPath
, const TargetPhrase &targetPhrase
- , Discriminative::Classifier &classifier) const {
+ , Discriminative::Classifier &classifier
+ , Discriminative::FeatureVector &outFeatures) const {
std::vector<FeatureFunction*> features = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < features.size(); i++) {
std::string fname = features[i]->GetScoreProducerDescription();
@@ -31,7 +31,7 @@ public:
std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(features[i]);
for(size_t j = 0; j < scores.size(); ++j)
- classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]);
+ outFeatures.push_back(classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]));
}
}
diff --git a/moses/FF/VW/VWState.cpp b/moses/FF/VW/VWState.cpp
new file mode 100644
index 000000000..c876c38fb
--- /dev/null
+++ b/moses/FF/VW/VWState.cpp
@@ -0,0 +1,70 @@
+#include "VWState.h"
+
+#include "moses/FF/FFState.h"
+#include "moses/Phrase.h"
+#include "moses/Hypothesis.h"
+#include "moses/Util.h"
+#include "moses/TypeDef.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationOption.h"
+#include <boost/functional/hash.hpp>
+
+namespace Moses {
+
+VWState::VWState() : m_spanStart(0), m_spanEnd(0) {
+ ComputeHash();
+}
+
+VWState::VWState(const Phrase &phrase)
+ : m_phrase(phrase), m_spanStart(0), m_spanEnd(0) {
+ ComputeHash();
+}
+
+VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
+ VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");
+
+ // copy phrase from previous state
+ Phrase phrase = prevState.GetPhrase();
+ size_t contextSize = phrase.GetSize(); // identical to VWFeatureBase::GetMaximumContextSize()
+
+ // add words from current hypothesis
+ phrase.Append(curHypo.GetCurrTargetPhrase());
+
+ VERBOSE(3, ">> current hypo: " << curHypo.GetCurrTargetPhrase() << "\n");
+
+ // get a slice of appropriate length
+ Range range(phrase.GetSize() - contextSize, phrase.GetSize() - 1);
+ m_phrase = phrase.GetSubString(range);
+
+ // set current span start/end
+ m_spanStart = curHypo.GetTranslationOption().GetStartPos();
+ m_spanEnd = curHypo.GetTranslationOption().GetEndPos();
+
+ // compute our hash
+ ComputeHash();
+
+ VERBOSE(3, ">> updated state: " << *this << "\n");
+}
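+
+// Illustrative example: with a maximum context size of 2, a previous state
+// phrase "b c" and a hypothesis appending "d e" produce the new state phrase
+// "d e", i.e. the last two words of the concatenation "b c d e".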
+
+bool VWState::operator==(const FFState& o) const {
+ const VWState &other = static_cast<const VWState &>(o);
+
+ return m_phrase == other.GetPhrase()
+ && m_spanStart == other.GetSpanStart()
+ && m_spanEnd == other.GetSpanEnd();
+}
+
+void VWState::ComputeHash() {
+ m_hash = 0;
+
+ boost::hash_combine(m_hash, m_phrase);
+ boost::hash_combine(m_hash, m_spanStart);
+ boost::hash_combine(m_hash, m_spanEnd);
+}
+
+std::ostream &operator<<(std::ostream &out, const VWState &state) {
+ out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
+ return out;
+}
+
+}
diff --git a/moses/FF/VW/VWState.h b/moses/FF/VW/VWState.h
new file mode 100644
index 000000000..5f434a041
--- /dev/null
+++ b/moses/FF/VW/VWState.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <ostream>
+
+#include "moses/FF/FFState.h"
+#include "moses/Phrase.h"
+#include "moses/Hypothesis.h"
+
+namespace Moses {
+
+/**
+ * VW state, used in decoding (when target context is enabled).
+ */
+class VWState : public FFState {
+public:
+ // empty state, used only when VWState is ignored
+ VWState();
+
+ // used for construction of the initial VW state
+ VWState(const Phrase &phrase);
+
+ // continue from previous VW state with a new hypothesis
+ VWState(const VWState &prevState, const Hypothesis &curHypo);
+
+ virtual bool operator==(const FFState& o) const;
+
+ inline virtual size_t hash() const {
+ return m_hash;
+ }
+
+ inline const Phrase &GetPhrase() const {
+ return m_phrase;
+ }
+
+ inline size_t GetSpanStart() const {
+ return m_spanStart;
+ }
+
+ inline size_t GetSpanEnd() const {
+ return m_spanEnd;
+ }
+
+private:
+ void ComputeHash();
+
+ Phrase m_phrase;
+ size_t m_spanStart, m_spanEnd;
+ size_t m_hash;
+};
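+
+// Minimal usage sketch (somePhrase and someHypo are hypothetical objects
+// constructed by the decoder):
+//   VWState initial(somePhrase);        // state at the start of decoding
+//   VWState next(initial, someHypo);    // state extended by one hypothesis
+//   bool recombinable = (initial == next) && initial.hash() == next.hash();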
+
+// how to print a VW state
+std::ostream &operator<<(std::ostream &out, const VWState &state);
+
+}
diff --git a/moses/FF/VW/VWTargetSentence.h b/moses/FF/VW/VWTargetSentence.h
new file mode 100644
index 000000000..beff4f861
--- /dev/null
+++ b/moses/FF/VW/VWTargetSentence.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <vector>
+
+#include "moses/AlignmentInfo.h"
+#include "moses/Phrase.h"
+
+#include "AlignmentConstraint.h"
+
+namespace Moses
+{
+
+/**
+ * Thread-specific VW data about the target sentence.
+ */
+class VWTargetSentence {
+public:
+ VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
+
+  void Clear() {
+    // deleting NULL is a no-op; resetting the pointers keeps Clear() safe to
+    // call more than once (e.g. from the destructor after an explicit Clear())
+    delete m_sentence;
+    delete m_alignment;
+    m_sentence = NULL;
+    m_alignment = NULL;
+  }
+
+ ~VWTargetSentence() {
+ Clear();
+ }
+
+ void SetConstraints(size_t sourceSize) {
+ // initialize to unconstrained
+ m_sourceConstraints.assign(sourceSize, AlignmentConstraint());
+ m_targetConstraints.assign(m_sentence->GetSize(), AlignmentConstraint());
+
+ // set constraints according to alignment points
+ AlignmentInfo::const_iterator it;
+ for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
+      // alignment points are size_t; avoid signed/unsigned comparison below
+      size_t src = it->first;
+      size_t tgt = it->second;
+
+ if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
+ UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
+ }
+
+ m_sourceConstraints[src].Update(tgt);
+ m_targetConstraints[tgt].Update(src);
+ }
+ }
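+
+  // Illustrative example: given the alignment points 0-1 and 2-1, target word 1
+  // gets the constraint [0, 2] while source words 0 and 2 each get [1, 1].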
+
+ Phrase *m_sentence;
+ AlignmentInfo *m_alignment;
+ std::vector<AlignmentConstraint> m_sourceConstraints, m_targetConstraints;
+};
+
+}