diff options
author | Matthias Huck <huck@i6.informatik.rwth-aachen.de> | 2015-01-09 17:03:18 +0300 |
---|---|---|
committer | Matthias Huck <huck@i6.informatik.rwth-aachen.de> | 2015-01-09 17:03:18 +0300 |
commit | 168118d2524c6397864fd4e93fb35cbc08cc20a0 (patch) | |
tree | 33624adfab06b2d901fea8bf37ce5949d204ad21 | |
parent | 52eac4058f715ab2275ec3620ba0236f593a5907 (diff) |
PhraseOrientationFeature efficiency improvement
-rw-r--r-- | moses/FF/PhraseOrientationFeature.cpp | 32 |
-rw-r--r-- | phrase-extract/extract-ghkm/PhraseOrientation.cpp | 58 |
-rw-r--r-- | phrase-extract/extract-ghkm/PhraseOrientation.h | 11 |
3 files changed, 80 insertions, 21 deletions
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp index 8401b326f..86a6d09ce 100644 --- a/moses/FF/PhraseOrientationFeature.cpp +++ b/moses/FF/PhraseOrientationFeature.cpp @@ -69,27 +69,27 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( // const Factor* targetLHS = currTarPhr.GetTargetLHS()[0]; // bool isGlueGrammarRule = false; - FEATUREVERBOSE(2, *currSrcPhr << std::endl); - FEATUREVERBOSE(2, currTarPhr << std::endl); - - Moses::GHKM::Alignment alignment; // TODO: Efficiency! It's not necessary to fill a Moses::GHKM::Alignment object and then touch everything again in Moses::GHKM::PhraseOrientation's constructor - - for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin(); - it!=currTarPhr.GetAlignTerm().end(); ++it) + IFFEATUREVERBOSE(2) { - alignment.push_back(std::make_pair(it->first, it->second)); - FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl); - } + FEATUREVERBOSE(2, *currSrcPhr << std::endl); + FEATUREVERBOSE(2, currTarPhr << std::endl); - for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin(); - it!=currTarPhr.GetAlignNonTerm().end(); ++it) - { - alignment.push_back(std::make_pair(it->first, it->second)); - FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl); + for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin(); + it!=currTarPhr.GetAlignTerm().end(); ++it) + { + FEATUREVERBOSE(2, "alignTerm " << it->first << " " << it->second << std::endl); + } + + for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin(); + it!=currTarPhr.GetAlignNonTerm().end(); ++it) + { + FEATUREVERBOSE(2, "alignNonTerm " << it->first << " " << it->second << std::endl); + } } // Initialize phrase orientation scoring object - Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), alignment); // TODO: Efficiency! This should be precomputed. 
+ Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), + currTarPhr.GetAlignTerm(), currTarPhr.GetAlignNonTerm()); // Get index map for underlying hypotheses const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/extract-ghkm/PhraseOrientation.cpp index 5a8452f42..f9a7af8c2 100644 --- a/phrase-extract/extract-ghkm/PhraseOrientation.cpp +++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp @@ -40,7 +40,6 @@ PhraseOrientation::PhraseOrientation(int sourceSize, : m_countF(sourceSize) , m_countE(targetSize) { - // prepare data structures for alignments std::vector<std::vector<int> > alignedToS; for(int i=0; i<m_countF; ++i) { @@ -54,11 +53,60 @@ PhraseOrientation::PhraseOrientation(int sourceSize, std::vector<int> alignedCountS(m_countF,0); for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) { - m_alignedToT[a->second].push_back(a->first); - alignedCountS[a->first]++; alignedToS[a->first].push_back(a->second); + alignedCountS[a->first]++; + m_alignedToT[a->second].push_back(a->first); } + Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS); +} + + +PhraseOrientation::PhraseOrientation(int sourceSize, + int targetSize, + const Moses::AlignmentInfo &alignTerm, + const Moses::AlignmentInfo &alignNonTerm) + : m_countF(sourceSize) + , m_countE(targetSize) +{ + // prepare data structures for alignments + std::vector<std::vector<int> > alignedToS; + for(int i=0; i<m_countF; ++i) { + std::vector< int > dummy; + alignedToS.push_back(dummy); + } + for(int i=0; i<m_countE; ++i) { + std::vector< int > dummy; + m_alignedToT.push_back(dummy); + } + std::vector<int> alignedCountS(m_countF,0); + + for (Moses::AlignmentInfo::const_iterator it=alignTerm.begin(); + it!=alignTerm.end(); ++it) + { + alignedToS[it->first].push_back(it->second); + alignedCountS[it->first]++; + m_alignedToT[it->second].push_back(it->first); + } 
+ + for (Moses::AlignmentInfo::const_iterator it=alignNonTerm.begin(); + it!=alignNonTerm.end(); ++it) + { + alignedToS[it->first].push_back(it->second); + alignedCountS[it->first]++; + m_alignedToT[it->second].push_back(it->first); + } + + Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS); +} + + +void PhraseOrientation::Init(int sourceSize, + int targetSize, + const std::vector<std::vector<int> > &alignedToT, + const std::vector<std::vector<int> > &alignedToS, + const std::vector<int> &alignedCountS) +{ for (int startF=0; startF<m_countF; ++startF) { for (int endF=startF; endF<m_countF; ++endF) { @@ -89,8 +137,8 @@ PhraseOrientation::PhraseOrientation(int sourceSize, int maxF = -1; std::vector< int > usedF = alignedCountS; for (int ei=startE; ei<=endE; ++ei) { - for (size_t i=0; i<m_alignedToT[ei].size(); ++i) { - int fi = m_alignedToT[ei][i]; + for (size_t i=0; i<alignedToT[ei].size(); ++i) { + int fi = alignedToT[ei][i]; if (fi<minF) { minF = fi; } diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/extract-ghkm/PhraseOrientation.h index 313c1f3df..aac9c34d1 100644 --- a/phrase-extract/extract-ghkm/PhraseOrientation.h +++ b/phrase-extract/extract-ghkm/PhraseOrientation.h @@ -21,6 +21,7 @@ #pragma once #include "Alignment.h" +#include "moses/AlignmentInfo.h" #include <map> #include <set> @@ -50,6 +51,11 @@ public: int targetSize, const Alignment &alignment); + PhraseOrientation(int sourceSize, + int targetSize, + const AlignmentInfo &alignTerm, + const AlignmentInfo &alignNonTerm); + REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const; REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const; const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=REO_DIR_BIDIR) const; @@ -63,6 +69,11 @@ public: private: + void Init(int sourceSize, int targetSize, + const std::vector<std::vector<int> > &alignedToT, + const 
std::vector<std::vector<int> > &alignedToS, + const std::vector<int> &alignedCountS); + void InsertVertex( HSentenceVertices & corners, int x, int y ); void InsertPhraseVertices(HSentenceVertices & topLeft, |