diff options
author | Matthias Huck <huck@i6.informatik.rwth-aachen.de> | 2014-01-29 22:37:42 +0400 |
---|---|---|
committer | Matthias Huck <huck@i6.informatik.rwth-aachen.de> | 2014-01-29 22:37:42 +0400 |
commit | 86ee3e15a441aec72eaebdd0389fa925da2316c7 (patch) | |
tree | 6de24e964968820fb708a2e37a935c40a2a1494e /phrase-extract/InternalStructFeature.cpp | |
parent | ffd62e994ecb88358b5f3aa835f84d441ec58c77 (diff) |
new version of the `score` tool
which is now capable of dealing with additional properties in an appropriate manner
Diffstat (limited to 'phrase-extract/InternalStructFeature.cpp')
-rw-r--r-- | phrase-extract/InternalStructFeature.cpp | 78 |
1 files changed, 29 insertions, 49 deletions
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp index e0e9fd3e2..3757b0e43 100644 --- a/phrase-extract/InternalStructFeature.cpp +++ b/phrase-extract/InternalStructFeature.cpp @@ -1,50 +1,30 @@ #include "InternalStructFeature.h" +#include <map> using namespace std; namespace MosesTraining { -InternalStructFeature::InternalStructFeature() - :m_type(0){ - //cout<<"InternalStructFeature: Construct "<<m_type<<"\n"; -} - -bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{ - //cout<<"InternalStructFeature: Equals\n"; - //don't know what it's used for and what we should compare - //-> if the dense score is the same - //-> if the sparse feature is set - // compare phrases? with the internalStrucutre string? - /** Return true if the two phrase pairs are equal from the point of this feature. Assume - that they already compare true according to PhraseAlignment.equals() - **/ - -/* if(lhs.ghkmParse==rhs.ghkmParse) - return true; - else - return false; -*/ - //return true; -} - void InternalStructFeature::add(const ScoreFeatureContext& context, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ - for(size_t i=0; i<context.phrasePair.size(); i++) { - add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues); - } - + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const { + const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // or would we rather want to take the most frequent one only?
+ for ( std::map<std::string,float>::const_iterator iter=allTrees->begin(); + iter!=allTrees->end(); ++iter ) { + add(&(iter->first), iter->second, denseValues, sparseValues); + } } -void InternalStructFeatureDense::add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ +void InternalStructFeatureDense::add(const std::string *treeFragment, + float count, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const { //cout<<"Dense: "<<*internalStruct<<endl; size_t start=0; int countNP=0; - while((start = internalStruct->find("NP", start)) != string::npos) { - countNP++; + while((start = treeFragment->find("NP", start)) != string::npos) { + countNP += count; start+=2; //length of "NP" } //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? @@ -53,21 +33,21 @@ void InternalStructFeatureDense::add(std::string *internalStruct, } -void InternalStructFeatureSparse::add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ - //cout<<"Sparse: "<<*internalStruct<<endl; - if(internalStruct->find("VBZ")!=std::string::npos) - sparseValues["NTVBZ"] = 1; - if(internalStruct->find("VBD")!=std::string::npos) - sparseValues["NTVBD"] = 1; - if(internalStruct->find("VBP")!=std::string::npos) - sparseValues["NTVBP"] = 1; - if(internalStruct->find("PP")!=std::string::npos) - sparseValues["NTPP"] = 1; - if(internalStruct->find("SBAR")!=std::string::npos) - sparseValues["NTSBAR"] = 1; - +void InternalStructFeatureSparse::add(const std::string *treeFragment, + float count, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const { + //cout<<"Sparse: "<<*internalStruct<<endl; + if(treeFragment->find("VBZ")!=std::string::npos) + sparseValues["NTVBZ"] += count; + if(treeFragment->find("VBD")!=std::string::npos) + sparseValues["NTVBD"] += count; + 
if(treeFragment->find("VBP")!=std::string::npos) + sparseValues["NTVBP"] += count; + if(treeFragment->find("PP")!=std::string::npos) + sparseValues["NTPP"] += count; + if(treeFragment->find("SBAR")!=std::string::npos) + sparseValues["NTSBAR"] += count; } |