From 86ee3e15a441aec72eaebdd0389fa925da2316c7 Mon Sep 17 00:00:00 2001 From: Matthias Huck Date: Wed, 29 Jan 2014 18:37:42 +0000 Subject: new version of the `score` tool which is now capable of dealing with additional properties in an appropriate manner --- phrase-extract/InternalStructFeature.cpp | 78 ++++++++++++-------------------- 1 file changed, 29 insertions(+), 49 deletions(-) (limited to 'phrase-extract/InternalStructFeature.cpp') diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp index e0e9fd3e2..3757b0e43 100644 --- a/phrase-extract/InternalStructFeature.cpp +++ b/phrase-extract/InternalStructFeature.cpp @@ -1,50 +1,30 @@ #include "InternalStructFeature.h" +#include using namespace std; namespace MosesTraining { -InternalStructFeature::InternalStructFeature() - :m_type(0){ - //cout<<"InternalStructFeature: Construct "< if the dense score is the same - //-> if the sparse feature is set - // compare phrases? with the internalStrucutre string? - /** Return true if the two phrase pairs are equal from the point of this feature. Assume - that they already compare true according to PhraseAlignment.equals() - **/ - -/* if(lhs.ghkmParse==rhs.ghkmParse) - return true; - else - return false; -*/ - //return true; -} - void InternalStructFeature::add(const ScoreFeatureContext& context, - std::vector& denseValues, - std::map& sparseValues) const{ - for(size_t i=0; itreeFragment, denseValues, sparseValues); - } - + std::vector& denseValues, + std::map& sparseValues) const { + const std::map *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only? + for ( std::map::const_iterator iter=allTrees->begin(); + iter!=allTrees->end(); ++iter ) { + add(&(iter->first), iter->second, denseValues, sparseValues); + } } -void InternalStructFeatureDense::add(std::string *internalStruct, - std::vector& denseValues, - std::map& sparseValues) const{ +void InternalStructFeatureDense::add(const std::string *treeFragment, + float count, + std::vector& denseValues, + std::map& sparseValues) const { //cout<<"Dense: "<<*internalStruct<find("NP", start)) != string::npos) { - countNP++; + while((start = treeFragment->find("NP", start)) != string::npos) { + countNP += count; start+=2; //length of "NP" } //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? @@ -53,21 +33,21 @@ void InternalStructFeatureDense::add(std::string *internalStruct, } -void InternalStructFeatureSparse::add(std::string *internalStruct, - std::vector& denseValues, - std::map& sparseValues) const{ - //cout<<"Sparse: "<<*internalStruct<find("VBZ")!=std::string::npos) - sparseValues["NTVBZ"] = 1; - if(internalStruct->find("VBD")!=std::string::npos) - sparseValues["NTVBD"] = 1; - if(internalStruct->find("VBP")!=std::string::npos) - sparseValues["NTVBP"] = 1; - if(internalStruct->find("PP")!=std::string::npos) - sparseValues["NTPP"] = 1; - if(internalStruct->find("SBAR")!=std::string::npos) - sparseValues["NTSBAR"] = 1; - +void InternalStructFeatureSparse::add(const std::string *treeFragment, + float count, + std::vector& denseValues, + std::map& sparseValues) const { + //cout<<"Sparse: "<<*internalStruct<find("VBZ")!=std::string::npos) + sparseValues["NTVBZ"] += count; + if(treeFragment->find("VBD")!=std::string::npos) + sparseValues["NTVBD"] += count; + if(treeFragment->find("VBP")!=std::string::npos) + sparseValues["NTVBP"] += count; + if(treeFragment->find("PP")!=std::string::npos) + sparseValues["NTPP"] += count; + if(treeFragment->find("SBAR")!=std::string::npos) + sparseValues["NTSBAR"] += count; } -- cgit v1.2.3