diff options
author | maria nadejde <maria.nadejde@gmail.com> | 2013-09-13 14:45:46 +0400 |
---|---|---|
committer | maria nadejde <maria.nadejde@gmail.com> | 2013-09-13 14:45:46 +0400 |
commit | bff123635ea9773d50f8536885ba409d24175749 (patch) | |
tree | be44f62caa8742d99c03067eec6545cfb3385607 | |
parent | 43a9323d0fa68d49c1a2d0f0c8c50ef758d80baf (diff) |
Added Dense and Sparse features to the scorer
-rw-r--r-- | phrase-extract/InternalStructFeature.cpp | 26 | ||||
-rw-r--r-- | phrase-extract/InternalStructFeature.h | 6 | ||||
-rw-r--r-- | phrase-extract/ScoreFeature.cpp | 19 | ||||
-rw-r--r-- | phrase-extract/ScoreFeatureTest.cpp | 4 |
4 files changed, 38 insertions, 17 deletions
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp index 160a0baa2..5f558f4d7 100644 --- a/phrase-extract/InternalStructFeature.cpp +++ b/phrase-extract/InternalStructFeature.cpp @@ -7,7 +7,7 @@ namespace MosesTraining InternalStructFeature::InternalStructFeature() :m_type(0){ - cout<<"InternalStructFeature: Construct "<<m_type<<"\n"; + //cout<<"InternalStructFeature: Construct "<<m_type<<"\n"; } bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{ @@ -25,21 +25,37 @@ bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlign void InternalStructFeature::add(const ScoreFeatureContext& context, std::vector<float>& denseValues, std::map<std::string,float>& sparseValues) const{ - std::string *internalStruct=new string("(NP((DT)(NN)))"); - add(internalStruct, denseValues, sparseValues); + for(size_t i=0; i<context.phrasePair.size(); i++) { + add(&context.phrasePair[i]->ghkmParse, denseValues, sparseValues); + } + } void InternalStructFeatureDense::add(std::string *internalStruct, std::vector<float>& denseValues, std::map<std::string,float>& sparseValues) const{ - cout<<internalStruct<<endl; + //cout<<"Dense: "<<*internalStruct<<endl; + size_t start=0; + int countNP=0; + while((start = internalStruct->find("NP", start)) != string::npos) { + countNP++; + start+=2; //length of "NP" + } + //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? + //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 
2.718 : 1 ) + denseValues.push_back(exp(countNP)); } void InternalStructFeatureSparse::add(std::string *internalStruct, std::vector<float>& denseValues, std::map<std::string,float>& sparseValues) const{ - cout<<internalStruct<<endl; + //cout<<"Sparse: "<<*internalStruct<<endl; + if(internalStruct->find("VBZ")!=std::string::npos) + sparseValues["NT_VBZ"] = 1; + if(internalStruct->find("VBD")!=std::string::npos) + sparseValues["NT_VBD"] = 1; + } diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h index 353808ef2..bd513a715 100644 --- a/phrase-extract/InternalStructFeature.h +++ b/phrase-extract/InternalStructFeature.h @@ -36,7 +36,7 @@ protected: /** Overriden in subclass */ virtual void add(std::string *internalStruct, std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const; + std::map<std::string,float>& sparseValues) const = 0; int m_type; }; @@ -45,7 +45,7 @@ class InternalStructFeatureDense : public InternalStructFeature { public: InternalStructFeatureDense() - :InternalStructFeature(){m_type=1; std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";} + :InternalStructFeature(){m_type=1;} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";} protected: virtual void add(std::string *internalStruct, std::vector<float>& denseValues, @@ -56,7 +56,7 @@ class InternalStructFeatureSparse : public InternalStructFeature { public: InternalStructFeatureSparse() - :InternalStructFeature(){m_type=2; std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";} + :InternalStructFeature(){m_type=2;}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";} protected: virtual void add(std::string *internalStruct, std::vector<float>& denseValues, diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp index e967900f9..f98759755 100644 --- a/phrase-extract/ScoreFeature.cpp +++ b/phrase-extract/ScoreFeature.cpp @@ -19,7 +19,7 @@ #include 
"ScoreFeature.h" #include "domain.h" -//#include "InternalStructFeature.h" +#include "InternalStructFeature.h" using namespace std; @@ -37,8 +37,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args) { bool domainAdded = false; bool sparseDomainAdded = false; + for (size_t i = 0; i < args.size(); ++i) { - if (args[i] == "--IgnoreSentenceId") { + if (args[i] == "--IgnoreSentenceId") { m_includeSentenceId = true; } else if (args[i].substr(0,8) == "--Domain") { string type = args[i].substr(8); @@ -76,15 +77,17 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args) } sparseDomainAdded = true; m_includeSentenceId = true; + } else if(args[i] == "--GHKMFeatureSparse"){ + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); + } else if(args[i] == "--GHKMFeatureDense"){ + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); } else { UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]); - } - //MARIA - //m_features.push_back(ScoreFeaturePtr(new InternalStructSparseFeature())); + } + } - //MARIA - //m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); - //InternalStructFeatureSparse *test=new InternalStructFeatureSparse(); } diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp index f4570fe30..65f1f4437 100644 --- a/phrase-extract/ScoreFeatureTest.cpp +++ b/phrase-extract/ScoreFeatureTest.cpp @@ -69,7 +69,9 @@ static void checkDomainConfigured( ScoreFeatureManager manager; manager.configure(args); const std::vector<ScoreFeaturePtr>& features = manager.getFeatures(); - BOOST_REQUIRE_EQUAL(features.size(), 1); + //BOOST_REQUIRE_EQUAL(features.size(), 2); + //if I add to features this check will fail? + BOOST_REQUIRE_EQUAL(features.size(), 1); //MARIA -> what is this check and why does it fail when I add my feature? 
Expected* feature = dynamic_cast<Expected*>(features[0].get()); BOOST_REQUIRE(feature); BOOST_CHECK(manager.includeSentenceId()); |