Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: maria nadejde <maria.nadejde@gmail.com> 2013-09-13 14:45:46 +0400
committer: maria nadejde <maria.nadejde@gmail.com> 2013-09-13 14:45:46 +0400
commit: bff123635ea9773d50f8536885ba409d24175749 (patch)
tree: be44f62caa8742d99c03067eec6545cfb3385607 /phrase-extract
parent: 43a9323d0fa68d49c1a2d0f0c8c50ef758d80baf (diff)
added Dense and Sparse feature to scorer
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/InternalStructFeature.cpp 26
-rw-r--r-- phrase-extract/InternalStructFeature.h 6
-rw-r--r-- phrase-extract/ScoreFeature.cpp 19
-rw-r--r-- phrase-extract/ScoreFeatureTest.cpp 4
4 files changed, 38 insertions, 17 deletions
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp
index 160a0baa2..5f558f4d7 100644
--- a/phrase-extract/InternalStructFeature.cpp
+++ b/phrase-extract/InternalStructFeature.cpp
@@ -7,7 +7,7 @@ namespace MosesTraining
InternalStructFeature::InternalStructFeature()
:m_type(0){
- cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
+ //cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
}
bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{
@@ -25,21 +25,37 @@ bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlign
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
- std::string *internalStruct=new string("(NP((DT)(NN)))");
- add(internalStruct, denseValues, sparseValues);
+ for(size_t i=0; i<context.phrasePair.size(); i++) {
+ add(&context.phrasePair[i]->ghkmParse, denseValues, sparseValues);
+ }
+
}
void InternalStructFeatureDense::add(std::string *internalStruct,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
- cout<<internalStruct<<endl;
+ //cout<<"Dense: "<<*internalStruct<<endl;
+ size_t start=0;
+ int countNP=0;
+ while((start = internalStruct->find("NP", start)) != string::npos) {
+ countNP++;
+ start+=2; //length of "NP"
+ }
+ //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
+ //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
+ denseValues.push_back(exp(countNP));
}
void InternalStructFeatureSparse::add(std::string *internalStruct,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
- cout<<internalStruct<<endl;
+ //cout<<"Sparse: "<<*internalStruct<<endl;
+ if(internalStruct->find("VBZ")!=std::string::npos)
+ sparseValues["NT_VBZ"] = 1;
+ if(internalStruct->find("VBD")!=std::string::npos)
+ sparseValues["NT_VBD"] = 1;
+
}
diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h
index 353808ef2..bd513a715 100644
--- a/phrase-extract/InternalStructFeature.h
+++ b/phrase-extract/InternalStructFeature.h
@@ -36,7 +36,7 @@ protected:
/** Overriden in subclass */
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ std::map<std::string,float>& sparseValues) const = 0;
int m_type;
};
@@ -45,7 +45,7 @@ class InternalStructFeatureDense : public InternalStructFeature
{
public:
InternalStructFeatureDense()
- :InternalStructFeature(){m_type=1; std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
+ :InternalStructFeature(){m_type=1;} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,
@@ -56,7 +56,7 @@ class InternalStructFeatureSparse : public InternalStructFeature
{
public:
InternalStructFeatureSparse()
- :InternalStructFeature(){m_type=2; std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
+ :InternalStructFeature(){m_type=2;}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,
diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp
index e967900f9..f98759755 100644
--- a/phrase-extract/ScoreFeature.cpp
+++ b/phrase-extract/ScoreFeature.cpp
@@ -19,7 +19,7 @@
#include "ScoreFeature.h"
#include "domain.h"
-//#include "InternalStructFeature.h"
+#include "InternalStructFeature.h"
using namespace std;
@@ -37,8 +37,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
{
bool domainAdded = false;
bool sparseDomainAdded = false;
+
for (size_t i = 0; i < args.size(); ++i) {
- if (args[i] == "--IgnoreSentenceId") {
+ if (args[i] == "--IgnoreSentenceId") {
m_includeSentenceId = true;
} else if (args[i].substr(0,8) == "--Domain") {
string type = args[i].substr(8);
@@ -76,15 +77,17 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
+ } else if(args[i] == "--GHKMFeatureSparse"){
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
+ } else if(args[i] == "--GHKMFeatureDense"){
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
- }
- //MARIA
- //m_features.push_back(ScoreFeaturePtr(new InternalStructSparseFeature()));
+ }
+
}
- //MARIA
- //m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
- //InternalStructFeatureSparse *test=new InternalStructFeatureSparse();
}
diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp
index f4570fe30..65f1f4437 100644
--- a/phrase-extract/ScoreFeatureTest.cpp
+++ b/phrase-extract/ScoreFeatureTest.cpp
@@ -69,7 +69,9 @@ static void checkDomainConfigured(
ScoreFeatureManager manager;
manager.configure(args);
const std::vector<ScoreFeaturePtr>& features = manager.getFeatures();
- BOOST_REQUIRE_EQUAL(features.size(), 1);
+ //BOOST_REQUIRE_EQUAL(features.size(), 2);
+ //if I add to features this check will fail?
+ BOOST_REQUIRE_EQUAL(features.size(), 1); //MARIA -> what is this check and why does it fail when I add my feature?
Expected* feature = dynamic_cast<Expected*>(features[0].get());
BOOST_REQUIRE(feature);
BOOST_CHECK(manager.includeSentenceId());