Welcome to mirror list, hosted at ThFree Co, Russian Federation.

InternalStructFeature.cpp « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 3757b0e436043b2a65b2f0bf4d21389bfa12792a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include "InternalStructFeature.h"
#include <cmath>
#include <map>

using namespace std;

namespace MosesTraining
{

/**
 * Forwards every entry of the phrase pair's "Tree" property to the
 * per-fragment add() overload.
 *
 * Each map entry is passed on as (pointer to key, mapped value), i.e. the key
 * is handed over as the tree fragment and the mapped float as its count.
 *
 * @param context       scoring context; only context.phrasePair is read here.
 * @param denseValues   passed through unchanged to the per-fragment overload.
 * @param sparseValues  passed through unchanged to the per-fragment overload.
 */
void InternalStructFeature::add(const ScoreFeatureContext& context,
                                std::vector<float>& denseValues,
                                std::map<std::string,float>& sparseValues) const {
  typedef std::map<std::string,float> TreeCounts;

  // Open question from the original author: or would we rather want to take
  // the most frequent one only?
  const TreeCounts *allTrees = context.phrasePair.GetProperty("Tree");

  // GetProperty hands back a pointer, so it may be null (presumably when the
  // phrase pair carries no "Tree" property -- confirm against the phrase pair
  // class). Dereferencing it would crash; with nothing to iterate, do nothing.
  // NOTE(review): in that case no value is appended to denseValues either.
  if (!allTrees) {
    return;
  }

  for (TreeCounts::const_iterator iter = allTrees->begin();
       iter != allTrees->end(); ++iter) {
    add(&(iter->first), iter->second, denseValues, sparseValues);
  }
}

/**
 * Dense variant: appends one score derived from the occurrences of "NP" in a
 * tree fragment.
 *
 * The fragment is scanned left to right for non-overlapping occurrences of
 * the substring "NP". Every occurrence adds `count` to a running total, and
 * exp(total) is appended to denseValues. The match is a plain substring
 * search, so "NP" inside a longer label is counted as well.
 *
 * @param treeFragment  text to scan; must not be null (it is dereferenced).
 * @param count         weight added for each "NP" occurrence.
 * @param denseValues   receives exactly one new element per call.
 * @param sparseValues  not used by the dense variant.
 */
void InternalStructFeatureDense::add(const std::string *treeFragment,
                                     float count,
                                     std::vector<float>& denseValues,
                                     std::map<std::string,float>& sparseValues) const {
  static const char kLabel[] = "NP";
  const size_t labelLength = sizeof(kLabel) - 1; // exclude the trailing '\0'

  // Accumulate in float. `count` is a float, and an integer accumulator would
  // truncate the running total after every single addition, so fractional
  // counts (e.g. 0.5) would never contribute anything.
  float weightedNP = 0.0f;
  for (size_t pos = treeFragment->find(kLabel);
       pos != std::string::npos;
       pos = treeFragment->find(kLabel, pos + labelLength)) {
    weightedNP += count;
  }

  // The total is stored as e^total so that a natural logarithm applied later
  // gives the total back. The original author was unsure whether the consumer
  // uses ln or another log, and whether maybeLog(...) should be used here
  // instead -- TODO confirm.
  denseValues.push_back(std::exp(weightedNP));
}

/**
 * Sparse variant: for each label in a fixed list, adds `count` to the
 * matching sparse feature when the label occurs in the tree fragment.
 *
 * A label contributes at most once per call, however many times it occurs in
 * the fragment. The check is a plain substring search.
 *
 * @param treeFragment  text to inspect; must not be null (it is dereferenced).
 * @param count         amount added to each triggered sparse feature.
 * @param denseValues   not used by the sparse variant.
 * @param sparseValues  feature name -> accumulated value; updated in place.
 */
void InternalStructFeatureSparse::add(const std::string *treeFragment,
                                      float count,
                                      std::vector<float>& denseValues,
                                      std::map<std::string,float>& sparseValues) const {
  // { substring searched for in the fragment, sparse feature it feeds }
  static const char* const kLabelToFeature[][2] = {
    { "VBZ",  "NTVBZ"  },
    { "VBD",  "NTVBD"  },
    { "VBP",  "NTVBP"  },
    { "PP",   "NTPP"   },
    { "SBAR", "NTSBAR" }
  };
  const size_t numLabels = sizeof(kLabelToFeature) / sizeof(kLabelToFeature[0]);

  const std::string& fragment = *treeFragment;
  for (size_t i = 0; i < numLabels; ++i) {
    const bool present = fragment.find(kLabelToFeature[i][0]) != std::string::npos;
    if (present) {
      sparseValues[kLabelToFeature[i][1]] += count;
    }
  }
}


}