Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matous Machacek <machacekmatous@gmail.com> 2012-03-24 19:07:47 +0400
committer Matous Machacek <machacekmatous@gmail.com> 2012-03-24 19:07:47 +0400
commit 1665a23fe18a07f9f0dd26de627411be9343eb73 (patch)
tree c44d7cb122cfba53be2a44583e3e9abfbc195ff7 /mert/SemposScorer.cpp
parent ac5ba83ec873491e72c406679aa386fd8924dabf (diff)
Added possibility to weight words in sempos
Diffstat (limited to 'mert/SemposScorer.cpp')
-rw-r--r--mert/SemposScorer.cpp51
1 files changed, 50 insertions, 1 deletions
diff --git a/mert/SemposScorer.cpp b/mert/SemposScorer.cpp
index 5dee29c3a..62a70c510 100644
--- a/mert/SemposScorer.cpp
+++ b/mert/SemposScorer.cpp
@@ -3,20 +3,28 @@
#include <algorithm>
#include <vector>
#include <stdexcept>
+#include <fstream>
#include "Util.h"
+#include "SemposOverlapping.h"
using namespace std;
SemposScorer::SemposScorer(const string& config) // Sets up the "SEMPOS" scorer from a config string and, when a weights file is configured, loads it.
: StatisticsBasedScorer("SEMPOS", config),
- m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"))),
+ m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"),this)), // NOTE(review): `this` is handed to the factory from the initializer list, before the constructor body has run — confirm the factory only stores the pointer
m_enable_debug(false)
{
const string& debugSwitch = getConfig("debug", "0");
if (debugSwitch == "1") m_enable_debug = true; // only the exact string "1" switches debugging on
m_semposMap.clear();
+
+ string weightsfile = getConfig("weightsfile", ""); // presumably "" is the fallback when the key is absent — verify against getConfig
+ if (weightsfile != "")
+ {
+ loadWeights(weightsfile); // fills weightsMap; may throw or exit the process on a bad file
+ }
}
SemposScorer::~SemposScorer() {} // nothing is released explicitly here
@@ -128,3 +136,44 @@ int SemposScorer::encodeSempos(const string& sempos)
return it->second;
}
}
+
+// Returns the weight stored in weightsMap for `item`, or the neutral
+// weight 1.0f when the map holds no entry for it.
+float SemposScorer::weight(int item) const
+{
+  const std::map<int,float>::const_iterator found = weightsMap.find(item);
+  return (found != weightsMap.end()) ? found->second : 1.0f;
+}
+
+// Reads per-item weights from a tab-separated file into weightsMap.
+//
+// Every non-empty row must consist of exactly two fields, "<item>\t<weight>".
+// The item is run through encodeString() and the returned code is used as the
+// map key; a later row that encodes to the same key overwrites the earlier one.
+//
+// Throws std::runtime_error when a row does not have exactly two fields or
+// when its weight field is not a number. If the file cannot be opened, a
+// message is written to cerr and the process exits with status 1.
+void SemposScorer::loadWeights(const string& weightsfile)
+{
+  ifstream myfile(weightsfile.c_str());
+  if (!myfile.is_open())
+  {
+    cerr << "Unable to open file "<< weightsfile << endl;
+    exit(1);
+  }
+
+  string line;
+  // Loop on the result of getline itself, so a failed read never reaches the parser.
+  while (getline(myfile, line))
+  {
+    if (line.empty()) continue;
+
+    vector<string> fields;
+    split(line, '\t', fields);
+    if (fields.size() != 2) throw std::runtime_error("Bad format of a row in weights file.");
+
+    const int encoded = encodeString(fields[0]);
+
+    // strtod reports where parsing stopped, so unlike atof a non-numeric
+    // weight is rejected instead of silently becoming 0. Trailing whitespace
+    // (e.g. the '\r' of a CRLF file) is still accepted.
+    const char* const text = fields[1].c_str();
+    char* rest = 0;
+    const double parsed = strtod(text, &rest);
+    const bool noNumber = (rest == text);
+    const bool trailingJunk = string(rest).find_first_not_of(" \t\r\n") != string::npos;
+    if (noNumber || trailingJunk) throw std::runtime_error("Bad format of a weight in weights file.");
+
+    // Named `encoded`/`parsed` rather than `weight` to avoid shadowing the member function weight().
+    weightsMap[encoded] = static_cast<float>(parsed);
+  }
+}