Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tetsuo Kiso <tetsuo-s@is.naist.jp>, 2012-03-23 20:52:24 +0400
committer: Tetsuo Kiso <tetsuo-s@is.naist.jp>, 2012-03-23 21:39:04 +0400
commit: 5e5f500254465722b1bd6224d675d47e162546f0 (patch)
tree: 2de109d5e8347e503e926c7b273d4e8c9f2a59f2 /mert/SemposScorer.cpp
parent: 3319805102a5d76fa8fd4795c2b20a3ab2222667 (diff)
Remove unnecessary headers; prefix private members with "m_".
- Add const.
- Add a virtual destructor to abstract class.
- Add some TODOs to some constant values.
Create files for "overlapping" classes.
Diffstat (limited to 'mert/SemposScorer.cpp')
-rw-r--r-- mert/SemposScorer.cpp 170
1 files changed, 42 insertions, 128 deletions
diff --git a/mert/SemposScorer.cpp b/mert/SemposScorer.cpp
index b0c9c17e3..820a747c4 100644
--- a/mert/SemposScorer.cpp
+++ b/mert/SemposScorer.cpp
@@ -1,39 +1,30 @@
-#include <sys/types.h>
-#include <unistd.h>
-#include <sstream>
+#include "SemposScorer.h"
+
+#include <algorithm>
#include <vector>
#include <stdexcept>
-#include <algorithm>
-#include <set>
-#include <map>
-#include <iterator>
-#include "SemposScorer.h"
#include "Util.h"
+using namespace std;
+
SemposScorer::SemposScorer(const string& config)
- : StatisticsBasedScorer("SEMPOS",config),
- debug(false)
+ : StatisticsBasedScorer("SEMPOS", config),
+ m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"))),
+ m_enable_debug(false)
{
- string debugSwitch = getConfig("debug", "0");
- if (debugSwitch == "1") debug = true;
-
- string overlapping = getConfig("overlapping", "cap-micro");
- if (overlapping == "cap-micro") {
- ovr = new CapMicroOverlapping();
- } else if (overlapping == "cap-macro") {
- ovr = new CapMacroOverlapping();
- } else {
- throw runtime_error("Unknown overlapping: " + overlapping);
- }
+ const string& debugSwitch = getConfig("debug", "0");
+ if (debugSwitch == "1") m_enable_debug = true;
- semposMap.clear();
+ m_semposMap.clear();
}
+SemposScorer::~SemposScorer() {}
+
void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
//make sure reference data is clear
- ref_sentences.clear();
+ m_ref_sentences.clear();
//load reference data
for (size_t rid = 0; rid < referenceFiles.size(); ++rid) {
@@ -41,10 +32,10 @@ void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
if (!refin) {
throw runtime_error("Unable to open: " + referenceFiles[rid]);
}
- ref_sentences.push_back(vector<sentence_t>());
+ m_ref_sentences.push_back(vector<sentence_t>());
string line;
while (getline(refin,line)) {
- line = applyFactors(line);
+ line = applyFactors(line);
str_sentence_t sentence;
splitSentence(line, sentence);
@@ -52,68 +43,58 @@ void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
sentence_t encodedSentence;
encodeSentence(sentence, encodedSentence);
- ref_sentences[rid].push_back(encodedSentence);
+ m_ref_sentences[rid].push_back(encodedSentence);
}
}
}
void SemposScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
- vector<int> stats;
-
- string sentence = applyFactors(text);
+ vector<ScoreStatsType> stats;
+ const string& sentence = applyFactors(text);
str_sentence_t splitCandSentence;
splitSentence(sentence, splitCandSentence);
sentence_t encodedCandSentence;
encodeSentence(splitCandSentence, encodedCandSentence);
- if (ref_sentences.size() == 1) {
- stats = ovr->prepareStats(encodedCandSentence, ref_sentences[0][sid]);
+ if (m_ref_sentences.size() == 1) {
+ stats = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[0][sid]);
} else {
- float max = -1;
- for (size_t rid = 0; rid < ref_sentences.size(); ++rid) {
- vector<int> tmp = ovr->prepareStats(encodedCandSentence, ref_sentences[rid][sid]);
- if (ovr->calculateScore(tmp) > max) {
+ float max = -1.0f;
+ for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
+ const vector<ScoreStatsType>& tmp = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[rid][sid]);
+ if (m_ovr->calculateScore(tmp) > max) {
stats = tmp;
}
}
}
-
- stringstream sout;
- copy(stats.begin(),stats.end(),ostream_iterator<int>(sout," "));
- string stats_str = sout.str();
- entry.set(stats_str);
-}
-
-float SemposScorer::calculateScore(const vector<int>& comps) const
-{
- return ovr->calculateScore(comps);
+ entry.set(stats);
}
void SemposScorer::splitSentence(const string& sentence, str_sentence_t& splitSentence)
{
splitSentence.clear();
- vector<string> tokens;
+ vector<string> tokens;
split(sentence, ' ', tokens);
for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it)
{
vector<string> factors;
split(*it, '|', factors);
if (factors.size() != 2) throw runtime_error("Sempos scorer accepts two factors (item|class)");
- string Item = factors[0];
- string Class = factors[1];
- splitSentence.push_back(make_pair(Item, Class));
+ const string& item = factors[0];
+ const string& klass = factors[1];
+ splitSentence.push_back(make_pair(item, klass));
}
}
void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence)
{
for (str_sentence_it it = sentence.begin(); it != sentence.end(); ++it) {
- int tlemma = encodeString(it->first);
- int sempos = encodeSempos(it->second);
+ const int tlemma = encodeString(it->first);
+ const int sempos = encodeSempos(it->second);
if (sempos >= 0) {
encodedSentence.insert(make_pair(tlemma,sempos));
}
@@ -122,11 +103,11 @@ void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& en
int SemposScorer::encodeString(const string& str)
{
- encoding_it encoding = stringMap.find(str);
+ encoding_it encoding = m_stringMap.find(str);
int encoded_str;
- if (encoding == stringMap.end()) {
- encoded_str = (int)stringMap.size();
- stringMap[str] = encoded_str;
+ if (encoding == m_stringMap.end()) {
+ encoded_str = static_cast<int>(m_stringMap.size());
+ m_stringMap[str] = encoded_str;
} else {
encoded_str = encoding->second;
}
@@ -136,15 +117,15 @@ int SemposScorer::encodeString(const string& str)
int SemposScorer::encodeSempos(const string& sempos)
{
if (sempos == "-") return -1;
- encoding_it it = semposMap.find(sempos);
- if (it == semposMap.end())
+ encoding_it it = m_semposMap.find(sempos);
+ if (it == m_semposMap.end())
{
- if (semposMap.size() == maxNOC)
+ if (m_semposMap.size() == kMaxNOC)
{
- throw std::runtime_error("Number of classes is greater than maxNOC");
+ throw std::runtime_error("Number of classes is greater than kMaxNOC");
}
- int classNumber = semposMap.size();
- semposMap[sempos] = classNumber;
+ const int classNumber = static_cast<int>(m_semposMap.size());
+ m_semposMap[sempos] = classNumber;
return classNumber;
}
else
@@ -152,70 +133,3 @@ int SemposScorer::encodeSempos(const string& sempos)
return it->second;
}
}
-
-SemposScorer::~SemposScorer()
-{
- delete ovr;
-}
-
-vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
-{
- vector<int> stats(2);
- sentence_t intersection;
-
- set_intersection(cand.begin(),cand.end(),ref.begin(),ref.end(), inserter(intersection, intersection.begin()));
-
- stats[0] = intersection.size();
- stats[1] = ref.size();
- return stats;
-}
-
-float CapMicroOverlapping::calculateScore(const vector<int>& stats)
-{
- if (stats.size() != 2)
- {
- throw std::runtime_error("Size of stats vector has to be 2");
- }
- if (stats[1] == 0) return (float) 1;
- return stats[0]/(float)stats[1];
-}
-
-
-vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
-{
- vector<int> stats(2*maxNOC);
- sentence_t intersection;
-
- set_intersection(cand.begin(),cand.end(),ref.begin(),ref.end(), inserter(intersection, intersection.begin()));
-
- for (int i = 0; i < 2*maxNOC; ++i) stats[i]=0;
- for (sentence_t::const_iterator it = intersection.begin(); it != intersection.end(); ++it) {
- int sempos = it->second;
- ++stats[2*sempos];
- }
- for (sentence_t::const_iterator it = ref.begin(); it != ref.end(); ++it) {
- int sempos = it->second;
- ++stats[2*sempos+1];
- }
-
- return stats;
-}
-
-float CapMacroOverlapping::calculateScore(const vector<int>& stats)
-{
- if (stats.size() != 2*maxNOC) throw std::runtime_error("Size of stats vector has to be 38");
-
- int n = 0;
- float sum = 0;
- for (int i = 0; i < maxNOC; ++i) {
- int clipped = stats[2*i];
- int refsize = stats[2*i+1];
- if (refsize > 0) {
- sum += clipped / (float) refsize;
- ++n;
- }
- }
- if (n == 0) return 1;
- return sum / n;
-}
-