Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tetsuo Kiso <tetsuo-s@is.naist.jp> 2012-03-10 12:12:34 +0400
committer Tetsuo Kiso <tetsuo-s@is.naist.jp> 2012-03-10 12:12:34 +0400
commit eb2c9ee5e3e4ed76dd9b155c1b509a22d5ab7f4f (patch)
tree 60dd9e1fc444391437237df6a68b2806d1156446
parent e4063b0f3b46ebc8a1d9900110e49a09f5241bab (diff)
mert: Prefix private members with "m_" except TER.
Squashed commit of the following:
- Clean up PRO.
- Clean up ScoreStats.
- Clean up ScoreData.
- Clean up ScoreArray.
- Remove unnecessary headers.
- Clean up ScopedVector.
- Clean up Point.
- Clean up PerScorer.
- Clean up Optimizer.
- Clean up MergeScorer.
- Clean up InterpolatedScorer.
- Clean up FileStream.
- Clean up FeatureStats.
- Remove inefficient string concatenation.
- Clean up FeatureData.
- Clean up FeatureArray.
- Clean up Data.
-rw-r--r-- mert/Data.cpp 60
-rw-r--r-- mert/Data.h 52
-rw-r--r-- mert/FeatureArray.cpp 29
-rw-r--r-- mert/FeatureArray.h 42
-rw-r--r-- mert/FeatureData.cpp 59
-rw-r--r-- mert/FeatureData.h 73
-rw-r--r-- mert/FeatureStats.cpp 92
-rw-r--r-- mert/FeatureStats.h 38
-rw-r--r-- mert/FileStream.cpp 8
-rw-r--r-- mert/FileStream.h 8
-rw-r--r-- mert/InterpolatedScorer.cpp 72
-rw-r--r-- mert/InterpolatedScorer.h 27
-rw-r--r-- mert/MergeScorer.cpp 3
-rw-r--r-- mert/MergeScorer.h 14
-rw-r--r-- mert/Optimizer.cpp 87
-rw-r--r-- mert/Optimizer.h 17
-rw-r--r-- mert/PerScorer.h 13
-rw-r--r-- mert/Point.cpp 107
-rw-r--r-- mert/Point.h 53
-rw-r--r-- mert/ScopedVector.h 36
-rw-r--r-- mert/ScoreArray.cpp 53
-rw-r--r-- mert/ScoreArray.h 49
-rw-r--r-- mert/ScoreData.cpp 55
-rw-r--r-- mert/ScoreData.h 46
-rw-r--r-- mert/ScoreStats.cpp 42
-rw-r--r-- mert/ScoreStats.h 39
-rwxr-xr-x mert/mert.cpp 2
-rw-r--r-- mert/pro.cpp 106
28 files changed, 611 insertions, 671 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index c4a35b9b2..3a50a4550 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -18,34 +18,34 @@
#include "Util.h"
Data::Data()
- : theScorer(NULL),
- number_of_scores(0),
- _sparse_flag(false),
- scoredata(),
- featdata() {}
+ : m_scorer(NULL),
+ m_num_scores(0),
+ m_sparse_flag(false),
+ m_score_data(),
+ m_feature_data() {}
Data::Data(Scorer& ptr)
- : theScorer(&ptr),
- score_type(theScorer->getName()),
- number_of_scores(0),
- _sparse_flag(false),
- scoredata(new ScoreData(*theScorer)),
- featdata(new FeatureData)
+ : m_scorer(&ptr),
+ m_score_type(m_scorer->getName()),
+ m_num_scores(0),
+ m_sparse_flag(false),
+ m_score_data(new ScoreData(*m_scorer)),
+ m_feature_data(new FeatureData)
{
- TRACE_ERR("Data::score_type " << score_type << endl);
- TRACE_ERR("Data::Scorer type from Scorer: " << theScorer->getName() << endl);
+ TRACE_ERR("Data::m_score_type " << m_score_type << endl);
+ TRACE_ERR("Data::Scorer type from Scorer: " << m_scorer->getName() << endl);
}
//ADDED BY TS
void Data::remove_duplicates() {
- size_t nSentences = featdata->size();
- assert(scoredata->size() == nSentences);
+ size_t nSentences = m_feature_data->size();
+ assert(m_score_data->size() == nSentences);
for (size_t s=0; s < nSentences; s++) {
- FeatureArray& feat_array = featdata->get(s);
- ScoreArray& score_array = scoredata->get(s);
+ FeatureArray& feat_array = m_feature_data->get(s);
+ ScoreArray& score_array = m_score_data->get(s);
assert(feat_array.size() == score_array.size());
@@ -147,8 +147,8 @@ void Data::loadnbest(const string &file)
getNextPound(line, sentence, "|||"); // second field
getNextPound(line, feature_str, "|||"); // third field
- theScorer->prepareStats(sentence_index, sentence, scoreentry);
- scoredata->add(scoreentry, sentence_index);
+ m_scorer->prepareStats(sentence_index, sentence, scoreentry);
+ m_score_data->add(scoreentry, sentence_index);
// examine first line for name of features
if (!existsFeatureNames()) {
@@ -185,7 +185,7 @@ void Data::InitFeatureMap(const string& str) {
tmp_name = substr.substr(0, substr.size() - 1);
}
}
- featdata->setFeatureMap(features);
+ m_feature_data->setFeatureMap(features);
}
void Data::AddFeatures(const string& str,
@@ -207,10 +207,10 @@ void Data::AddFeatures(const string& str,
string name = substr;
getNextPound(buf, substr);
feature_entry.addSparse(name, atof(substr.c_str()));
- _sparse_flag = true;
+ m_sparse_flag = true;
}
}
- featdata->add(feature_entry, sentence_index);
+ m_feature_data->add(feature_entry, sentence_index);
}
// TODO
@@ -226,8 +226,8 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
CHECK(shard_size >= 0);
CHECK(shard_size <= 1);
- size_t data_size = scoredata->size();
- CHECK(data_size == featdata->size());
+ size_t data_size = m_score_data->size();
+ CHECK(data_size == m_feature_data->size());
shard_size *= data_size;
const float coeff = static_cast<float>(data_size) / shard_count;
@@ -248,15 +248,15 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
}
}
- Scorer* scorer = ScorerFactory::getScorer(score_type, scorerconfig);
+ Scorer* scorer = ScorerFactory::getScorer(m_score_type, scorerconfig);
shards.push_back(Data(*scorer));
- shards.back().score_type = score_type;
- shards.back().number_of_scores = number_of_scores;
- shards.back()._sparse_flag = _sparse_flag;
+ shards.back().m_score_type = m_score_type;
+ shards.back().m_num_scores = m_num_scores;
+ shards.back().m_sparse_flag = m_sparse_flag;
for (size_t i = 0; i < shard_contents.size(); ++i) {
- shards.back().featdata->add(featdata->get(shard_contents[i]));
- shards.back().scoredata->add(scoredata->get(shard_contents[i]));
+ shards.back().m_feature_data->add(m_feature_data->get(shard_contents[i]));
+ shards.back().m_score_data->add(m_score_data->get(shard_contents[i]));
}
//cerr << endl;
}
diff --git a/mert/Data.h b/mert/Data.h
index dbd7c753b..222a58665 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -29,20 +29,18 @@ typedef boost::shared_ptr<FeatureData> FeatureDataHandle;
class Data
{
private:
- Scorer* theScorer;
- std::string score_type;
- size_t number_of_scores;
- bool _sparse_flag;
+ Scorer* m_scorer;
+ std::string m_score_type;
+ size_t m_num_scores;
+ bool m_sparse_flag;
+ ScoreDataHandle m_score_data;
+ FeatureDataHandle m_feature_data;
// Helper functions for loadnbest();
void InitFeatureMap(const std::string& str);
void AddFeatures(const std::string& str,
const std::string& sentence_index);
-protected:
- ScoreDataHandle scoredata;
- FeatureDataHandle featdata;
-
public:
explicit Data(Scorer& sc);
Data();
@@ -51,45 +49,45 @@ public:
//compiler synthesised shallow copy is available
inline void clear() {
- scoredata->clear();
- featdata->clear();
+ m_score_data->clear();
+ m_feature_data->clear();
}
ScoreDataHandle getScoreData() {
- return scoredata;
+ return m_score_data;
}
FeatureDataHandle getFeatureData() {
- return featdata;
+ return m_feature_data;
}
Scorer* getScorer() {
- return theScorer;
+ return m_scorer;
}
inline size_t NumberOfFeatures() const {
- return featdata->NumberOfFeatures();
+ return m_feature_data->NumberOfFeatures();
}
inline void NumberOfFeatures(size_t v) {
- featdata->NumberOfFeatures(v);
+ m_feature_data->NumberOfFeatures(v);
}
inline std::string Features() const {
- return featdata->Features();
+ return m_feature_data->Features();
}
inline void Features(const std::string &f) {
- featdata->Features(f);
+ m_feature_data->Features(f);
}
- inline bool hasSparseFeatures() const { return _sparse_flag; }
+ inline bool hasSparseFeatures() const { return m_sparse_flag; }
void mergeSparseFeatures();
void loadnbest(const std::string &file);
void load(const std::string &featfile,const std::string &scorefile) {
- featdata->load(featfile);
- scoredata->load(scorefile);
- if (featdata->hasSparseFeatures())
- _sparse_flag = true;
+ m_feature_data->load(featfile);
+ m_score_data->load(scorefile);
+ if (m_feature_data->hasSparseFeatures())
+ m_sparse_flag = true;
}
//ADDED BY TS
@@ -101,20 +99,20 @@ public:
if (bin) cerr << "Binary write mode is selected" << endl;
else cerr << "Binary write mode is NOT selected" << endl;
- featdata->save(featfile, bin);
- scoredata->save(scorefile, bin);
+ m_feature_data->save(featfile, bin);
+ m_score_data->save(scorefile, bin);
}
inline bool existsFeatureNames() const {
- return featdata->existsFeatureNames();
+ return m_feature_data->existsFeatureNames();
}
inline std::string getFeatureName(size_t idx) const {
- return featdata->getFeatureName(idx);
+ return m_feature_data->getFeatureName(idx);
}
inline size_t getFeatureIndex(const std::string& name) const {
- return featdata->getFeatureIndex(name);
+ return m_feature_data->getFeatureIndex(name);
}
/**
diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp
index 594411998..7e9de1cf8 100644
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@@ -12,15 +12,15 @@
FeatureArray::FeatureArray()
- : idx(""), number_of_features(0), _sparse_flag(false) {}
+ : m_index(""), m_num_features(0), m_sparse_flag(false) {}
FeatureArray::~FeatureArray() {}
void FeatureArray::savetxt(std::ofstream& outFile)
{
- outFile << FEATURES_TXT_BEGIN << " " << idx << " " << array_.size()
- << " " << number_of_features << " " << features << std::endl;
- for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
+ outFile << FEATURES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+ << " " << m_num_features << " " << m_features << std::endl;
+ for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
i->savetxt(outFile);
outFile << std::endl;
}
@@ -29,9 +29,9 @@ void FeatureArray::savetxt(std::ofstream& outFile)
void FeatureArray::savebin(std::ofstream& outFile)
{
- outFile << FEATURES_BIN_BEGIN << " " << idx << " " << array_.size()
- << " " << number_of_features << " " << features << std::endl;
- for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++)
+ outFile << FEATURES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+ << " " << m_num_features << " " << m_features << std::endl;
+ for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i)
i->savebin(outFile);
outFile << FEATURES_BIN_END << std::endl;
@@ -56,7 +56,7 @@ void FeatureArray::save(const std::string &file, bool bin)
void FeatureArray::loadbin(ifstream& inFile, size_t n)
{
- FeatureStats entry(number_of_features);
+ FeatureStats entry(m_num_features);
for (size_t i=0 ; i < n; i++) {
entry.loadbin(inFile);
@@ -66,13 +66,13 @@ void FeatureArray::loadbin(ifstream& inFile, size_t n)
void FeatureArray::loadtxt(ifstream& inFile, size_t n)
{
- FeatureStats entry(number_of_features);
+ FeatureStats entry(m_num_features);
for (size_t i=0 ; i < n; i++) {
entry.loadtxt(inFile);
add(entry);
if (entry.getSparse().size()>0)
- _sparse_flag = true;
+ m_sparse_flag = true;
}
}
@@ -100,12 +100,12 @@ void FeatureArray::load(ifstream& inFile)
}
getNextPound(stringBuf, substring);
getNextPound(stringBuf, substring);
- idx = substring;
+ m_index = substring;
getNextPound(stringBuf, substring);
number_of_entries = atoi(substring.c_str());
getNextPound(stringBuf, substring);
- number_of_features = atoi(substring.c_str());
- features = stringBuf;
+ m_num_features = atoi(substring.c_str());
+ m_features = stringBuf;
}
(binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
@@ -144,10 +144,9 @@ bool FeatureArray::check_consistency() const
if (sz == 0)
return true;
- for (featarray_t::const_iterator i = array_.begin(); i != array_.end(); i++) {
+ for (featarray_t::const_iterator i = m_array.begin(); i != m_array.end(); i++) {
if (i->size() != sz)
return false;
}
return true;
}
-
diff --git a/mert/FeatureArray.h b/mert/FeatureArray.h
index b4b305e39..fa5590ac1 100644
--- a/mert/FeatureArray.h
+++ b/mert/FeatureArray.h
@@ -26,69 +26,67 @@ class FeatureArray
private:
// idx to identify the utterance. It can differ from
// the index inside the vector.
- std::string idx;
-
-protected:
- featarray_t array_;
- size_t number_of_features;
- std::string features;
- bool _sparse_flag;
+ std::string m_index;
+ featarray_t m_array;
+ size_t m_num_features;
+ std::string m_features;
+ bool m_sparse_flag;
public:
FeatureArray();
~FeatureArray();
inline void clear() {
- array_.clear();
+ m_array.clear();
}
inline bool hasSparseFeatures() const {
- return _sparse_flag;
+ return m_sparse_flag;
}
inline std::string getIndex() const {
- return idx;
+ return m_index;
}
inline void setIndex(const std::string& value) {
- idx = value;
+ m_index = value;
}
inline FeatureStats& get(size_t i) {
- return array_.at(i);
+ return m_array.at(i);
}
inline const FeatureStats& get(size_t i)const {
- return array_.at(i);
+ return m_array.at(i);
}
void add(FeatureStats& e) {
- array_.push_back(e);
+ m_array.push_back(e);
}
//ADDED BY TS
void swap(size_t i, size_t j) {
- std::swap(array_[i],array_[j]);
+ std::swap(m_array[i], m_array[j]);
}
-
+
void resize(size_t new_size) {
- array_.resize(std::min(new_size,array_.size()));
+ m_array.resize(std::min(new_size, m_array.size()));
}
//END_ADDED
void merge(FeatureArray& e);
inline size_t size() const {
- return array_.size();
+ return m_array.size();
}
inline size_t NumberOfFeatures() const {
- return number_of_features;
+ return m_num_features;
}
inline void NumberOfFeatures(size_t v) {
- number_of_features = v;
+ m_num_features = v;
}
inline std::string Features() const {
- return features;
+ return m_features;
}
inline void Features(const std::string& f) {
- features = f;
+ m_features = f;
}
void savetxt(ofstream& outFile);
diff --git a/mert/FeatureData.cpp b/mert/FeatureData.cpp
index 081f7ab32..61413a3f5 100644
--- a/mert/FeatureData.cpp
+++ b/mert/FeatureData.cpp
@@ -17,12 +17,12 @@ static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
static const float MAX_FLOAT=numeric_limits<float>::max();
FeatureData::FeatureData()
- : number_of_features(0),
- _sparse_flag(false) {}
+ : m_num_features(0),
+ m_sparse_flag(false) {}
void FeatureData::save(std::ofstream& outFile, bool bin)
{
- for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++)
+ for (featdata_t::iterator i = m_array.begin(); i != m_array.end(); i++)
i->save(outFile, bin);
}
@@ -59,7 +59,7 @@ void FeatureData::load(ifstream& inFile)
setFeatureMap(entry.Features());
if (entry.hasSparseFeatures())
- _sparse_flag = true;
+ m_sparse_flag = true;
add(entry);
}
@@ -86,9 +86,9 @@ void FeatureData::add(FeatureArray& e)
if (exists(e.getIndex())) { // array at position e.getIndex() already exists
//enlarge array at position e.getIndex()
size_t pos = getIndex(e.getIndex());
- array_.at(pos).merge(e);
+ m_array.at(pos).merge(e);
} else {
- array_.push_back(e);
+ m_array.push_back(e);
setIndex();
}
}
@@ -99,12 +99,12 @@ void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
//enlarge array at position e.getIndex()
size_t pos = getIndex(sent_idx);
// TRACE_ERR("Inserting " << e << " in array " << sent_idx << std::endl);
- array_.at(pos).add(e);
+ m_array.at(pos).add(e);
} else {
// TRACE_ERR("Creating a new entry in the array and inserting " << e << std::endl);
FeatureArray a;
- a.NumberOfFeatures(number_of_features);
- a.Features(features);
+ a.NumberOfFeatures(m_num_features);
+ a.Features(m_features);
a.setIndex(sent_idx);
a.add(e);
add(a);
@@ -113,10 +113,10 @@ void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
bool FeatureData::check_consistency() const
{
- if (array_.size() == 0)
+ if (m_array.size() == 0)
return true;
- for (featdata_t::const_iterator i = array_.begin(); i != array_.end(); i++)
+ for (featdata_t::const_iterator i = m_array.begin(); i != m_array.end(); i++)
if (!i->check_consistency()) return false;
return true;
@@ -125,26 +125,26 @@ bool FeatureData::check_consistency() const
void FeatureData::setIndex()
{
size_t j=0;
- for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
- idx2arrayname_[j]=(*i).getIndex();
- arrayname2idx_[(*i).getIndex()] = j;
+ for (featdata_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
+ m_index_to_array_name[j]=(*i).getIndex();
+ m_array_name_to_index[(*i).getIndex()] = j;
j++;
}
}
void FeatureData::setFeatureMap(const std::string& feat)
{
- number_of_features = 0;
- features = feat;
+ m_num_features = 0;
+ m_features = feat;
vector<string> buf;
Tokenize(feat.c_str(), ' ', &buf);
for (vector<string>::const_iterator it = buf.begin();
it != buf.end(); ++it) {
- const size_t size = idx2featname_.size();
- featname2idx_[*it] = size;
- idx2featname_[size] = *it;
- ++number_of_features;
+ const size_t size = m_index_to_feature_name.size();
+ m_feature_name_to_index[*it] = size;
+ m_index_to_feature_name[size] = *it;
+ ++m_num_features;
}
}
@@ -152,26 +152,23 @@ string FeatureData::ToString() const {
string res;
char buf[100];
- snprintf(buf, sizeof(buf), "number of features: %lu, ", number_of_features);
+ snprintf(buf, sizeof(buf), "number of features: %lu, ", m_num_features);
res.append(buf);
- snprintf(buf, sizeof(buf), "features: ");
- res.append(buf);
- res.append(features);
+ res.append("features: ");
+ res.append(m_features);
- snprintf(buf, sizeof(buf), ", sparse flag: %s, ", (_sparse_flag) ? "yes" : "no");
+ snprintf(buf, sizeof(buf), ", sparse flag: %s, ", (m_sparse_flag) ? "yes" : "no");
res.append(buf);
- snprintf(buf, sizeof(buf), "feature_id_map = { ");
- res.append(buf);
- for (map<string, size_t>::const_iterator it = featname2idx_.begin();
- it != featname2idx_.end(); ++it) {
+ res.append("feature_id_map = { ");
+ for (map<string, size_t>::const_iterator it = m_feature_name_to_index.begin();
+ it != m_feature_name_to_index.end(); ++it) {
snprintf(buf, sizeof(buf), "%s => %lu, ",
it->first.c_str(), it->second);
res.append(buf);
}
- snprintf(buf, sizeof(buf), "}");
- res.append(buf);
+ res.append("}");
return res;
}
diff --git a/mert/FeatureData.h b/mert/FeatureData.h
index 09fb8e9be..171f880af 100644
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@@ -19,37 +19,37 @@ using namespace std;
class FeatureData
{
private:
- size_t number_of_features;
- std::string features;
- bool _sparse_flag;
-
- map<std::string, size_t> featname2idx_; // map from name to index of features
- map<size_t, std::string> idx2featname_; // map from index to name of features
-
-protected:
- featdata_t array_;
- idx2name idx2arrayname_; // map from index to name of array
- name2idx arrayname2idx_; // map from name to index of array
+ size_t m_num_features;
+ std::string m_features;
+ bool m_sparse_flag;
+ map<std::string, size_t> m_feature_name_to_index; // map from name to index of features
+ map<size_t, std::string> m_index_to_feature_name; // map from index to name of features
+ featdata_t m_array;
+ idx2name m_index_to_array_name; // map from index to name of array
+ name2idx m_array_name_to_index; // map from name to index of array
public:
FeatureData();
~FeatureData() {}
inline void clear() {
- array_.clear();
+ m_array.clear();
}
inline bool hasSparseFeatures() const {
- return _sparse_flag;
+ return m_sparse_flag;
}
+
inline FeatureArray get(const std::string& idx) {
- return array_.at(getIndex(idx));
+ return m_array.at(getIndex(idx));
}
+
inline FeatureArray& get(size_t idx) {
- return array_.at(idx);
+ return m_array.at(idx);
}
+
inline const FeatureArray& get(size_t idx) const {
- return array_.at(idx);
+ return m_array.at(idx);
}
inline bool exists(const std::string& sent_idx) const {
@@ -57,33 +57,38 @@ public:
}
inline bool exists(int sent_idx) const {
- return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+ return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
}
inline FeatureStats& get(size_t i, size_t j) {
- return array_.at(i).get(j);
+ return m_array.at(i).get(j);
}
+
inline const FeatureStats& get(size_t i, size_t j) const {
- return array_.at(i).get(j);
+ return m_array.at(i).get(j);
}
void add(FeatureArray& e);
void add(FeatureStats& e, const std::string& sent_idx);
inline size_t size() const {
- return array_.size();
+ return m_array.size();
}
+
inline size_t NumberOfFeatures() const {
- return number_of_features;
+ return m_num_features;
}
+
inline void NumberOfFeatures(size_t v) {
- number_of_features = v;
+ m_num_features = v;
}
+
inline std::string Features() const {
- return features;
+ return m_features;
}
+
inline void Features(const std::string& f) {
- features = f;
+ m_features = f;
}
void save(const std::string &file, bool bin=false);
@@ -99,29 +104,29 @@ public:
void setIndex();
inline int getIndex(const std::string& idx) const {
- name2idx::const_iterator i = arrayname2idx_.find(idx);
- if (i != arrayname2idx_.end())
+ name2idx::const_iterator i = m_array_name_to_index.find(idx);
+ if (i != m_array_name_to_index.end())
return i->second;
else
return -1;
}
inline std::string getIndex(size_t idx) const {
- idx2name::const_iterator i = idx2arrayname_.find(idx);
- if (i != idx2arrayname_.end())
+ idx2name::const_iterator i = m_index_to_array_name.find(idx);
+ if (i != m_index_to_array_name.end())
throw runtime_error("there is no entry at index " + idx);
return i->second;
}
bool existsFeatureNames() const {
- return (idx2featname_.size() > 0) ? true : false;
+ return (m_index_to_feature_name.size() > 0) ? true : false;
}
std::string getFeatureName(size_t idx) const {
- if (idx >= idx2featname_.size())
+ if (idx >= m_index_to_feature_name.size())
throw runtime_error("Error: you required an too big index");
- map<size_t, std::string>::const_iterator it = idx2featname_.find(idx);
- if (it == idx2featname_.end()) {
+ map<size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
+ if (it == m_index_to_feature_name.end()) {
throw runtime_error("Error: specified id is unknown: " + idx);
} else {
return it->second;
@@ -129,8 +134,8 @@ public:
}
size_t getFeatureIndex(const std::string& name) const {
- map<std::string, size_t>::const_iterator it = featname2idx_.find(name);
- if (it == featname2idx_.end())
+ map<std::string, size_t>::const_iterator it = m_feature_name_to_index.find(name);
+ if (it == m_feature_name_to_index.end())
throw runtime_error("Error: feature " + name + " is unknown");
return it->second;
}
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 73d7ec13a..eb8d96b3d 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -15,58 +15,58 @@ namespace {
const int kAvailableSize = 8;
} // namespace
-SparseVector::name2id_t SparseVector::name2id_;
-SparseVector::id2name_t SparseVector::id2name_;
+SparseVector::name2id_t SparseVector::m_name_to_id;
+SparseVector::id2name_t SparseVector::m_id_to_name;
FeatureStatsType SparseVector::get(const string& name) const {
- name2id_t::const_iterator name2id_iter = name2id_.find(name);
- if (name2id_iter == name2id_.end()) return 0;
+ name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
+ if (name2id_iter == m_name_to_id.end()) return 0;
size_t id = name2id_iter->second;
return get(id);
}
FeatureStatsType SparseVector::get(size_t id) const {
- fvector_t::const_iterator fvector_iter = fvector_.find(id);
- if (fvector_iter == fvector_.end()) return 0;
+ fvector_t::const_iterator fvector_iter = m_fvector.find(id);
+ if (fvector_iter == m_fvector.end()) return 0;
return fvector_iter->second;
}
void SparseVector::set(const string& name, FeatureStatsType value) {
- name2id_t::const_iterator name2id_iter = name2id_.find(name);
+ name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
size_t id = 0;
- if (name2id_iter == name2id_.end()) {
- id = id2name_.size();
- id2name_.push_back(name);
- name2id_[name] = id;
+ if (name2id_iter == m_name_to_id.end()) {
+ id = m_id_to_name.size();
+ m_id_to_name.push_back(name);
+ m_name_to_id[name] = id;
} else {
id = name2id_iter->second;
}
- fvector_[id] = value;
+ m_fvector[id] = value;
}
void SparseVector::write(ostream& out, const string& sep) const {
- for (fvector_t::const_iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
+ for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
if (abs(i->second) < 0.00001) continue;
- string name = id2name_[i->first];
+ string name = m_id_to_name[i->first];
out << name << sep << i->second << " ";
}
}
void SparseVector::clear() {
- fvector_.clear();
+ m_fvector.clear();
}
SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
//All the elements that have values in *this
- for (fvector_t::iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
- fvector_[i->first] = i->second - rhs.get(i->first);
+ for (fvector_t::iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
+ m_fvector[i->first] = i->second - rhs.get(i->first);
}
//Any elements in rhs, that have no value in *this
- for (fvector_t::const_iterator i = rhs.fvector_.begin();
- i != rhs.fvector_.end(); ++i) {
- if (fvector_.find(i->first) == fvector_.end()) {
- fvector_[i->first] = -(i->second);
+ for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+ i != rhs.m_fvector.end(); ++i) {
+ if (m_fvector.find(i->first) == m_fvector.end()) {
+ m_fvector[i->first] = -(i->second);
}
}
return *this;
@@ -79,37 +79,37 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
}
FeatureStats::FeatureStats()
- : available_(kAvailableSize), entries_(0),
- array_(new FeatureStatsType[available_]) {}
+ : m_available_size(kAvailableSize), m_entries(0),
+ m_array(new FeatureStatsType[m_available_size]) {}
FeatureStats::FeatureStats(const size_t size)
- : available_(size), entries_(size),
- array_(new FeatureStatsType[available_])
+ : m_available_size(size), m_entries(size),
+ m_array(new FeatureStatsType[m_available_size])
{
- memset(array_, 0, GetArraySizeWithBytes());
+ memset(m_array, 0, GetArraySizeWithBytes());
}
FeatureStats::FeatureStats(std::string &theString)
- : available_(0), entries_(0), array_(NULL)
+ : m_available_size(0), m_entries(0), m_array(NULL)
{
set(theString);
}
FeatureStats::~FeatureStats()
{
- if (array_) {
- delete [] array_;
- array_ = NULL;
+ if (m_array) {
+ delete [] m_array;
+ m_array = NULL;
}
}
void FeatureStats::Copy(const FeatureStats &stats)
{
- available_ = stats.available();
- entries_ = stats.size();
- array_ = new FeatureStatsType[available_];
- memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
- map_ = stats.getSparse();
+ m_available_size = stats.available();
+ m_entries = stats.size();
+ m_array = new FeatureStatsType[m_available_size];
+ memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
+ m_map = stats.getSparse();
}
FeatureStats::FeatureStats(const FeatureStats &stats)
@@ -119,29 +119,29 @@ FeatureStats::FeatureStats(const FeatureStats &stats)
FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
{
- delete [] array_;
+ delete [] m_array;
Copy(stats);
return *this;
}
void FeatureStats::expand()
{
- available_ *= 2;
- featstats_t t_ = new FeatureStatsType[available_];
- memcpy(t_, array_, GetArraySizeWithBytes());
- delete [] array_;
- array_ = t_;
+ m_available_size *= 2;
+ featstats_t t_ = new FeatureStatsType[m_available_size];
+ memcpy(t_, m_array, GetArraySizeWithBytes());
+ delete [] m_array;
+ m_array = t_;
}
void FeatureStats::add(FeatureStatsType v)
{
if (isfull()) expand();
- array_[entries_++]=v;
+ m_array[m_entries++]=v;
}
void FeatureStats::addSparse(const string& name, FeatureStatsType v)
{
- map_.set(name,v);
+ m_map.set(name,v);
}
void FeatureStats::set(std::string &theString)
@@ -166,7 +166,7 @@ void FeatureStats::set(std::string &theString)
void FeatureStats::loadbin(std::ifstream& inFile)
{
- inFile.read((char*) array_, GetArraySizeWithBytes());
+ inFile.read((char*) m_array, GetArraySizeWithBytes());
}
void FeatureStats::loadtxt(std::ifstream& inFile)
@@ -204,7 +204,7 @@ void FeatureStats::savetxt(std::ofstream& outFile)
void FeatureStats::savebin(std::ofstream& outFile)
{
- outFile.write((char*) array_, GetArraySizeWithBytes());
+ outFile.write((char*) m_array, GetArraySizeWithBytes());
}
ostream& operator<<(ostream& o, const FeatureStats& e)
@@ -230,7 +230,7 @@ bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
if (f1.get(k) != f2.get(k))
return false;
}
-
+
return true;
}
//END_ADDED
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index f29862c00..b71a99fe1 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -31,7 +31,7 @@ public:
void set(const std::string& name, FeatureStatsType value);
void clear();
size_t size() const {
- return fvector_.size();
+ return m_fvector.size();
}
void write(std::ostream& out, const std::string& sep = " ") const;
@@ -39,9 +39,9 @@ public:
SparseVector& operator-=(const SparseVector& rhs);
private:
- static name2id_t name2id_;
- static id2name_t id2name_;
- fvector_t fvector_;
+ static name2id_t m_name_to_id;
+ static id2name_t m_id_to_name;
+ fvector_t m_fvector;
};
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
@@ -49,12 +49,12 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
class FeatureStats
{
private:
- size_t available_;
- size_t entries_;
+ size_t m_available_size;
+ size_t m_entries;
// TODO: Use smart pointer for exceptional-safety.
- featstats_t array_;
- SparseVector map_;
+ featstats_t m_array;
+ SparseVector m_map;
public:
FeatureStats();
@@ -70,33 +70,33 @@ public:
void Copy(const FeatureStats &stats);
bool isfull() const {
- return (entries_ < available_) ? 0 : 1;
+ return (m_entries < m_available_size) ? 0 : 1;
}
void expand();
void add(FeatureStatsType v);
void addSparse(const string& name, FeatureStatsType v);
void clear() {
- memset((void*)array_, 0, GetArraySizeWithBytes());
- map_.clear();
+ memset((void*)m_array, 0, GetArraySizeWithBytes());
+ m_map.clear();
}
void reset() {
- entries_ = 0;
+ m_entries = 0;
clear();
}
inline FeatureStatsType get(size_t i) {
- return array_[i];
+ return m_array[i];
}
inline FeatureStatsType get(size_t i)const {
- return array_[i];
+ return m_array[i];
}
inline featstats_t getArray() const {
- return array_;
+ return m_array;
}
inline const SparseVector& getSparse() const {
- return map_;
+ return m_map;
}
void set(std::string &theString);
@@ -106,15 +106,15 @@ public:
}
size_t GetArraySizeWithBytes() const {
- return entries_ * sizeof(FeatureStatsType);
+ return m_entries * sizeof(FeatureStatsType);
}
inline size_t size() const {
- return entries_;
+ return m_entries;
}
inline size_t available() const {
- return available_;
+ return m_available_size;
}
void savetxt(const std::string &file);
diff --git a/mert/FileStream.cpp b/mert/FileStream.cpp
index 11fd58e26..93b7138b2 100644
--- a/mert/FileStream.cpp
+++ b/mert/FileStream.cpp
@@ -13,11 +13,11 @@ bool IsGzipFile(const std::string &filename) {
} // namespace
inputfilestream::inputfilestream(const std::string &filePath)
- : std::istream(0), m_streambuf(0), is_good(false)
+ : std::istream(0), m_streambuf(0), m_is_good(false)
{
// check if file is readable
std::filebuf* fb = new std::filebuf();
- is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);
+ m_is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);
if (IsGzipFile(filePath)) {
fb->close();
@@ -40,11 +40,11 @@ void inputfilestream::close()
}
outputfilestream::outputfilestream(const std::string &filePath)
- : std::ostream(0), m_streambuf(0), is_good(false)
+ : std::ostream(0), m_streambuf(0), m_is_good(false)
{
// check if file is readable
std::filebuf* fb = new std::filebuf();
- is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
+ m_is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
if (IsGzipFile(filePath)) {
throw runtime_error("Output to a zipped file not supported!");
diff --git a/mert/FileStream.h b/mert/FileStream.h
index 78b6ccb88..671e996ca 100644
--- a/mert/FileStream.h
+++ b/mert/FileStream.h
@@ -9,13 +9,13 @@ class inputfilestream : public std::istream
{
protected:
std::streambuf *m_streambuf;
- bool is_good;
+ bool m_is_good;
public:
explicit inputfilestream(const std::string &filePath);
virtual ~inputfilestream();
- bool good() const { return is_good; }
+ bool good() const { return m_is_good; }
void close();
};
@@ -23,13 +23,13 @@ class outputfilestream : public std::ostream
{
protected:
std::streambuf *m_streambuf;
- bool is_good;
+ bool m_is_good;
public:
explicit outputfilestream(const std::string &filePath);
virtual ~outputfilestream();
- bool good() const { return is_good; }
+ bool good() const { return m_is_good; }
void close();
};
diff --git a/mert/InterpolatedScorer.cpp b/mert/InterpolatedScorer.cpp
index 1951e4234..fced4b4fe 100644
--- a/mert/InterpolatedScorer.cpp
+++ b/mert/InterpolatedScorer.cpp
@@ -1,35 +1,36 @@
-#include "ScorerFactory.h"
#include "InterpolatedScorer.h"
+#include "ScorerFactory.h"
#include "Util.h"
using namespace std;
-
-InterpolatedScorer::InterpolatedScorer (const string& name, const string& config): Scorer(name,config)
+// TODO: This is too long. Consider creating a function for
+// initialization such as Init().
+InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
+ : Scorer(name,config)
{
-
// name would be: HAMMING,BLEU or similar
string scorers = name;
while (scorers.length() > 0) {
string scorertype = "";
- getNextPound(scorers,scorertype,",");
- Scorer *theScorer=ScorerFactory::getScorer(scorertype,config);
- _scorers.push_back(theScorer);
+ getNextPound(scorers, scorertype,",");
+ Scorer *scorer = ScorerFactory::getScorer(scorertype,config);
+ m_scorers.push_back(scorer);
}
- if (_scorers.size() == 0) {
+ if (m_scorers.size() == 0) {
throw runtime_error("There are no scorers");
}
- cerr << "Number of scorers: " << _scorers.size() << endl;
+ cerr << "Number of scorers: " << m_scorers.size() << endl;
//TODO debug this
string wtype = getConfig("weights","");
//Default weights set to uniform ie. if two weights 0.5 each
//weights should add to 1
if (wtype.length() == 0) {
- float weight = 1.0/_scorers.size() ;
+ float weight = 1.0 / m_scorers.size() ;
//cout << " Default weights:" << weight << endl;
- for (size_t i = 0; i < _scorers.size(); i ++) {
- _scorerWeights.push_back(weight);
+ for (size_t i = 0; i < m_scorers.size(); i ++) {
+ m_scorer_weights.push_back(weight);
}
} else {
float tot=0;
@@ -38,24 +39,24 @@ InterpolatedScorer::InterpolatedScorer (const string& name, const string& config
string scoreweight = "";
getNextPound(wtype,scoreweight,"+");
float weight = atof(scoreweight.c_str());
- _scorerWeights.push_back(weight);
+ m_scorer_weights.push_back(weight);
tot += weight;
//cout << " :" << weight ;
}
//cout << endl;
- if (tot != float(1)) {
- for (vector<float>::iterator it = _scorerWeights.begin(); it != _scorerWeights.end(); ++it)
- {
+ if (tot != float(1)) { // TODO: fix this checking in terms of readability.
+ for (vector<float>::iterator it = m_scorer_weights.begin();
+ it != m_scorer_weights.end(); ++it) {
*it /= tot;
}
}
- if (_scorers.size() != _scorerWeights.size()) {
+ if (m_scorers.size() != m_scorer_weights.size()) {
throw runtime_error("The number of weights does not equal the number of scorers!");
}
}
cerr << "The weights for the interpolated scorers are: " << endl;
- for (vector<float>::iterator it = _scorerWeights.begin(); it < _scorerWeights.end(); it++) {
+ for (vector<float>::iterator it = m_scorer_weights.begin(); it < m_scorer_weights.end(); it++) {
cerr << *it << " " ;
}
cerr <<endl;
@@ -65,7 +66,8 @@ void InterpolatedScorer::setScoreData(ScoreData* data)
{
size_t last = 0;
m_score_data = data;
- for (ScopedVector<Scorer>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+ for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
int numScoresScorer = (*itsc)->NumberOfScores();
ScoreData* newData =new ScoreData(**itsc);
for (size_t i = 0; i < data->size(); i++) {
@@ -110,14 +112,16 @@ void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& di
{
//cout << "*******InterpolatedScorer::score" << endl;
size_t scorerNum = 0;
- for (ScopedVector<Scorer>::const_iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+ for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
//int numScores = (*itsc)->NumberOfScores();
statscores_t tscores;
(*itsc)->score(candidates,diffs,tscores);
size_t inc = 0;
- for (statscores_t::iterator itstatsc = tscores.begin(); itstatsc!=tscores.end(); itstatsc++) {
+ for (statscores_t::iterator itstatsc = tscores.begin();
+ itstatsc != tscores.end(); ++itstatsc) {
//cout << "Scores " << (*itstatsc) << endl;
- float weight = _scorerWeights[scorerNum];
+ float weight = m_scorer_weights[scorerNum];
if (weight == 0) {
stringstream msg;
msg << "No weights for scorer" << scorerNum ;
@@ -139,7 +143,8 @@ void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& di
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
- for (ScopedVector<Scorer>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+ for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
(*itsc)->setReferenceFiles(referenceFiles);
}
}
@@ -147,8 +152,9 @@ void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
stringstream buff;
- int i=0;
- for (ScopedVector<Scorer>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
+ int i = 0;
+ for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
ScoreStats tempEntry;
(*itsc)->prepareStats(sid, text, tempEntry);
if (i > 0) buff << " ";
@@ -167,16 +173,10 @@ void InterpolatedScorer::setFactors(const string& factors)
vector<string> fsplit;
split(factors, ',', fsplit);
- if (fsplit.size() != _scorers.size()) throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
-
- for (size_t i = 0; i < _scorers.size(); ++i)
- {
- _scorers[i]->setFactors(fsplit[i]);
+ if (fsplit.size() != m_scorers.size())
+ throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
+
+ for (size_t i = 0; i < m_scorers.size(); ++i) {
+ m_scorers[i]->setFactors(fsplit[i]);
}
}
-
-
-
-
-
-
diff --git a/mert/InterpolatedScorer.h b/mert/InterpolatedScorer.h
index 2a538bc39..5f76be538 100644
--- a/mert/InterpolatedScorer.h
+++ b/mert/InterpolatedScorer.h
@@ -1,14 +1,6 @@
-#ifndef __INTERPOLATED_SCORER_H__
-#define __INTERPOLATED_SCORER_H__
-
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-#include <iterator>
-#include <limits>
-#include <set>
-#include <sstream>
-#include <stdexcept>
+#ifndef MERT_INTERPOLATED_SCORER_H_
+#define MERT_INTERPOLATED_SCORER_H_
+
#include <string>
#include <vector>
#include "Types.h"
@@ -33,12 +25,13 @@ public:
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual size_t NumberOfScores() const {
- size_t sz=0;
- for (ScopedVector<Scorer>::const_iterator itsc = _scorers.begin(); itsc != _scorers.end(); itsc++) {
+ size_t sz = 0;
+ for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
+ itsc != m_scorers.end(); ++itsc) {
sz += (*itsc)->NumberOfScores();
}
return sz;
- };
+ }
virtual void setScoreData(ScoreData* data);
@@ -48,13 +41,13 @@ public:
virtual void setFactors(const string& factors);
protected:
- ScopedVector<Scorer> _scorers;
+ ScopedVector<Scorer> m_scorers;
// Take the ownership of the heap-allocated the objects
// by Scorer objects.
ScopedVector<ScoreData> m_scorers_score_data;
- vector<float> _scorerWeights;
+ vector<float> m_scorer_weights;
};
-#endif //__INTERPOLATED_SCORER_H
+#endif // MERT_INTERPOLATED_SCORER_H_
diff --git a/mert/MergeScorer.cpp b/mert/MergeScorer.cpp
index 1dbae600f..7a80f1477 100644
--- a/mert/MergeScorer.cpp
+++ b/mert/MergeScorer.cpp
@@ -14,7 +14,8 @@
using namespace TERCpp;
MergeScorer::MergeScorer(const string& config)
- : StatisticsBasedScorer("MERGE",config), kLENGTH(4) {}
+ : StatisticsBasedScorer("MERGE", config) {}
+
MergeScorer::~MergeScorer() {}
void MergeScorer::setReferenceFiles(const vector<string>& referenceFiles)
diff --git a/mert/MergeScorer.h b/mert/MergeScorer.h
index 579703412..2d7030421 100644
--- a/mert/MergeScorer.h
+++ b/mert/MergeScorer.h
@@ -13,6 +13,8 @@ using namespace std;
class PerScorer;
class ScoreStats;
+const int kMergeScorerLength = 4;
+
/**
* Merge scoring.
*/
@@ -23,23 +25,13 @@ public:
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
- virtual size_t NumberOfScores() const
- {
- return 0;
- }
-
- void whoami() const {
- cerr << "I AM MergeScorer" << endl;
- }
+ virtual size_t NumberOfScores() const { return 0; }
protected:
friend class PerScorer;
virtual float calculateScore(const vector<int>& comps) const;
private:
- const int kLENGTH;
-
// no copying allowed
MergeScorer(const MergeScorer&);
MergeScorer& operator=(const MergeScorer&);
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 0e2ce9312..ca5af3736 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -32,36 +32,25 @@ inline float intersect(float m1, float b1, float m2, float b2)
} // namespace
-
-void Optimizer::SetScorer(Scorer *_scorer)
-{
- scorer = _scorer;
-}
-
-void Optimizer::SetFData(FeatureDataHandle _FData)
-{
- FData = _FData;
-}
-
Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
- : scorer(NULL), FData(), number_of_random_directions(nrandom)
+ : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom)
{
- // Warning: the init vector is a full set of parameters, of dimension pdim!
- Point::pdim = Pd;
+ // Warning: the init vector is a full set of parameters, of dimension m_pdim!
+ Point::m_pdim = Pd;
CHECK(start.size() == Pd);
- Point::dim = i2O.size();
- Point::optindices = i2O;
- if (Point::pdim > Point::dim) {
- for (unsigned int i = 0; i < Point::pdim; i++) {
+ Point::m_dim = i2O.size();
+ Point::m_opt_indices = i2O;
+ if (Point::m_pdim > Point::m_dim) {
+ for (unsigned int i = 0; i < Point::m_pdim; i++) {
unsigned int j = 0;
- while (j < Point::dim && i != i2O[j])
+ while (j < Point::m_dim && i != i2O[j])
j++;
- // The index i wasnt found on optindices, it is a fixed index,
+ // The index i wasn't found in m_opt_indices, so it is a fixed index and
// we use the value of the start vector.
- if (j == Point::dim)
- Point::fixedweights[i] = start[i];
+ if (j == Point::m_dim)
+ Point::m_fixed_weights[i] = start[i];
}
}
}
@@ -112,12 +101,12 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
//cerr << "Sentence " << S << endl;
multimap<float, unsigned> gradient;
vector<float> f0;
- f0.resize(FData->get(S).size());
- for (unsigned j = 0; j < FData->get(S).size(); j++) {
+ f0.resize(m_feature_data->get(S).size());
+ for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
// gradient of the feature function for this particular target sentence
- gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
+ gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
// compute the feature function at the origin point
- f0[j] = origin * FData->get(S, j);
+ f0[j] = origin * m_feature_data->get(S, j);
}
// Now let's compute the 1best for each value of x.
@@ -308,7 +297,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
{
- CHECK(FData);
+ CHECK(m_feature_data);
bests.clear();
bests.resize(size());
@@ -316,8 +305,8 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
float bestfs = MIN_FLOAT;
unsigned idx = 0;
unsigned j;
- for (j = 0; j < FData->get(i).size(); j++) {
- float curfs = P * FData->get(i, j);
+ for (j = 0; j < m_feature_data->get(i).size(); j++) {
+ float curfs = P * m_feature_data->get(i, j);
if (curfs > bestfs) {
bestfs = curfs;
idx = j;
@@ -330,15 +319,15 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
statscore_t Optimizer::Run(Point& P) const
{
- if (!FData) {
+ if (!m_feature_data) {
cerr << "error trying to optimize without Features loaded" << endl;
exit(2);
}
- if (!scorer) {
+ if (!m_scorer) {
cerr << "error trying to optimize without a Scorer loaded" << endl;
exit(2);
}
- if (scorer->getReferenceSize() != FData->size()) {
+ if (m_scorer->getReferenceSize() != m_feature_data->size()) {
cerr << "error length mismatch between feature file and score file" << endl;
exit(2);
}
@@ -361,11 +350,11 @@ statscore_t Optimizer::Run(Point& P) const
vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector <pair<unsigned,unsigned> > > thediffs) const
{
- CHECK(scorer);
+ CHECK(m_scorer);
vector<statscore_t> theres;
- scorer->score(thefirst, thediffs, theres);
+ m_scorer->score(thefirst, thediffs, theres);
return theres;
}
@@ -392,7 +381,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P) const
Point linebest;
- for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
+ for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
if (verboselevel() > 4) {
// cerr<<"minimizing along direction "<<d<<endl;
cerr << "starting point: " << P << " => " << prevscore << endl;
@@ -440,7 +429,7 @@ statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
// do specified number of random direction optimizations
unsigned int nrun = 0;
unsigned int nrun_no_change = 0;
- for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++)
+ for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++)
{
// choose a random direction in which to optimize
Point direction;
@@ -476,32 +465,32 @@ statscore_t RandomOptimizer::TrueRun(Point& P) const
//--------------------------------------
-vector<string> OptimizerFactory::typenames;
+vector<string> OptimizerFactory::m_type_names;
void OptimizerFactory::SetTypeNames()
{
- if (typenames.empty()) {
- typenames.resize(NOPTIMIZER);
- typenames[POWELL]="powell";
- typenames[RANDOM_DIRECTION]="random-direction";
- typenames[RANDOM]="random";
+ if (m_type_names.empty()) {
+ m_type_names.resize(NOPTIMIZER);
+ m_type_names[POWELL]="powell";
+ m_type_names[RANDOM_DIRECTION]="random-direction";
+ m_type_names[RANDOM]="random";
// Add new type there
}
}
vector<string> OptimizerFactory::GetTypeNames()
{
- if (typenames.empty())
+ if (m_type_names.empty())
SetTypeNames();
- return typenames;
+ return m_type_names;
}
OptimizerFactory::OptType OptimizerFactory::GetOType(const string& type)
{
unsigned int thetype;
- if (typenames.empty())
+ if (m_type_names.empty())
SetTypeNames();
- for (thetype = 0; thetype < typenames.size(); thetype++)
- if (typenames[thetype] == type)
+ for (thetype = 0; thetype < m_type_names.size(); thetype++)
+ if (m_type_names[thetype] == type)
break;
return((OptType)thetype);
}
@@ -513,8 +502,8 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o,
cerr << "Error: unknown Optimizer type " << type << endl;
cerr << "Known Algorithm are:" << endl;
unsigned int thetype;
- for (thetype = 0; thetype < typenames.size(); thetype++)
- cerr << typenames[thetype] << endl;
+ for (thetype = 0; thetype < m_type_names.size(); thetype++)
+ cerr << m_type_names[thetype] << endl;
throw ("unknown Optimizer Type");
}
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index 4a964665d..519e9ecbc 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -20,18 +20,19 @@ class Point;
class Optimizer
{
protected:
- Scorer *scorer; // no accessor for them only child can use them
- FeatureDataHandle FData; // no accessor for them only child can use them
- unsigned int number_of_random_directions;
+ Scorer *m_scorer; // no accessor for them only child can use them
+ FeatureDataHandle m_feature_data; // no accessor for them only child can use them
+ unsigned int m_num_random_directions;
public:
Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
- void SetScorer(Scorer *_scorer);
- void SetFData(FeatureDataHandle _FData);
+
+ void SetScorer(Scorer *scorer) { m_scorer = scorer; }
+ void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
virtual ~Optimizer();
unsigned size() const {
- return FData ? FData->size() : 0;
+ return m_feature_data ? m_feature_data->size() : 0;
}
/**
@@ -53,7 +54,7 @@ public:
* Given a set of nbests, get the Statistical score.
*/
statscore_t GetStatScore(const vector<unsigned>& nbests) const {
- return scorer->score(nbests);
+ return m_scorer->score(nbests);
}
statscore_t GetStatScore(const Point& param) const;
@@ -129,7 +130,7 @@ private:
// Setup optimization types.
static void SetTypeNames();
- static vector<string> typenames;
+ static vector<string> m_type_names;
};
#endif // OPTIMIZER_H
diff --git a/mert/PerScorer.h b/mert/PerScorer.h
index f42974357..d32e14029 100644
--- a/mert/PerScorer.h
+++ b/mert/PerScorer.h
@@ -1,9 +1,7 @@
#ifndef MERT_PER_SCORER_H_
#define MERT_PER_SCORER_H_
-#include <iostream>
#include <set>
-#include <sstream>
#include <string>
#include <vector>
#include "Types.h"
@@ -27,18 +25,9 @@ public:
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
- virtual size_t NumberOfScores() const {
- // cerr << "PerScorer: 3" << endl;
- return 3;
- }
-
+ virtual size_t NumberOfScores() const { return 3; }
virtual float calculateScore(const vector<int>& comps) const;
- void whoami() const {
- cerr << "I AM PerScorer" << std::endl;
- }
-
private:
// no copying allowed
PerScorer(const PerScorer&);
diff --git a/mert/Point.cpp b/mert/Point.cpp
index fe371ef53..ae5dbc21b 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -8,41 +8,41 @@
using namespace std;
-vector<unsigned> Point::optindices;
+vector<unsigned> Point::m_opt_indices;
-unsigned Point::dim = 0;
+unsigned Point::m_dim = 0;
-map<unsigned,statscore_t> Point::fixedweights;
+map<unsigned,statscore_t> Point::m_fixed_weights;
-unsigned Point::pdim = 0;
-unsigned Point::ncall = 0;
+unsigned Point::m_pdim = 0;
+unsigned Point::m_ncall = 0;
vector<parameter_t> Point::m_min;
vector<parameter_t> Point::m_max;
-Point::Point() : vector<parameter_t>(dim), score_(0.0) {}
+Point::Point() : vector<parameter_t>(m_dim), m_score(0.0) {}
-//Can initialize from a vector of dim or pdim
+// Can initialize from a vector of size m_dim or m_pdim.
Point::Point(const vector<parameter_t>& init,
const vector<parameter_t>& min,
const vector<parameter_t>& max)
- : vector<parameter_t>(Point::dim), score_(0.0)
+ : vector<parameter_t>(Point::m_dim), m_score(0.0)
{
- m_min.resize(Point::dim);
- m_max.resize(Point::dim);
- if(init.size()==dim) {
- for (unsigned int i=0; i<Point::dim; i++) {
- operator[](i)=init[i];
+ m_min.resize(Point::m_dim);
+ m_max.resize(Point::m_dim);
+ if (init.size() == m_dim) {
+ for (unsigned int i = 0; i < Point::m_dim; i++) {
+ operator[](i) = init[i];
m_min[i] = min[i];
m_max[i] = max[i];
}
} else {
- CHECK(init.size()==pdim);
- CHECK(optindices.size() == Point::dim);
- for (unsigned int i=0; i<Point::dim; i++) {
- operator[](i)=init[optindices[i]];
- m_min[i] = min[optindices[i]];
- m_max[i] = max[optindices[i]];
+ CHECK(init.size() == m_pdim);
+ CHECK(m_opt_indices.size() == Point::m_dim);
+ for (unsigned int i = 0; i < Point::m_dim; i++) {
+ operator[](i) = init[m_opt_indices[i]];
+ m_min[i] = min[m_opt_indices[i]];
+ m_max[i] = max[m_opt_indices[i]];
}
}
}
@@ -51,9 +51,9 @@ Point::~Point() {}
void Point::Randomize()
{
- CHECK(m_min.size()==Point::dim);
- CHECK(m_max.size()==Point::dim);
- for (unsigned int i=0; i<size(); i++) {
+ CHECK(m_min.size() == Point::m_dim);
+ CHECK(m_max.size() == Point::m_dim);
+ for (unsigned int i = 0; i < size(); i++) {
operator[](i) = m_min[i] +
static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]);
}
@@ -61,16 +61,17 @@ void Point::Randomize()
double Point::operator*(const FeatureStats& F) const
{
- ncall++; // to track performance
- double prod=0.0;
- if(OptimizeAll())
+ m_ncall++; // to track performance
+ double prod = 0.0;
+ if (OptimizeAll())
for (unsigned i=0; i<size(); i++)
- prod+= operator[](i)*F.get(i);
+ prod += operator[](i) * F.get(i);
else {
- for (unsigned i=0; i<size(); i++)
- prod+= operator[](i)*F.get(optindices[i]);
- for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
- prod+=it->second*F.get(it->first);
+ for (unsigned i = 0; i < size(); i++)
+ prod += operator[](i) * F.get(m_opt_indices[i]);
+ for(map<unsigned, float>::iterator it = m_fixed_weights.begin();
+ it != m_fixed_weights.end(); ++it)
+ prod += it->second * F.get(it->first);
}
return prod;
}
@@ -83,7 +84,7 @@ Point Point::operator+(const Point& p2) const
Res[i] += p2[i];
}
- Res.score_ = numeric_limits<statscore_t>::max();
+ Res.m_score = numeric_limits<statscore_t>::max();
return Res;
}
@@ -93,7 +94,7 @@ void Point::operator+=(const Point& p2)
for (unsigned i = 0; i < size(); i++) {
operator[](i) += p2[i];
}
- score_ = numeric_limits<statscore_t>::max();
+ m_score = numeric_limits<statscore_t>::max();
}
Point Point::operator*(float l) const
@@ -102,14 +103,14 @@ Point Point::operator*(float l) const
for (unsigned i = 0; i < size(); i++) {
Res[i] *= l;
}
- Res.score_ = numeric_limits<statscore_t>::max();
+ Res.m_score = numeric_limits<statscore_t>::max();
return Res;
}
ostream& operator<<(ostream& o, const Point& P)
{
vector<parameter_t> w = P.GetAllWeights();
- for (unsigned int i = 0; i < Point::pdim; i++) {
+ for (unsigned int i = 0; i < Point::m_pdim; i++) {
o << w[i] << " ";
}
return o;
@@ -118,24 +119,24 @@ ostream& operator<<(ostream& o, const Point& P)
void Point::NormalizeL2()
{
parameter_t norm=0.0;
- for (unsigned int i=0; i<size(); i++)
- norm+= operator[](i)*operator[](i);
- if(norm!=0.0) {
- norm=sqrt(norm);
- for (unsigned int i=0; i<size(); i++)
- operator[](i)/=norm;
+ for (unsigned int i = 0; i < size(); i++)
+ norm += operator[](i) * operator[](i);
+ if (norm != 0.0) {
+ norm = sqrt(norm);
+ for (unsigned int i = 0; i < size(); i++)
+ operator[](i) /= norm;
}
}
void Point::NormalizeL1()
{
- parameter_t norm=0.0;
- for (unsigned int i=0; i<size(); i++)
- norm+= abs(operator[](i));
- if(norm!=0.0) {
- for (unsigned int i=0; i<size(); i++)
- operator[](i)/=norm;
+ parameter_t norm = 0.0;
+ for (unsigned int i = 0; i < size(); i++)
+ norm += abs(operator[](i));
+ if (norm != 0.0) {
+ for (unsigned int i = 0; i < size(); i++)
+ operator[](i) /= norm;
}
}
@@ -143,14 +144,16 @@ void Point::NormalizeL1()
vector<parameter_t> Point::GetAllWeights()const
{
vector<parameter_t> w;
- if(OptimizeAll()) {
- w=*this;
+ if (OptimizeAll()) {
+ w = *this;
} else {
- w.resize(pdim);
- for (unsigned int i=0; i<size(); i++)
- w[optindices[i]]=operator[](i);
- for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
+ w.resize(m_pdim);
+ for (unsigned int i = 0; i < size(); i++)
+ w[m_opt_indices[i]] = operator[](i);
+ for (map<unsigned, float>::iterator it = m_fixed_weights.begin();
+ it != m_fixed_weights.end(); ++it) {
w[it->first]=it->second;
+ }
}
return w;
}
diff --git a/mert/Point.h b/mert/Point.h
index 5f55c7a7c..1e3a82a53 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -16,61 +16,55 @@ class Optimizer;
class Point : public vector<parameter_t>
{
friend class Optimizer;
+
private:
/**
* The indices over which we optimize.
*/
- static vector<unsigned int> optindices;
+ static vector<unsigned int> m_opt_indices;
/**
- * Dimension of optindices and of the parent vector.
+ * Dimension of m_opt_indices and of the parent vector.
*/
- static unsigned int dim;
+ static unsigned int m_dim;
/**
* Fixed weights in case of partial optimzation.
*/
- static map<unsigned int,parameter_t> fixedweights;
+ static map<unsigned int,parameter_t> m_fixed_weights;
/**
* Total size of the parameter space; we have
- * pdim = FixedWeight.size() + optinidices.size().
+ * m_pdim = m_fixed_weights.size() + m_opt_indices.size().
*/
- static unsigned int pdim;
- static unsigned int ncall;
+ static unsigned int m_pdim;
+ static unsigned int m_ncall;
/**
- * The limits for randomization, both vectors are of full length, pdim.
+ * The limits for randomization, both vectors are of full length, m_pdim.
*/
static vector<parameter_t> m_min;
static vector<parameter_t> m_max;
- statscore_t score_;
+ statscore_t m_score;
public:
- static unsigned int getdim() {
- return dim;
- }
- static unsigned int getpdim() {
- return pdim;
- }
- static void setpdim(size_t pd) {
- pdim = pd;
- }
- static void setdim(size_t d) {
- dim = d;
- }
+ static unsigned int getdim() { return m_dim; }
+ static void setdim(size_t d) { m_dim = d; }
+
+ static unsigned int getpdim() { return m_pdim; }
+ static void setpdim(size_t pd) { m_pdim = pd; }
static void set_optindices(const vector<unsigned int>& indices) {
- optindices = indices;
+ m_opt_indices = indices;
}
static const vector<unsigned int>& get_optindices() {
- return optindices;
+ return m_opt_indices;
}
static bool OptimizeAll() {
- return fixedweights.empty();
+ return m_fixed_weights.empty();
}
Point();
@@ -88,7 +82,7 @@ public:
Point operator*(float) const;
/**
- * Write the Whole featureweight to a stream (ie pdim float).
+ * Write the Whole featureweight to a stream (ie m_pdim float).
*/
friend ostream& operator<<(ostream& o,const Point& P);
@@ -97,16 +91,13 @@ public:
void NormalizeL1();
/**
- * Return a vector of size pdim where all weights have been
+ * Return a vector of size m_pdim where all weights have been
* put (including fixed ones).
*/
vector<parameter_t> GetAllWeights() const;
- statscore_t GetScore() const {
- return score_;
- }
-
- void SetScore(statscore_t score) { score_ = score; }
+ statscore_t GetScore() const { return m_score; }
+ void SetScore(statscore_t score) { m_score = score; }
};
#endif // MERT_POINT_H
diff --git a/mert/ScopedVector.h b/mert/ScopedVector.h
index 6bd84e8c5..a2f0e7066 100644
--- a/mert/ScopedVector.h
+++ b/mert/ScopedVector.h
@@ -12,39 +12,39 @@ class ScopedVector {
ScopedVector() {}
virtual ~ScopedVector() { reset(); }
- bool empty() const { return vec_.empty(); }
+ bool empty() const { return m_vec.empty(); }
- void push_back(T *e) { vec_.push_back(e); }
+ void push_back(T *e) { m_vec.push_back(e); }
void reset() {
- for (iterator it = vec_.begin(); it != vec_.end(); ++it) {
+ for (iterator it = m_vec.begin(); it != m_vec.end(); ++it) {
delete *it;
}
- vec_.clear();
+ m_vec.clear();
}
- void reserve(size_t capacity) { vec_.reserve(capacity); }
- void resize(size_t size) { vec_.resize(size); }
+ void reserve(size_t capacity) { m_vec.reserve(capacity); }
+ void resize(size_t size) { m_vec.resize(size); }
- size_t size() const {return vec_.size(); }
+ size_t size() const {return m_vec.size(); }
- iterator begin() { return vec_.begin(); }
- const_iterator begin() const { return vec_.begin(); }
+ iterator begin() { return m_vec.begin(); }
+ const_iterator begin() const { return m_vec.begin(); }
- iterator end() { return vec_.end(); }
- const_iterator end() const { return vec_.end(); }
+ iterator end() { return m_vec.end(); }
+ const_iterator end() const { return m_vec.end(); }
- std::vector<T*>& get() { return vec_; }
- const std::vector<T*>& get() const { return vec_; }
+ std::vector<T*>& get() { return m_vec; }
+ const std::vector<T*>& get() const { return m_vec; }
- std::vector<T*>* operator->() { return &vec_; }
- const std::vector<T*>* operator->() const { return &vec_; }
+ std::vector<T*>* operator->() { return &m_vec; }
+ const std::vector<T*>* operator->() const { return &m_vec; }
- T*& operator[](size_t i) { return vec_[i]; }
- const T* operator[](size_t i) const { return vec_[i]; }
+ T*& operator[](size_t i) { return m_vec[i]; }
+ const T* operator[](size_t i) const { return m_vec[i]; }
private:
- std::vector<T*> vec_;
+ std::vector<T*> m_vec;
// no copying allowed.
ScopedVector<T>(const ScopedVector<T>&);
diff --git a/mert/ScoreArray.cpp b/mert/ScoreArray.cpp
index 16952f976..3afa9ce1a 100644
--- a/mert/ScoreArray.cpp
+++ b/mert/ScoreArray.cpp
@@ -10,15 +10,14 @@
#include "Util.h"
#include "FileStream.h"
-
ScoreArray::ScoreArray()
- : number_of_scores(0), idx("") {}
+ : m_num_scores(0), m_index("") {}
void ScoreArray::savetxt(std::ofstream& outFile, const std::string& sctype)
{
- outFile << SCORES_TXT_BEGIN << " " << idx << " " << array_.size()
- << " " << number_of_scores << " " << sctype << std::endl;
- for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
+ outFile << SCORES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+ << " " << m_num_scores << " " << sctype << std::endl;
+ for (scorearray_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
i->savetxt(outFile);
outFile << std::endl;
}
@@ -27,9 +26,9 @@ void ScoreArray::savetxt(std::ofstream& outFile, const std::string& sctype)
void ScoreArray::savebin(std::ofstream& outFile, const std::string& sctype)
{
- outFile << SCORES_BIN_BEGIN << " " << idx << " " << array_.size()
- << " " << number_of_scores << " " << sctype << std::endl;
- for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++)
+ outFile << SCORES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+ << " " << m_num_scores << " " << sctype << std::endl;
+ for (scorearray_t::iterator i = m_array.begin(); i !=m_array.end(); i++)
i->savebin(outFile);
outFile << SCORES_BIN_END << std::endl;
@@ -37,22 +36,24 @@ void ScoreArray::savebin(std::ofstream& outFile, const std::string& sctype)
void ScoreArray::save(std::ofstream& inFile, const std::string& sctype, bool bin)
{
- if (size()>0)
- (bin)?savebin(inFile, sctype):savetxt(inFile, sctype);
+ if (size() <= 0) return;
+ if (bin) {
+ savebin(inFile, sctype);
+ } else {
+ savetxt(inFile, sctype);
+ }
}
void ScoreArray::save(const std::string &file, const std::string& sctype, bool bin)
{
std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
save(outFile, sctype, bin);
-
outFile.close();
}
void ScoreArray::loadbin(ifstream& inFile, size_t n)
{
- ScoreStats entry(number_of_scores);
+ ScoreStats entry(m_num_scores);
for (size_t i=0 ; i < n; i++) {
entry.loadbin(inFile);
@@ -62,9 +63,9 @@ void ScoreArray::loadbin(ifstream& inFile, size_t n)
void ScoreArray::loadtxt(ifstream& inFile, size_t n)
{
- ScoreStats entry(number_of_scores);
+ ScoreStats entry(m_num_scores);
- for (size_t i=0 ; i < n; i++) {
+ for (size_t i = 0; i < n; i++) {
entry.loadtxt(inFile);
add(entry);
}
@@ -72,8 +73,8 @@ void ScoreArray::loadtxt(ifstream& inFile, size_t n)
void ScoreArray::load(ifstream& inFile)
{
- size_t number_of_entries=0;
- bool binmode=false;
+ size_t number_of_entries = 0;
+ bool binmode = false;
std::string substring, stringBuf;
std::string::size_type loc;
@@ -94,20 +95,25 @@ void ScoreArray::load(ifstream& inFile)
}
getNextPound(stringBuf, substring);
getNextPound(stringBuf, substring);
- idx = substring;
+ m_index = substring;
getNextPound(stringBuf, substring);
number_of_entries = atoi(substring.c_str());
getNextPound(stringBuf, substring);
- number_of_scores = atoi(substring.c_str());
+ m_num_scores = atoi(substring.c_str());
getNextPound(stringBuf, substring);
- score_type = substring;
+ m_score_type = substring;
}
- (binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
+ if (binmode) {
+ loadbin(inFile, number_of_entries);
+ } else {
+ loadtxt(inFile, number_of_entries);
+ }
std::getline(inFile, stringBuf);
if (!stringBuf.empty()) {
- if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 && (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
+ if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 &&
+ (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
TRACE_ERR("ERROR: ScoreArray::load(): Wrong footer");
return;
}
@@ -139,7 +145,8 @@ bool ScoreArray::check_consistency() const
if (sz == 0)
return true;
- for (scorearray_t::const_iterator i = array_.begin(); i != array_.end(); ++i) {
+ for (scorearray_t::const_iterator i = m_array.begin();
+ i != m_array.end(); ++i) {
if (i->size() != sz)
return false;
}
diff --git a/mert/ScoreArray.h b/mert/ScoreArray.h
index 5b84e020f..e1fb71357 100644
--- a/mert/ScoreArray.h
+++ b/mert/ScoreArray.h
@@ -24,72 +24,75 @@ const char SCORES_BIN_END[] = "SCORES_BIN_END_0";
class ScoreArray
{
-protected:
- scorearray_t array_;
- std::string score_type;
- size_t number_of_scores;
+ private:
+ scorearray_t m_array;
+ std::string m_score_type;
+ size_t m_num_scores;
-private:
- // idx to identify the utterance.
+ // index to identify the utterance.
// It can differ from the index inside the vector.
- std::string idx;
+ std::string m_index;
public:
ScoreArray();
~ScoreArray() {}
inline void clear() {
- array_.clear();
+ m_array.clear();
}
inline std::string getIndex() const {
- return idx;
+ return m_index;
}
+
inline void setIndex(const std::string& value) {
- idx=value;
+ m_index = value;
}
-// inline ScoreStats get(size_t i){ return array_.at(i); }
+// inline ScoreStats get(size_t i){ return m_array.at(i); }
inline ScoreStats& get(size_t i) {
- return array_.at(i);
+ return m_array.at(i);
}
- inline const ScoreStats& get(size_t i)const {
- return array_.at(i);
+
+ inline const ScoreStats& get(size_t i) const {
+ return m_array.at(i);
}
void add(const ScoreStats& e) {
- array_.push_back(e);
+ m_array.push_back(e);
}
//ADDED BY TS
void swap(size_t i, size_t j) {
- std::swap(array_[i],array_[j]);
+ std::swap(m_array[i], m_array[j]);
}
// Truncates the array to at most `new_size` entries.
// Because the target size is min(new_size, current size), this can only
// shrink the array; a larger `new_size` leaves it unchanged.
void resize(size_t new_size) {
- array_.resize(std::min(new_size,array_.size()));
+ m_array.resize(std::min(new_size, m_array.size()));
}
//END_ADDED
void merge(ScoreArray& e);
inline std::string name() const {
- return score_type;
+ return m_score_type;
}
// Setter for the score type name stored in this array.
// NOTE(review): the argument is taken by non-const reference although it is
// only read here, so const strings and temporaries cannot be passed.
- inline void name(std::string &sctype) {
- score_type = sctype;
+ inline void name(std::string &score_type) {
+ m_score_type = score_type;
}
inline size_t size() const {
- return array_.size();
+ return m_array.size();
}
+
inline size_t NumberOfScores() const {
- return number_of_scores;
+ return m_num_scores;
}
+
inline void NumberOfScores(size_t v) {
- number_of_scores = v;
+ m_num_scores = v;
}
void savetxt(ofstream& outFile, const std::string& sctype);
diff --git a/mert/ScoreData.cpp b/mert/ScoreData.cpp
index 2852e413f..ac117289e 100644
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@@ -12,19 +12,19 @@
#include "FileStream.h"
// Builds a ScoreData bound to the scorer `ptr`.
// Keeps a pointer to the scorer (not a copy), caches the scorer's name and
// number of scores, and registers this object with the scorer through
// setScoreData(this).
ScoreData::ScoreData(Scorer& ptr):
- theScorer(&ptr)
+ m_scorer(&ptr)
{
- score_type = theScorer->getName();
+ m_score_type = m_scorer->getName();
// This is not dangerous: we don't use the this pointer in SetScoreData.
- theScorer->setScoreData(this);
- number_of_scores = theScorer->NumberOfScores();
- // TRACE_ERR("ScoreData: number_of_scores: " << number_of_scores << std::endl);
+ m_scorer->setScoreData(this);
+ m_num_scores = m_scorer->NumberOfScores();
+ // TRACE_ERR("ScoreData: m_num_scores: " << m_num_scores << std::endl);
}
// Writes every stored ScoreArray to `outFile`, in container order.
// Each array is saved with this object's score type name; `bin` is passed
// through unchanged to ScoreArray::save.
void ScoreData::save(std::ofstream& outFile, bool bin)
{
- for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
- i->save(outFile, score_type, bin);
+ for (scoredata_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+ i->save(outFile, m_score_type, bin);
}
}
@@ -35,11 +35,7 @@ void ScoreData::save(const std::string &file, bool bin)
// matches a stream with a file. Opens the file.
std::ofstream outFile(file.c_str(), std::ios::out);
-
- ScoreStats entry;
-
save(outFile, bin);
-
outFile.close();
}
@@ -48,14 +44,11 @@ void ScoreData::load(ifstream& inFile)
ScoreArray entry;
while (!inFile.eof()) {
-
if (!inFile.good()) {
std::cerr << "ERROR ScoreData::load inFile.good()" << std::endl;
}
-
entry.clear();
entry.load(inFile);
-
if (entry.size() == 0) {
break;
}
@@ -63,31 +56,25 @@ void ScoreData::load(ifstream& inFile)
}
}
-
// Loads score data from the file at path `file`.
// Logs the path, opens it as an inputfilestream, and forwards the stream to
// the ifstream overload of load().
// Throws runtime_error when the stream cannot be opened.
void ScoreData::load(const std::string &file)
{
TRACE_ERR("loading score data from " << file << std::endl);
-
inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
if (!inFile) {
throw runtime_error("Unable to open score file: " + file);
}
-
// NOTE(review): C-style cast from inputfilestream to ifstream&; its validity
// depends on how inputfilestream is declared, which is not visible here.
load((ifstream&) inFile);
-
inFile.close();
}
-
// Adds the score array `e` to this collection.
// If an array with the same index name (e.getIndex()) is already stored, `e`
// is merged into it; otherwise `e` is appended and both lookup maps are
// rebuilt through setIndex().
void ScoreData::add(ScoreArray& e)
{
if (exists(e.getIndex())) { // array at position e.getIndex() already exists
//enlarge array at position e.getIndex()
size_t pos = getIndex(e.getIndex());
- array_.at(pos).merge(e);
+ m_array.at(pos).merge(e);
} else {
- array_.push_back(e);
+ m_array.push_back(e);
setIndex();
}
}
@@ -98,28 +85,28 @@ void ScoreData::add(const ScoreStats& e, const std::string& sent_idx)
// Enlarge array at position e.getIndex()
size_t pos = getIndex(sent_idx);
// TRACE_ERR("Inserting in array " << sent_idx << std::endl);
- array_.at(pos).add(e);
+ m_array.at(pos).add(e);
// TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
} else {
// TRACE_ERR("Creating a new entry in the array" << std::endl);
ScoreArray a;
- a.NumberOfScores(number_of_scores);
+ a.NumberOfScores(m_num_scores);
a.add(e);
a.setIndex(sent_idx);
- size_t idx = array_.size();
- array_.push_back(a);
- idx2arrayname_[idx] = sent_idx;
- arrayname2idx_[sent_idx]=idx;
+ size_t idx = m_array.size();
+ m_array.push_back(a);
+ m_index_to_array_name[idx] = sent_idx;
+ m_array_name_to_index[sent_idx]=idx;
// TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
}
}
bool ScoreData::check_consistency() const
{
- if (array_.size() == 0)
+ if (m_array.size() == 0)
return true;
- for (scoredata_t::const_iterator i = array_.begin(); i != array_.end(); ++i)
+ for (scoredata_t::const_iterator i = m_array.begin(); i != m_array.end(); ++i)
if (!i->check_consistency()) return false;
return true;
@@ -127,10 +114,10 @@ bool ScoreData::check_consistency() const
// Rebuilds the two lookup maps from the current contents of the array:
// position -> index name, and index name -> position.
// NOTE(review): neither map is cleared first, so entries for names or
// positions that are no longer in the array are not removed by this call.
void ScoreData::setIndex()
{
- size_t j=0;
- for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
- idx2arrayname_[j]=i->getIndex();
- arrayname2idx_[i->getIndex()]=j;
+ size_t j = 0;
+ for (scoredata_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+ m_index_to_array_name[j] = i->getIndex();
+ m_array_name_to_index[i->getIndex()]=j;
j++;
}
}
diff --git a/mert/ScoreData.h b/mert/ScoreData.h
index d1a635e9e..37413445e 100644
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@@ -11,7 +11,6 @@
#include <fstream>
#include <vector>
-#include <iostream>
#include <stdexcept>
#include <string>
#include "ScoreArray.h"
@@ -23,35 +22,34 @@ class Scorer;
class ScoreData
{
-protected:
- scoredata_t array_;
- idx2name idx2arrayname_; // map from index to name of array
- name2idx arrayname2idx_; // map from name to index of array
-
private:
// Do not allow the user to instantiate without arguments.
ScoreData() {}
- Scorer* theScorer;
- std::string score_type;
- size_t number_of_scores;
+ scoredata_t m_array;
+ idx2name m_index_to_array_name; // map from index to name of array
+ name2idx m_array_name_to_index; // map from name to index of array
+
+ Scorer* m_scorer;
+ std::string m_score_type;
+ size_t m_num_scores;
public:
ScoreData(Scorer& sc);
~ScoreData() {}
inline void clear() {
- array_.clear();
+ m_array.clear();
}
// Returns, by value, the ScoreArray stored under the index name `idx`.
// NOTE(review): getIndex(name) yields -1 for an unknown name; that value is
// passed straight to at(), so the outcome for unknown names depends on at()
// rejecting the converted index.
inline ScoreArray get(const std::string& idx) {
- return array_.at(getIndex(idx));
+ return m_array.at(getIndex(idx));
}
inline ScoreArray& get(size_t idx) {
- return array_.at(idx);
+ return m_array.at(idx);
}
inline const ScoreArray& get(size_t idx) const {
- return array_.at(idx);
+ return m_array.at(idx);
}
inline bool exists(const std::string& sent_idx) const {
@@ -59,32 +57,32 @@ public:
}
inline bool exists(int sent_idx) const {
- return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+ return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
}
inline ScoreStats& get(size_t i, size_t j) {
- return array_.at(i).get(j);
+ return m_array.at(i).get(j);
}
inline const ScoreStats& get(size_t i, size_t j) const {
- return array_.at(i).get(j);
+ return m_array.at(i).get(j);
}
inline std::string name() const {
- return score_type;
+ return m_score_type;
}
inline std::string name(const std::string &sctype) {
- return score_type = sctype;
+ return m_score_type = sctype;
}
void add(ScoreArray& e);
void add(const ScoreStats& e, const std::string& sent_idx);
inline size_t NumberOfScores() const {
- return number_of_scores;
+ return m_num_scores;
}
inline size_t size() const {
- return array_.size();
+ return m_array.size();
}
void save(const std::string &file, bool bin=false);
@@ -100,15 +98,15 @@ public:
void setIndex();
// Maps an index name to its position in the array.
// Returns the stored position, or -1 when the name is not in the map.
inline int getIndex(const std::string& idx) const {
- name2idx::const_iterator i = arrayname2idx_.find(idx);
- if (i != arrayname2idx_.end())
+ name2idx::const_iterator i = m_array_name_to_index.find(idx);
+ if (i != m_array_name_to_index.end())
return i->second;
else
return -1;
}
// Maps an array position to the index name recorded for it.
// NOTE(review): the test below is inverted. As written it throws when the
// position IS found, and falls through to dereference the end() iterator when
// it is not found. The comparison should be `==`.
// NOTE(review): `"there is no entry at index " + idx` adds a size_t to a
// string literal, which is pointer arithmetic on the literal rather than
// appending the number; the number needs converting to text first.
inline std::string getIndex(size_t idx) const {
- idx2name::const_iterator i = idx2arrayname_.find(idx);
- if (i != idx2arrayname_.end())
+ idx2name::const_iterator i = m_index_to_array_name.find(idx);
+ if (i != m_index_to_array_name.end())
throw runtime_error("there is no entry at index " + idx);
return i->second;
}
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index 0f49e8edc..bc0bb5804 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -14,30 +14,30 @@ const int kAvailableSize = 8;
} // namespace
// Default constructor: allocates room for kAvailableSize values and starts
// with zero entries in use.
ScoreStats::ScoreStats()
- : available_(kAvailableSize), entries_(0),
- array_(new ScoreStatsType[available_]) {}
+ : m_available_size(kAvailableSize), m_entries(0),
+ m_array(new ScoreStatsType[m_available_size]) {}
// Constructs a ScoreStats holding exactly `size` entries, all zeroed.
// Capacity and entry count are both set to `size`, so the memset over
// GetArraySizeWithBytes() covers the whole allocation.
ScoreStats::ScoreStats(const size_t size)
- : available_(size), entries_(size),
- array_(new ScoreStatsType[available_])
+ : m_available_size(size), m_entries(size),
+ m_array(new ScoreStatsType[m_available_size])
{
- memset(array_, 0, GetArraySizeWithBytes());
+ memset(m_array, 0, GetArraySizeWithBytes());
}
// Releases the value buffer and nulls the pointer.
ScoreStats::~ScoreStats()
{
- if (array_) {
- delete [] array_;
- array_ = NULL;
+ if (m_array) {
+ delete [] m_array;
+ m_array = NULL;
}
}
// Makes this object a copy of `stats`: takes over its capacity and entry
// count, allocates a fresh buffer of that capacity, and copies the bytes of
// the entries in use.
// The previous buffer is NOT freed here; the pointer is simply overwritten,
// so a caller that already owns a buffer must release it first.
void ScoreStats::Copy(const ScoreStats &stats)
{
- available_ = stats.available();
- entries_ = stats.size();
- array_ = new ScoreStatsType[available_];
- memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
+ m_available_size = stats.available();
+ m_entries = stats.size();
+ m_array = new ScoreStatsType[m_available_size];
+ memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
}
ScoreStats::ScoreStats(const ScoreStats &stats)
@@ -47,24 +47,24 @@ ScoreStats::ScoreStats(const ScoreStats &stats)
// Copy assignment: frees the current buffer, then rebuilds this object from
// `stats` via Copy().
// NOTE(review): there is no self-assignment guard. For `x = x` the buffer is
// deleted and Copy() then reads from that same freed buffer through
// stats.getArray(). A `this != &stats` check before the delete would avoid it.
ScoreStats& ScoreStats::operator=(const ScoreStats &stats)
{
- delete [] array_;
+ delete [] m_array;
Copy(stats);
return *this;
}
// Doubles the capacity: allocates a buffer twice as large, copies the bytes
// of the entries in use into it, and frees the old buffer.
// NOTE(review): doubling a capacity of 0 leaves it at 0. An object built with
// ScoreStats(0) therefore never gains room, and a later add() writes past the
// zero-length buffer.
void ScoreStats::expand()
{
- available_ *= 2;
- scorestats_t buf = new ScoreStatsType[available_];
- memcpy(buf, array_, GetArraySizeWithBytes());
- delete [] array_;
- array_ = buf;
+ m_available_size *= 2;
+ scorestats_t buf = new ScoreStatsType[m_available_size];
+ memcpy(buf, m_array, GetArraySizeWithBytes());
+ delete [] m_array;
+ m_array = buf;
}
// Appends `v` after the last entry in use, calling expand() first when the
// entry count has reached the capacity.
void ScoreStats::add(ScoreStatsType v)
{
if (isfull()) expand();
- array_[entries_++]=v;
+ m_array[m_entries++]=v;
}
void ScoreStats::set(const std::string& str)
@@ -80,7 +80,7 @@ void ScoreStats::set(const std::string& str)
// Reads raw bytes from `inFile` straight into the value buffer.
// The byte count is the current entry count times sizeof(ScoreStatsType), so
// the number of entries must already be set before calling this.
void ScoreStats::loadbin(std::ifstream& inFile)
{
- inFile.read((char*)array_, GetArraySizeWithBytes());
+ inFile.read((char*)m_array, GetArraySizeWithBytes());
}
void ScoreStats::loadtxt(std::ifstream& inFile)
@@ -117,7 +117,7 @@ void ScoreStats::savetxt(std::ofstream& outFile)
// Writes the entries in use to `outFile` as raw bytes
// (entry count times sizeof(ScoreStatsType)), with no header or separator.
void ScoreStats::savebin(std::ofstream& outFile)
{
- outFile.write((char*)array_, GetArraySizeWithBytes());
+ outFile.write((char*)m_array, GetArraySizeWithBytes());
}
ostream& operator<<(ostream& o, const ScoreStats& e)
diff --git a/mert/ScoreStats.h b/mert/ScoreStats.h
index cbdf1c8c2..dc17de82e 100644
--- a/mert/ScoreStats.h
+++ b/mert/ScoreStats.h
@@ -22,11 +22,11 @@ using namespace std;
class ScoreStats
{
private:
- size_t available_;
- size_t entries_;
+ size_t m_available_size;
+ size_t m_entries;
// TODO: Use smart pointer for exceptional-safety.
- scorestats_t array_;
+ scorestats_t m_array;
public:
ScoreStats();
@@ -41,30 +41,24 @@ public:
void Copy(const ScoreStats &stats);
bool isfull() const {
- return (entries_ < available_) ? 0 : 1;
+ return (m_entries < m_available_size) ? 0 : 1;
}
void expand();
void add(ScoreStatsType v);
void clear() {
- memset((void*)array_, 0, GetArraySizeWithBytes());
+ memset((void*)m_array, 0, GetArraySizeWithBytes());
}
void reset() {
- entries_ = 0;
+ m_entries = 0;
clear();
}
- inline ScoreStatsType get(size_t i) {
- return array_[i];
- }
- inline ScoreStatsType get(size_t i)const {
- return array_[i];
- }
- inline scorestats_t getArray() const {
- return array_;
- }
+ inline ScoreStatsType get(size_t i) { return m_array[i]; }
+ inline ScoreStatsType get(size_t i) const { return m_array[i]; }
+ inline scorestats_t getArray() const { return m_array; }
void set(const std::string& str);
@@ -76,20 +70,15 @@ public:
}
}
- inline size_t bytes() const {
- return GetArraySizeWithBytes();
- }
+ inline size_t bytes() const { return GetArraySizeWithBytes(); }
size_t GetArraySizeWithBytes() const {
- return entries_ * sizeof(ScoreStatsType);
+ return m_entries * sizeof(ScoreStatsType);
}
- inline size_t size() const {
- return entries_;
- }
- inline size_t available() const {
- return available_;
- }
+ inline size_t size() const { return m_entries; }
+
+ inline size_t available() const { return m_available_size; }
void savetxt(const std::string &file);
void savetxt(ofstream& outFile);
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 694b48e8e..cd9efa45a 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -434,7 +434,7 @@ int main(int argc, char **argv)
vector<OptimizationTask*>& tasks = allTasks[i];
Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, start_list[0], option.optimize_type, option.nrandom);
optimizer->SetScorer(data_ref.getScorer());
- optimizer->SetFData(data_ref.getFeatureData());
+ optimizer->SetFeatureData(data_ref.getFeatureData());
// A task for each start point
for (size_t j = 0; j < startingPoints.size(); ++j) {
OptimizationTask* task = new OptimizationTask(optimizer, startingPoints[j]);
diff --git a/mert/pro.cpp b/mert/pro.cpp
index aa1c2ec16..b29bbe052 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -21,8 +21,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-/**
- * This is part of the PRO implementation. It converts the features and scores
+/**
+ * This is part of the PRO implementation. It converts the features and scores
* files into a form suitable for input into the megam maxent trainer.
*
* For details of PRO, refer to Hopkins & May (EMNLP 2011)
@@ -48,48 +48,49 @@ namespace po = boost::program_options;
// A sampled pair of translations together with the difference of their scores.
// The constructor orders the pair so that the stored difference is never
// negative: when `diff` is positive t1/t2 are kept as given, otherwise they
// are swapped and the difference is negated.
// Each translation is identified by a pair of indices.
// NOTE(review): presumably (file index, hypothesis index) — confirm against
// the sampling loop in main().
class SampledPair {
private:
- pair<size_t,size_t> translation1;
- pair<size_t,size_t> translation2;
- float scoreDiff;
+ pair<size_t,size_t> m_translation1;
+ pair<size_t,size_t> m_translation2;
+ float m_score_diff;
+
public:
- SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
- if (diff > 0) {
- translation1 = t1;
- translation2 = t2;
- scoreDiff = diff;
- }
- else {
- translation1 = t2;
- translation2 = t1;
- scoreDiff = -diff;
- }
- }
- float getDiff() const { return scoreDiff; }
- const pair<size_t,size_t>& getTranslation1() const { return translation1; }
- const pair<size_t,size_t>& getTranslation2() const { return translation2; }
+ SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
+ if (diff > 0) {
+ m_translation1 = t1;
+ m_translation2 = t2;
+ m_score_diff = diff;
+ } else {
+ m_translation1 = t2;
+ m_translation2 = t1;
+ m_score_diff = -diff;
+ }
+ }
+
+ float getDiff() const { return m_score_diff; }
+ const pair<size_t,size_t>& getTranslation1() const { return m_translation1; }
+ const pair<size_t,size_t>& getTranslation2() const { return m_translation2; }
};
// Computes a smoothed sentence-level BLEU value from a flat statistics vector.
// For each order j below kBleuNgramOrder it accumulates
// log(stats[2*j] + 1) - log(stats[2*j+1] + 1) and averages over the orders.
// A brevity term, 1 - stats[2*kBleuNgramOrder] / stats[1], is added to the log
// score only when it is negative. The result is exp() of the log score.
// NOTE(review): the division by stats[1] is not guarded against zero, and the
// vector is indexed up to 2*kBleuNgramOrder without a size check.
static float sentenceLevelBleuPlusOne(const vector<float>& stats) {
- float logbleu = 0.0;
- for (unsigned int j=0; j<kBleuNgramOrder; j++) {
- //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
- logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
- }
- logbleu /= kBleuNgramOrder;
- const float brevity = 1.0 - static_cast<float>(stats[(kBleuNgramOrder * 2)]) / stats[1];
- if (brevity < 0.0) {
- logbleu += brevity;
- }
- //cerr << brevity << " -> " << exp(logbleu) << endl;
- return exp(logbleu);
+ float logbleu = 0.0;
+ for (unsigned int j=0; j<kBleuNgramOrder; j++) {
+ //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
+ logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
+ }
+ logbleu /= kBleuNgramOrder;
+ const float brevity = 1.0 - static_cast<float>(stats[(kBleuNgramOrder * 2)]) / stats[1];
+ if (brevity < 0.0) {
+ logbleu += brevity;
+ }
+ //cerr << brevity << " -> " << exp(logbleu) << endl;
+ return exp(logbleu);
}
static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureDataItem& f2) {
// difference in score in regular features
- for(unsigned int j=0; j<f1.dense.size(); j++)
- if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
- out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);
+ for(unsigned int j=0; j<f1.dense.size(); j++)
+ if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
+ out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);
if (f1.sparse.size() || f2.sparse.size()) {
out << " ";
@@ -102,27 +103,27 @@ static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureD
}
}
-
-int main(int argc, char** argv)
+
+int main(int argc, char** argv)
{
bool help;
vector<string> scoreFiles;
vector<string> featureFiles;
int seed;
string outputFile;
- //TODO: options
- const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
- const unsigned int n_samples = 50; // Xi, in Hopkins & May
- const float min_diff = 0.05;
+ // TODO: Add these constants to options
+ const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
+ const unsigned int n_samples = 50; // Xi, in Hopkins & May
+ const float min_diff = 0.05;
po::options_description desc("Allowed options");
desc.add_options()
- ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
- ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
- ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
- ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
- ("output-file,o", po::value<string>(&outputFile), "Output file")
- ;
+ ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+ ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
+ ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+ ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
+ ("output-file,o", po::value<string>(&outputFile), "Output file")
+ ;
po::options_description cmdline_options;
cmdline_options.add(desc);
@@ -135,7 +136,7 @@ int main(int argc, char** argv)
cout << desc << endl;
exit(0);
}
-
+
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
@@ -168,7 +169,7 @@ int main(int argc, char** argv)
out = &cout;
}
-
+
vector<FeatureDataIterator> featureDataIters;
vector<ScoreDataIterator> scoreDataIters;
for (size_t i = 0; i < featureFiles.size(); ++i) {
@@ -180,7 +181,7 @@ int main(int argc, char** argv)
size_t sentenceId = 0;
while(1) {
vector<pair<size_t,size_t> > hypotheses;
- //TODO: de-deuping. Collect hashes of score,feature pairs and
+ //TODO: de-duping. Collect hashes of score,feature pairs and
//only add index if it's unique.
if (featureDataIters[0] == FeatureDataIterator::end()) {
break;
@@ -215,7 +216,7 @@ int main(int argc, char** argv)
size_t rand2 = rand() % n_translations;
pair<size_t,size_t> translation2 = hypotheses[rand2];
float bleu2 = sentenceLevelBleuPlusOne(scoreDataIters[translation2.first]->operator[](translation2.second));
-
+
/*
cerr << "t(" << translation1.first << "," << translation1.second << ") = " << bleu1 <<
" t(" << translation2.first << "," << translation2.second << ") = " <<
@@ -223,7 +224,7 @@ int main(int argc, char** argv)
*/
if (abs(bleu1-bleu2) < min_diff)
continue;
-
+
samples.push_back(SampledPair(translation1, translation2, bleu1-bleu2));
scores.push_back(1.0-abs(bleu1-bleu2));
}
@@ -262,4 +263,3 @@ int main(int argc, char** argv)
outFile.close();
}
-