
github.com/moses-smt/mosesdecoder.git
author    Barry Haddow <barry.haddow@gmail.com>  2012-05-25 00:11:35 +0400
committer Barry Haddow <barry.haddow@gmail.com>  2012-05-25 00:11:35 +0400
commit    c397d2068bd65fc1eca6bf49057cfda154fa3ce5 (patch)
tree      e8a8c4e6f60c2b0d47592371d981b9d1a1ac9f85 /mert/FeatureStats.cpp
parent    5a17ef82b3c7449d9fb3686875a7936193e14a89 (diff)
parent    277fd38bbd345c083b762a0fb36c0a69da2ca7eb (diff)
Merge branch 'trunk' into miramerge. Still to fix build.
Conflicts:
	Jamroot
	mert/Data.cpp
	mert/Data.h
	mert/FeatureArray.cpp
	mert/FeatureArray.h
	mert/FeatureData.cpp
	mert/FeatureData.h
	mert/FeatureStats.cpp
	mert/FeatureStats.h
	mert/mert.cpp
	moses-chart-cmd/src/IOWrapper.h
	moses-chart-cmd/src/Main.cpp
	moses-cmd/src/IOWrapper.cpp
	moses-cmd/src/IOWrapper.h
	moses-cmd/src/Main.cpp
	moses/src/GlobalLexicalModel.cpp
	moses/src/Jamfile
	moses/src/Parameter.cpp
	moses/src/PhraseDictionary.cpp
	moses/src/ScoreIndexManager.h
	moses/src/TargetPhrase.h
	regression-testing/tests/phrase.lexicalized-reordering-bin/truth/results.txt
	regression-testing/tests/phrase.lexicalized-reordering-cn/truth/results.txt
	regression-testing/tests/phrase.lexicalized-reordering/truth/results.txt
	regression-testing/tests/phrase.multiple-translation-system-lr/truth/results.txt
	regression-testing/tests/phrase.show-weights.lex-reorder/truth/results.txt
	regression-testing/tests/phrase.show-weights/truth/results.txt
	scripts/ems/experiment.meta
	scripts/ems/experiment.perl
	scripts/training/filter-model-given-input.pl
	scripts/training/mert-moses.pl
Diffstat (limited to 'mert/FeatureStats.cpp')
-rw-r--r--   mert/FeatureStats.cpp   153
1 file changed, 86 insertions(+), 67 deletions(-)
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index a02c4c3d3..6b86ac304 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -1,6 +1,6 @@
/*
* FeatureStats.cpp
- * met - Minimum Error Training
+ * mert - Minimum Error Rate Training
*
* Created by Nicola Bertoldi on 13/05/08.
*
@@ -8,53 +8,56 @@
#include "FeatureStats.h"
+#include <fstream>
#include <cmath>
#include <stdexcept>
#include "Util.h"
+using namespace std;
+
namespace {
const int kAvailableSize = 8;
} // namespace
-SparseVector::name2id_t SparseVector::name2id_;
-SparseVector::id2name_t SparseVector::id2name_;
+SparseVector::name2id_t SparseVector::m_name_to_id;
+SparseVector::id2name_t SparseVector::m_id_to_name;
FeatureStatsType SparseVector::get(const string& name) const {
- name2id_t::const_iterator name2id_iter = name2id_.find(name);
- if (name2id_iter == name2id_.end()) return 0;
+ name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
+ if (name2id_iter == m_name_to_id.end()) return 0;
size_t id = name2id_iter->second;
return get(id);
}
FeatureStatsType SparseVector::get(size_t id) const {
- fvector_t::const_iterator fvector_iter = fvector_.find(id);
- if (fvector_iter == fvector_.end()) return 0;
+ fvector_t::const_iterator fvector_iter = m_fvector.find(id);
+ if (fvector_iter == m_fvector.end()) return 0;
return fvector_iter->second;
}
void SparseVector::set(const string& name, FeatureStatsType value) {
- name2id_t::const_iterator name2id_iter = name2id_.find(name);
+ name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
size_t id = 0;
- if (name2id_iter == name2id_.end()) {
- id = id2name_.size();
- id2name_.push_back(name);
- name2id_[name] = id;
+ if (name2id_iter == m_name_to_id.end()) {
+ id = m_id_to_name.size();
+ m_id_to_name.push_back(name);
+ m_name_to_id[name] = id;
} else {
id = name2id_iter->second;
}
- fvector_[id] = value;
+ m_fvector[id] = value;
}
void SparseVector::write(ostream& out, const string& sep) const {
- for (fvector_t::const_iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
- if (abs((float)(i->second)) < 0.00001) continue;
- string name = id2name_[i->first];
+ for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
+ if (abs(i->second) < 0.00001) continue;
+ string name = m_id_to_name[i->first];
out << name << sep << i->second << " ";
}
}
void SparseVector::clear() {
- fvector_.clear();
+ m_fvector.clear();
}
void SparseVector::load(const string& file) {
@@ -76,17 +79,17 @@ void SparseVector::load(const string& file) {
SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
- for (fvector_t::const_iterator i = rhs.fvector_.begin();
- i != rhs.fvector_.end(); ++i) {
- fvector_[i->first] = get(i->first) - (i->second);
+ for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+ i != rhs.m_fvector.end(); ++i) {
+ m_fvector[i->first] = get(i->first) - (i->second);
}
return *this;
}
FeatureStatsType SparseVector::inner_product(const SparseVector& rhs) const {
FeatureStatsType product = 0.0;
- for (fvector_t::const_iterator i = fvector_.begin();
- i != fvector_.end(); ++i) {
+ for (fvector_t::const_iterator i = m_fvector.begin();
+ i != m_fvector.end(); ++i) {
product += ((i->second) * (rhs.get(i->first)));
}
return product;
@@ -107,31 +110,31 @@ FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs)
}
FeatureStats::FeatureStats()
- : available_(kAvailableSize), entries_(0),
- array_(new FeatureStatsType[available_]) {}
+ : m_available_size(kAvailableSize), m_entries(0),
+ m_array(new FeatureStatsType[m_available_size]) {}
FeatureStats::FeatureStats(const size_t size)
- : available_(size), entries_(size),
- array_(new FeatureStatsType[available_])
+ : m_available_size(size), m_entries(size),
+ m_array(new FeatureStatsType[m_available_size])
{
- memset(array_, 0, GetArraySizeWithBytes());
+ memset(m_array, 0, GetArraySizeWithBytes());
}
FeatureStats::~FeatureStats()
{
- if (array_) {
- delete [] array_;
- array_ = NULL;
+ if (m_array) {
+ delete [] m_array;
+ m_array = NULL;
}
}
void FeatureStats::Copy(const FeatureStats &stats)
{
- available_ = stats.available();
- entries_ = stats.size();
- array_ = new FeatureStatsType[available_];
- memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
- map_ = stats.getSparse();
+ m_available_size = stats.available();
+ m_entries = stats.size();
+ m_array = new FeatureStatsType[m_available_size];
+ memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
+ m_map = stats.getSparse();
}
FeatureStats::FeatureStats(const FeatureStats &stats)
@@ -141,34 +144,34 @@ FeatureStats::FeatureStats(const FeatureStats &stats)
FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
{
- delete [] array_;
+ delete [] m_array;
Copy(stats);
return *this;
}
void FeatureStats::expand()
{
- available_ *= 2;
- featstats_t t_ = new FeatureStatsType[available_];
- memcpy(t_, array_, GetArraySizeWithBytes());
- delete [] array_;
- array_ = t_;
+ m_available_size *= 2;
+ featstats_t t_ = new FeatureStatsType[m_available_size];
+ memcpy(t_, m_array, GetArraySizeWithBytes());
+ delete [] m_array;
+ m_array = t_;
}
void FeatureStats::add(FeatureStatsType v)
{
if (isfull()) expand();
- array_[entries_++]=v;
+ m_array[m_entries++]=v;
}
void FeatureStats::addSparse(const string& name, FeatureStatsType v)
{
- map_.set(name,v);
+ m_map.set(name,v);
}
-void FeatureStats::set(std::string &theString, const SparseVector& sparseWeights)
+void FeatureStats::set(string &theString, const SparseVector& sparseWeights )
{
- std::string substring, stringBuf;
+ string substring, stringBuf;
reset();
while (!theString.empty()) {
@@ -186,7 +189,7 @@ void FeatureStats::set(std::string &theString, const SparseVector& sparseWeights
if (sparseWeights.size()) {
//Merge the sparse features
- FeatureStatsType merged = inner_product(sparseWeights, map_);
+ FeatureStatsType merged = inner_product(sparseWeights, m_map);
add(merged);
/*
cerr << "Merged ";
@@ -195,7 +198,7 @@ void FeatureStats::set(std::string &theString, const SparseVector& sparseWeights
map_.write(cerr,"=");
cerr << " to give " << merged << endl;
*/
- map_.clear();
+ m_map.clear();
}
/*
cerr << "FS: ";
@@ -205,39 +208,39 @@ void FeatureStats::set(std::string &theString, const SparseVector& sparseWeights
cerr << endl;*/
}
-
-void FeatureStats::loadbin(std::ifstream& inFile)
+void FeatureStats::loadbin(istream* is)
{
- inFile.read((char*) array_, GetArraySizeWithBytes());
+ is->read(reinterpret_cast<char*>(m_array),
+ static_cast<streamsize>(GetArraySizeWithBytes()));
}
-void FeatureStats::loadtxt(std::ifstream& inFile, const SparseVector& sparseWeights)
+void FeatureStats::loadtxt(istream* is, const SparseVector& sparseWeights)
{
- std::string theString;
- std::getline(inFile, theString);
- set(theString, sparseWeights);
+ string line;
+ getline(*is, line);
+ set(line, sparseWeights);
}
-
-void FeatureStats::savetxt(const std::string &file)
+void FeatureStats::savetxt(const string &file)
{
-// TRACE_ERR("saving the stats into " << file << std::endl);
-
- std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
- savetxt(outFile);
+ ofstream ofs(file.c_str(), ios::out);
+ ostream* os = &ofs;
+ savetxt(os);
}
-
-void FeatureStats::savetxt(std::ofstream& outFile)
+void FeatureStats::savetxt(ostream* os)
{
-// TRACE_ERR("saving the stats" << std::endl);
- outFile << *this;
+ *os << *this;
}
-void FeatureStats::savebin(std::ofstream& outFile)
+void FeatureStats::savetxt() {
+ savetxt(&cout);
+}
+
+void FeatureStats::savebin(ostream* os)
{
- outFile.write((char*) array_, GetArraySizeWithBytes());
+ os->write(reinterpret_cast<char*>(m_array),
+ static_cast<streamsize>(GetArraySizeWithBytes()));
}
ostream& operator<<(ostream& o, const FeatureStats& e)
@@ -251,3 +254,19 @@ ostream& operator<<(ostream& o, const FeatureStats& e)
return o;
}
+
+//ADDED_BY_TS
+bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
+ size_t size = f1.size();
+
+ if (size != f2.size())
+ return false;
+
+ for (size_t k=0; k < size; k++) {
+ if (f1.get(k) != f2.get(k))
+ return false;
+ }
+
+ return true;
+}
+//END_ADDED
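
For reference, a minimal usage sketch of the interfaces touched by this patch: the renamed SparseVector internals (m_name_to_id, m_id_to_name, m_fvector) behind the unchanged set/get/inner_product interface, and the newly added FeatureStats equality operator. The standalone main(), the feature name "lm", and the test values are illustrative assumptions, not part of the commit; FeatureStatsType is assumed to be a floating-point typedef as used elsewhere in mert.

#include "FeatureStats.h"
#include <iostream>

int main() {
  // Sparse feature scores: set() assigns ids lazily via the static
  // name/id maps; inner_product() multiplies matching ids.
  SparseVector s1, s2;
  s1.set("lm", 0.5);
  s2.set("lm", 2.0);
  FeatureStatsType dot = s1.inner_product(s2);          // 0.5 * 2.0 = 1.0
  std::cout << "dot = " << dot << std::endl;

  // The new operator== compares the dense entries only: size() first,
  // then each value via get(); sparse features are not part of the test.
  FeatureStats a, b;
  a.add(0.5); a.add(1.25);
  b.add(0.5); b.add(1.25);
  std::cout << (a == b ? "equal" : "different") << std::endl;   // equal
  b.add(2.0);
  std::cout << (a == b ? "equal" : "different") << std::endl;   // different

  return 0;
}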