Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: Colin Cherry <Colin.Cherry@nrc-cnrc.gc.ca> 2012-06-26 19:40:16 +0400
committer: Colin Cherry <Colin.Cherry@nrc-cnrc.gc.ca> 2012-06-27 00:29:20 +0400
commit: 32299593fa4699bcc04c0b461ed935b207ab31ad (patch)
tree: baf24788cd7051928ebf598bb9d6b7b198d0d138 /mert
parent: 7cd83567523aba0bea371e32ae4d3282f3bbf34d (diff)
Added debugging info to kbmira.
Diffstat (limited to 'mert')
-rw-r--r-- mert/HypPackEnumerator.cpp 8
-rw-r--r-- mert/HypPackEnumerator.h 3
-rw-r--r-- mert/MiraFeatureVector.cpp 10
-rw-r--r-- mert/MiraFeatureVector.h 3
-rw-r--r-- mert/MiraWeightVector.cpp 13
-rw-r--r-- mert/MiraWeightVector.h 3
-rw-r--r-- mert/kbmira.cpp 14
7 files changed, 52 insertions, 2 deletions
diff --git a/mert/HypPackEnumerator.cpp b/mert/HypPackEnumerator.cpp
index ffbf3cfb5..808ba8335 100644
--- a/mert/HypPackEnumerator.cpp
+++ b/mert/HypPackEnumerator.cpp
@@ -132,6 +132,10 @@ const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
return m_scoreDataIters[pij.first]->operator[](pij.second);
}
+size_t StreamingHypPackEnumerator::cur_id() { // Id of the pack currently being enumerated, as tracked in m_sentenceId.
+ return this->m_sentenceId;
+}
+
/* --------- RandomAccessHypPackEnumerator ------------- */
RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
@@ -181,7 +185,9 @@ const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i) {
return m_scores[m_indexes[m_cur_index]][i];
}
-
+size_t RandomAccessHypPackEnumerator::cur_id() { // Id of the current pack: the m_indexes entry at position m_cur_index.
+ return this->m_indexes[this->m_cur_index];
+}
// --Emacs trickery--
// Local Variables:
// mode:c++
diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
index d878c2625..420cd740a 100644
--- a/mert/HypPackEnumerator.h
+++ b/mert/HypPackEnumerator.h
@@ -27,6 +27,7 @@ public:
virtual bool finished() = 0;
virtual void next() = 0;
+ virtual std::size_t cur_id() = 0;
virtual std::size_t cur_size() = 0;
virtual std::size_t num_dense() const = 0;
virtual const FeatureDataItem& featuresAt(std::size_t i) = 0;
@@ -46,6 +47,7 @@ public:
virtual bool finished();
virtual void next();
+ virtual std::size_t cur_id();
virtual std::size_t cur_size();
virtual const FeatureDataItem& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
@@ -79,6 +81,7 @@ public:
virtual bool finished();
virtual void next();
+ virtual std::size_t cur_id();
virtual std::size_t cur_size();
virtual const FeatureDataItem& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index a1a5a5bdb..6233d0ba9 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -1,4 +1,5 @@
#include <cmath>
+#include <iomanip>
#include "MiraFeatureVector.h"
@@ -139,6 +140,15 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
+ostream& operator<<(ostream& o, const MiraFeatureVector& e) // Streams e as space-separated "feat(i):val(i)" pairs and returns o.
+{
+ for(size_t idx=0; idx!=e.size(); ++idx) {
+ if(idx!=0) o << " "; // one space between consecutive pairs, none before the first
+ o << e.feat(idx) << ":" << e.val(idx);
+ }
+ return o;
+}
+
// --Emacs trickery--
// Local Variables:
// mode:c++
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index 31dd025c3..89c7fc126 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -13,6 +13,7 @@
#define MERT_MIRA_FEATURE_VECTOR_H
#include <vector>
+#include <iostream>
#include "FeatureDataIterator.h"
@@ -34,6 +35,8 @@ public:
friend MiraFeatureVector operator-(const MiraFeatureVector& a,
const MiraFeatureVector& b);
+ friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
+
private:
std::vector<ValType> m_dense;
std::vector<std::size_t> m_sparseFeats;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index 7e17a2714..092c5a44b 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -1,5 +1,7 @@
#include "MiraWeightVector.h"
+#include <cmath>
+
using namespace std;
/**
@@ -113,6 +115,17 @@ AvgWeightVector::AvgWeightVector(const MiraWeightVector& wv)
:m_wv(wv)
{}
+ostream& operator<<(ostream& o, const MiraWeightVector& e) // Streams the non-negligible weights of e to o as "index:value" pairs and returns o.
+{
+ for(size_t i=0;i<e.m_weights.size();i++) {
+ if(abs(e.m_weights[i])>1e-8) { // skip weights whose magnitude does not exceed 1e-8
+ if(i>0) o << " "; // separator before every printed entry other than index 0
+ o << i << ":" << e.m_weights[i]; // write to the caller's stream; this line previously sent the payload to cerr
+ }
+ }
+ return o;
+}
+
ValType AvgWeightVector::weight(size_t index) const
{
if(m_wv.m_numUpdates==0) return m_wv.weight(index);
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index 65b374625..20d7e0314 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -11,6 +11,7 @@
#define MERT_MIRA_WEIGHT_VECTOR_H
#include <vector>
+#include <iostream>
#include "MiraFeatureVector.h"
@@ -59,6 +60,8 @@ public:
friend class AvgWeightVector;
+ friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
+
private:
/**
* Updates a weight and lazily updates its total
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index 0723e9975..80a797860 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -84,6 +84,7 @@ int main(int argc, char** argv)
bool streaming = false; // Stream all k-best lists?
bool no_shuffle = false; // Don't shuffle, even for in memory version
bool model_bg = false; // Use model for background corpus
+ bool verbose = false; // Verbose updates
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
@@ -100,7 +101,8 @@ int main(int argc, char** argv)
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
- ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background");
+ ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
+ ("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
;
po::options_description cmdline_options;
@@ -115,6 +117,8 @@ int main(int argc, char** argv)
exit(0);
}
+ cerr << "kbmira with c=" << c << " decay=" << decay << " no_shuffle=" << no_shuffle << endl;
+
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
@@ -233,6 +237,14 @@ int main(int argc, char** argv)
// Loss and update
ValType diff_score = wv.score(diff);
ValType loss = delta - diff_score;
+ if(verbose) {
+ cerr << "Updating sent " << train->cur_id() << endl;
+ cerr << "Wght: " << wv << endl;
+ cerr << "Hope: " << hope << " => " << hopeBleu << " <> " << wv.score(hope) << endl;
+ cerr << "Fear: " << fear << " => " << fearBleu << " <> " << wv.score(fear) << endl;
+ cerr << "Diff: " << diff << " => " << delta << " <> " << diff_score << endl;
+ cerr << endl;
+ }
if(loss > 0) {
ValType eta = min(c, loss / diff.sqrNorm());
wv.update(diff,eta);