Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: Barry Haddow <barry.haddow@gmail.com> 2015-04-03 17:46:59 +0300
committer: Barry Haddow <barry.haddow@gmail.com> 2015-04-03 17:46:59 +0300
commit: 217f389230fe4e2114a82aedf43d471b1baaa111 (patch)
tree: 29b0ffcd3b0b26767e0291db9f72db9c65021dcc /mert
parent: 6626d59cfceb8f8aa980c731ae62b9a375cefa2e (diff)
Attempt at fixing sparse features for hgmira
Diffstat (limited to 'mert')
-rw-r--r-- mert/HopeFearDecoder.cpp | 6
-rw-r--r-- mert/Jamfile | 1
-rw-r--r-- mert/MiraFeatureVectorTest.cpp | 49
-rw-r--r-- mert/MiraWeightVector.cpp | 20
-rw-r--r-- mert/MiraWeightVector.h | 6
5 files changed, 72 insertions, 10 deletions
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index 3e62d8171..5288116d6 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -180,7 +180,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
references_.Load(referenceFiles, vocab_);
SparseVector weights;
- wv.ToSparse(&weights);
+ wv.ToSparse(&weights,num_dense_);
scorer_ = scorer;
static const string kWeights = "weights";
@@ -243,7 +243,7 @@ void HypergraphHopeFearDecoder::HopeFear(
{
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
- wv.ToSparse(&weights);
+ wv.ToSparse(&weights, num_dense_);
const Graph& graph = *(graphs_[sentenceId]);
// ValType hope_scale = 1.0;
@@ -338,7 +338,7 @@ void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValTy
HgHypothesis bestHypo;
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
- wv.ToSparse(&weights);
+ wv.ToSparse(&weights, num_dense_);
vector<ValType> bg(scorer_->NumberOfScores());
//cerr << "Calculating bleu on " << sentenceId << endl;
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
diff --git a/mert/Jamfile b/mert/Jamfile
index 4dd2fb540..aff2c78be 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -77,6 +77,7 @@ unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_f
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test mira_feature_vector_test : MiraFeatureVectorTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVectorTest.cpp b/mert/MiraFeatureVectorTest.cpp
new file mode 100644
index 000000000..d64ba79a5
--- /dev/null
+++ b/mert/MiraFeatureVectorTest.cpp
@@ -0,0 +1,49 @@
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+#define BOOST_TEST_MODULE MiraFeatureVector
+#include <boost/test/unit_test.hpp>
+
+using namespace MosesTuning;
+
+/* Note that the conversion to and from SparseVector needs to know
+how many of the features are really "dense". This is because in hgmira
+all features (sparse and dense) get rolled into a single SparseVector.
+*/
+
+BOOST_AUTO_TEST_CASE(from_sparse) {
+ SparseVector sp;
+ sp.set("dense0", 0.2);
+ sp.set("dense1", 0.3);
+ sp.set("sparse0", 0.7);
+ sp.set("sparse1", 0.9);
+ sp.set("sparse2", 0.1);
+
+ MiraFeatureVector mfv(sp,2);
+ BOOST_CHECK_EQUAL(mfv.size(),5);
+
+ BOOST_CHECK_EQUAL(mfv.feat(0),0);
+ BOOST_CHECK_EQUAL(mfv.feat(1),1);
+ BOOST_CHECK_EQUAL(mfv.feat(2),4);
+ BOOST_CHECK_EQUAL(mfv.feat(3),5);
+ BOOST_CHECK_EQUAL(mfv.feat(4),6);
+
+ BOOST_CHECK_CLOSE(mfv.val(0), 0.2,1e-5);
+ BOOST_CHECK_CLOSE(mfv.val(1), 0.3,1e-5);
+ BOOST_CHECK_CLOSE(mfv.val(2), 0.7,1e-5);
+ BOOST_CHECK_CLOSE(mfv.val(3), 0.9,1e-5);
+ BOOST_CHECK_CLOSE(mfv.val(4), 0.1,1e-5);
+
+ MiraWeightVector mwv;
+ mwv.update(mfv,1.0);
+ SparseVector sp2;
+ mwv.ToSparse(&sp2,2);
+
+ //check we get back what we started with
+ BOOST_CHECK_CLOSE(sp2.get("dense0"), 0.2,1e-5);
+ BOOST_CHECK_CLOSE(sp2.get("dense1"), 0.3,1e-5);
+ BOOST_CHECK_CLOSE(sp2.get("sparse0"), 0.7,1e-5);
+ BOOST_CHECK_CLOSE(sp2.get("sparse1"), 0.9,1e-5);
+ BOOST_CHECK_CLOSE(sp2.get("sparse2"), 0.1,1e-5);
+
+}
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index eba9617c8..367305c48 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,11 +93,17 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
-void MiraWeightVector::ToSparse(SparseVector* sparse) const
+void MiraWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
{
for (size_t i = 0; i < m_weights.size(); ++i) {
if(abs(m_weights[i])>1e-8) {
- sparse->set(i,m_weights[i]);
+ if (i < denseSize) {
+ sparse->set(i,m_weights[i]);
+ } else {
+ //The ids in MiraFeatureVector/MiraWeightVector for sparse features
+ //need to be translated when converting back to SparseVector.
+ sparse->set(i-denseSize, m_weights[i]);
+ }
}
}
}
@@ -172,12 +178,18 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
-void AvgWeightVector::ToSparse(SparseVector* sparse) const
+void AvgWeightVector::ToSparse(SparseVector* sparse, size_t denseSize) const
{
for (size_t i = 0; i < size(); ++i) {
ValType w = weight(i);
if(abs(w)>1e-8) {
- sparse->set(i,w);
+ if (i < denseSize) {
+ sparse->set(i,w);
+ } else {
+ //The ids in MiraFeatureVector/MiraWeightVector for sparse features
+ //need to be translated when converting back to SparseVector.
+ sparse->set(i-denseSize, w);
+ }
}
}
}
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index bbc28704b..8200d6013 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -64,9 +64,9 @@ public:
AvgWeightVector avg();
/**
- * Convert to sparse vector, interpreting all features as sparse.
+ * Convert to sparse vector. Indices below denseSize are kept as-is; indices at or above denseSize are shifted down by denseSize. Only used by hgmira.
**/
- void ToSparse(SparseVector* sparse) const;
+ void ToSparse(SparseVector* sparse, size_t denseSize) const;
friend class AvgWeightVector;
@@ -104,7 +104,7 @@ public:
ValType score(const MiraFeatureVector& fv) const;
ValType weight(std::size_t index) const;
std::size_t size() const;
- void ToSparse(SparseVector* sparse) const;
+ void ToSparse(SparseVector* sparse, size_t num_dense) const;
private:
const MiraWeightVector& m_wv;
};