github.com/moses-smt/mosesdecoder.git
-rw-r--r--  mert/BleuScorer.cpp        |   4
-rw-r--r--  mert/BleuScorer.h          |   2
-rw-r--r--  mert/FeatureStats.cpp      |  30
-rw-r--r--  mert/FeatureStats.h        |   7
-rw-r--r--  mert/ForestRescore.cpp     | 422
-rw-r--r--  mert/ForestRescore.h       | 120
-rw-r--r--  mert/ForestRescoreTest.cpp | 246
-rw-r--r--  mert/HopeFearDecoder.cpp   | 328
-rw-r--r--  mert/HopeFearDecoder.h     | 151
-rw-r--r--  mert/Hypergraph.cpp        | 286
-rw-r--r--  mert/Hypergraph.h          | 251
-rw-r--r--  mert/HypergraphTest.cpp    | 151
-rw-r--r--  mert/Jamfile               |   7
-rw-r--r--  mert/MiraFeatureVector.cpp |  40
-rw-r--r--  mert/MiraFeatureVector.h   |   8
-rw-r--r--  mert/MiraWeightVector.cpp  |  17
-rw-r--r--  mert/MiraWeightVector.h    |   8
-rw-r--r--  mert/kbmira.cpp            | 187
18 files changed, 2154 insertions(+), 111 deletions(-)
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 467855d9b..f6ada2aa8 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -266,12 +266,12 @@ float smoothedSentenceBleu
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
{
// Sum sent and background
- std::vector<float> stats;
UTIL_THROW_IF(sent.size()!=bg.size(), util::Exception, "Error");
UTIL_THROW_IF(sent.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
+ std::vector<float> stats(sent.size());
for(size_t i=0; i<sent.size(); i++)
- stats.push_back(sent[i]+bg[i]);
+ stats[i] = sent[i]+bg[i];
// Calculate BLEU
float logbleu = 0.0;
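
The stats vector used throughout this patch has a fixed layout: for each n-gram order n, stats[2*(n-1)] holds the clipped matches, stats[2*(n-1)+1] holds the total n-gram count, and the final slot holds the (effective) reference length. As a minimal sketch of how BLEU falls out of such a vector (illustrative only, not the Moses implementation; bleuFromStats is a hypothetical helper):

#include <algorithm>
#include <cmath>
#include <vector>

// BLEU from the interleaved (matches, counts) layout described above.
float bleuFromStats(const std::vector<float>& stats, size_t order = 4) {
  float logbleu = 0.0f;
  for (size_t n = 0; n < order; ++n)
    logbleu += std::log(stats[2*n]) - std::log(stats[2*n + 1]);  // log precision
  logbleu /= order;
  // Brevity penalty: penalise hypotheses shorter than the reference.
  const float hypLen = stats[1];        // total unigrams == hypothesis length
  const float refLen = stats[2*order];
  logbleu += std::min(0.0f, 1.0f - refLen / hypLen);
  return std::exp(logbleu);
}

Summing the background stats in first, as sentenceLevelBackgroundBleu does, keeps the logarithms finite for short or poorly matching sentences.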
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 8be567574..affa37fbf 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -13,7 +13,7 @@
namespace MosesTuning
{
-const int kBleuNgramOrder = 4;
+const size_t kBleuNgramOrder = 4;
class NgramCounts;
class Reference;
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5a12be70a..a0c6a6ebc 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -14,6 +14,8 @@
#include <boost/functional/hash.hpp>
+#include "util/murmur_hash.hh"
+
#include "Util.h"
using namespace std;
@@ -59,6 +61,11 @@ void SparseVector::set(const string& name, FeatureStatsType value)
m_fvector[id] = value;
}
+void SparseVector::set(size_t id, FeatureStatsType value) {
+ assert(m_id_to_name.size() > id);
+ m_fvector[id] = value;
+}
+
void SparseVector::write(ostream& out, const string& sep) const
{
for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
@@ -91,6 +98,16 @@ void SparseVector::load(const string& file)
}
}
+SparseVector& SparseVector::operator+=(const SparseVector& rhs)
+{
+
+ for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+ i != rhs.m_fvector.end(); ++i) {
+ m_fvector[i->first] = get(i->first) + (i->second);
+ }
+ return *this;
+}
+
SparseVector& SparseVector::operator-=(const SparseVector& rhs)
{
@@ -162,12 +179,18 @@ bool operator==(SparseVector const& item1, SparseVector const& item2)
return item1.m_fvector==item2.m_fvector;
}
+
std::size_t hash_value(SparseVector const& item)
{
- boost::hash<SparseVector::fvector_t> hasher;
- return hasher(item.m_fvector);
+ size_t seed = 0;
+ for (SparseVector::fvector_t::const_iterator i = item.m_fvector.begin(); i != item.m_fvector.end(); ++i) {
+ seed = util::MurmurHashNative(&(i->first), sizeof(i->first), seed);
+ seed = util::MurmurHashNative(&(i->second), sizeof(i->second), seed);
+ }
+ return seed;
}
+
FeatureStats::FeatureStats()
: m_available_size(kAvailableSize), m_entries(0),
m_array(new FeatureStatsType[m_available_size]) {}
@@ -181,8 +204,7 @@ FeatureStats::FeatureStats(const size_t size)
FeatureStats::~FeatureStats()
{
- delete [] m_array;
- m_array = NULL;
+ delete [] m_array;
}
void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index a882e7358..2ccbac50c 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -14,6 +14,11 @@
#include <map>
#include <string>
#include <vector>
+
+#include <boost/unordered_map.hpp>
+
+#include <util/string_piece.hh>
+
#include "Types.h"
namespace MosesTuning
@@ -31,6 +36,7 @@ public:
FeatureStatsType get(const std::string& name) const;
FeatureStatsType get(std::size_t id) const;
void set(const std::string& name, FeatureStatsType value);
+ void set(size_t id, FeatureStatsType value);
void clear();
void load(const std::string& file);
std::size_t size() const {
@@ -40,6 +46,7 @@ public:
void write(std::ostream& out, const std::string& sep = " ") const;
SparseVector& operator-=(const SparseVector& rhs);
+ SparseVector& operator+=(const SparseVector& rhs);
FeatureStatsType inner_product(const SparseVector& rhs) const;
// Added by cherryc
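
Taken together, the FeatureStats changes give SparseVector in-place accumulation, an id-based setter, and a hash_value that folds each (id, value) pair into a running MurmurHash seed. A hypothetical usage sketch (Example is not part of the patch; per the new assert, the id-based set requires the name to have been registered via the string overload first):

#include <cstddef>
#include "FeatureStats.h"
using namespace MosesTuning;

void Example() {
  SparseVector a, b;
  a.set("lm", 1.5);   // registers "lm" and assigns it an id
  b.set("lm", 2.0);
  b.set("tm", -0.5);
  a += b;             // a.get("lm") == 3.5, a.get("tm") == -0.5
  // Free function defined in FeatureStats.cpp: chains MurmurHashNative
  // over every (id, value) pair in iteration order.
  std::size_t h = hash_value(a);
  (void)h;
}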
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
new file mode 100644
index 000000000..c88b58e4c
--- /dev/null
+++ b/mert/ForestRescore.cpp
@@ -0,0 +1,422 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cmath>
+#include <limits>
+#include <list>
+
+#include <boost/unordered_set.hpp>
+
+#include "util/file_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "BleuScorer.h"
+#include "ForestRescore.h"
+
+using namespace std;
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+ out << "[";
+ for (size_t i = 0; i < wordVec.size(); ++i) {
+ out << wordVec[i]->first;
+ if (i+1< wordVec.size()) out << " ";
+ }
+ out << "]";
+ return out;
+}
+
+
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+ for (size_t i = 0; i < files.size(); ++i) {
+ util::FilePiece fh(files[i].c_str());
+ size_t sentenceId = 0;
+ while(true) {
+ StringPiece line;
+ try {
+ line = fh.ReadLine();
+ } catch (util::EndOfFileException &e) {
+ break;
+ }
+ AddLine(sentenceId, line, vocab);
+ ++sentenceId;
+ }
+ }
+
+}
+
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+ //cerr << line << endl;
+ NgramCounter ngramCounts;
+ list<WordVec> openNgrams;
+ size_t length = 0;
+ //tokenize & count
+ for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
+ const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
+ ++length;
+ openNgrams.push_front(WordVec());
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(nextTok);
+ ++ngramCounts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+
+ //merge into overall ngram map
+ for (NgramCounter::const_iterator ni = ngramCounts.begin();
+ ni != ngramCounts.end(); ++ni) {
+ size_t count = ni->second;
+ //cerr << *ni << " " << count << endl;
+ if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
+ NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
+ if (totalsIter == ngramCounts_[sentenceId].end()) {
+ ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
+ } else {
+ ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
+ ngramCounts_[sentenceId][ni->first].second += count; //no clip
+ }
+ }
+ //length
+ if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
+ //TODO - length strategy - this is MIN
+ if (!lengths_[sentenceId]) {
+ lengths_[sentenceId] = length;
+ } else {
+ lengths_[sentenceId] = min(length,lengths_[sentenceId]);
+ }
+ //cerr << endl;
+
+}
+
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
+ const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
+ NgramMap::const_iterator ngi = ngramCounts.find(ngram);
+ if (ngi == ngramCounts.end()) return 0;
+ return clip ? ngi->second.first : ngi->second.second;
+}
+
+VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
+
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+ for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+ //cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
+ size_t order = ngi->first.size();
+ size_t count = ngi->second;
+ bleuStats[(order-1)*2 + 1] += count;
+ bleuStats[(order-1) * 2] += min(count, references_.NgramMatches(sentenceId_,ngi->first,false));
+ }
+}
+
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+ size_t targetLength = 0;
+ for (size_t i = 0; i < edge.Words().size(); ++i) {
+ const Vocab::Entry* word = edge.Words()[i];
+ if (word) ++targetLength;
+ }
+ for (size_t i = 0; i < edge.Children().size(); ++i) {
+ const VertexState& state = vertexStates_[edge.Children()[i]];
+ targetLength += state.targetLength;
+ }
+ return targetLength;
+}
+
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+ NgramCounter ngramCounts;
+ size_t childId = 0;
+ size_t wordId = 0;
+ size_t contextId = 0; //position within left or right context
+ const VertexState* vertexState = NULL;
+ bool inLeftContext = false;
+ bool inRightContext = false;
+ list<WordVec> openNgrams;
+ const Vocab::Entry* currentWord = NULL;
+ while (wordId < edge.Words().size()) {
+ currentWord = edge.Words()[wordId];
+ if (currentWord != NULL) {
+ ++wordId;
+ } else {
+ if (!inLeftContext && !inRightContext) {
+ //entering a vertex
+ assert(!vertexState);
+ vertexState = &(vertexStates_[edge.Children()[childId]]);
+ ++childId;
+ if (vertexState->leftContext.size()) {
+ inLeftContext = true;
+ contextId = 0;
+ currentWord = vertexState->leftContext[contextId];
+ } else {
+ //empty context
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ } else {
+ //already in a vertex
+ ++contextId;
+ if (inLeftContext && contextId < vertexState->leftContext.size()) {
+ //still in left context
+ currentWord = vertexState->leftContext[contextId];
+ } else if (inLeftContext) {
+ //at end of left context
+ if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
+ //full size context, jump to right state
+ openNgrams.clear();
+ inLeftContext = false;
+ inRightContext = true;
+ contextId = 0;
+ currentWord = vertexState->rightContext[contextId];
+ } else {
+ //short context, just ignore right context
+ inLeftContext = false;
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ } else {
+ //in right context
+ if (contextId < vertexState->rightContext.size()) {
+ currentWord = vertexState->rightContext[contextId];
+ } else {
+ //leaving vertex
+ inRightContext = false;
+ vertexState = NULL;
+ ++wordId;
+ continue;
+ }
+ }
+ }
+ }
+ assert(currentWord);
+ if (graph_.IsBoundary(currentWord)) continue;
+ openNgrams.push_front(WordVec());
+ openNgrams.front().reserve(kBleuNgramOrder);
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(currentWord);
+ //Only insert ngrams that cross boundaries
+ if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+
+ //Collect matches
+ //This edge
+ //cerr << "edge ngrams" << endl;
+ UpdateMatches(ngramCounts, bleuStats);
+
+ //Child vertexes
+ for (size_t i = 0; i < edge.Children().size(); ++i) {
+ //cerr << "vertex ngrams " << edge.Children()[i] << endl;
+ for (size_t j = 0; j < bleuStats.size(); ++j) {
+ bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
+ }
+ }
+
+
+ FeatureStatsType sourceLength = head.SourceCovered();
+ size_t referenceLength = references_.Length(sentenceId_);
+ FeatureStatsType effectiveReferenceLength =
+ sourceLength / totalSourceLength_ * referenceLength;
+
+ bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
+ //backgroundBleu_[backgroundBleu_.size()-1] =
+ // backgroundRefLength_ * sourceLength / totalSourceLength_;
+ FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
+
+ return bleu;
+}
+
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+ //TODO: Maybe more efficient to absorb into the Score() method
+ VertexState& vertexState = vertexStates_[vertexId];
+ //cerr << "Updating state for " << vertexId << endl;
+
+ //leftContext
+ int wi = 0;
+ const VertexState* childState = NULL;
+ int contexti = 0; //index within child context
+ int childi = 0;
+ while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
+ if ((size_t)wi >= winnerEdge.Words().size()) break;
+ const Vocab::Entry* word = winnerEdge.Words()[wi];
+ if (word != NULL) {
+ vertexState.leftContext.push_back(word);
+ ++wi;
+ } else {
+ if (childState == NULL) {
+ //start of child state
+ childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
+ contexti = 0;
+ }
+ if ((size_t)contexti < childState->leftContext.size()) {
+ vertexState.leftContext.push_back(childState->leftContext[contexti++]);
+ } else {
+ //end of child context
+ childState = NULL;
+ ++wi;
+ }
+ }
+ }
+
+ //rightContext
+ wi = winnerEdge.Words().size() - 1;
+ childState = NULL;
+ childi = winnerEdge.Children().size() - 1;
+ while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
+ if (wi < 0) break;
+ const Vocab::Entry* word = winnerEdge.Words()[wi];
+ if (word != NULL) {
+ vertexState.rightContext.push_back(word);
+ --wi;
+ } else {
+ if (childState == NULL) {
+ //start (ie rhs) of child state
+ childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
+ contexti = childState->rightContext.size()-1;
+ }
+ if (contexti >= 0) {
+ vertexState.rightContext.push_back(childState->rightContext[contexti--]);
+ } else {
+ //end (ie lhs) of child context
+ childState = NULL;
+ --wi;
+ }
+ }
+ }
+ reverse(vertexState.rightContext.begin(), vertexState.rightContext.end());
+
+ //length + counts
+ vertexState.targetLength = GetTargetLength(winnerEdge);
+ vertexState.bleuStats = bleuStats;
+}
+
+
+typedef pair<const Edge*,FeatureStatsType> BackPointer;
+
+
+/**
+ * Recurse through back pointers
+ **/
+static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
+ HgHypothesis* bestHypo) {
+ //cerr << "Expanding " << vertexId << endl;
+ //UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
+ if (!bps[vertexId].first) return;
+ const Edge* prevEdge = bps[vertexId].first;
+ bestHypo->featureVector += *(prevEdge->Features().get());
+ size_t childId = 0;
+ for (size_t i = 0; i < prevEdge->Words().size(); ++i) {
+ if (prevEdge->Words()[i] != NULL) {
+ bestHypo->text.push_back(prevEdge->Words()[i]);
+ } else {
+ size_t childVertexId = prevEdge->Children()[childId++];
+ HgHypothesis childHypo;
+ GetBestHypothesis(childVertexId,graph,bps,&childHypo);
+ bestHypo->text.insert(bestHypo->text.end(), childHypo.text.begin(), childHypo.text.end());
+ bestHypo->featureVector += childHypo.featureVector;
+ }
+ }
+}
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
+{
+ BackPointer init(NULL,kMinScore);
+ vector<BackPointer> backPointers(graph.VertexSize(),init);
+ HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
+ vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
+ for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
+ //cerr << "vertex id " << vi << endl;
+ FeatureStatsType winnerScore = kMinScore;
+ const Vertex& vertex = graph.GetVertex(vi);
+ const vector<const Edge*>& incoming = vertex.GetIncoming();
+ if (!incoming.size()) {
+ //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
+ //If no incoming edges, vertex is a dead end
+ backPointers[vi].first = NULL;
+ backPointers[vi].second = kMinScore/2;
+ } else {
+ //cerr << "\nVertex: " << vi << endl;
+ for (size_t ei = 0; ei < incoming.size(); ++ei) {
+ //cerr << "edge id " << ei << endl;
+ FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+ for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+ size_t childId = incoming[ei]->Children()[i];
+ UTIL_THROW_IF(backPointers[childId].second == kMinScore,
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ incomingScore += backPointers[childId].second;
+ }
+ vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
+ // cerr << "Score: " << incomingScore << " Bleu: ";
+ // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+ FeatureStatsType totalScore = incomingScore;
+ if (bleuWeight) {
+ FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
+ UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
+ totalScore += bleuWeight * bleuScore;
+ // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+ //cerr << "is " << incomingScore << " bs " << bleuScore << endl;
+ }
+ if (totalScore >= winnerScore) {
+ //We only store the feature score (not the bleu score) with the vertex,
+ //since the bleu score is always cumulative, ie from counts for the whole span.
+ winnerScore = totalScore;
+ backPointers[vi].first = incoming[ei];
+ backPointers[vi].second = incomingScore;
+ winnerStats = bleuStats;
+ }
+ }
+ //update with winner
+ //if (bleuWeight) {
+ //TODO: Not sure if we need this when computing max-model solution
+ bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+
+ }
+ }
+
+ //expand back pointers
+ GetBestHypothesis(graph.VertexSize()-1, graph, backPointers, bestHypo);
+
+ //bleu stats and fv
+
+ //Need the actual (clipped) stats
+ //TODO: This repeats code in bleu scorer - factor out
+ bestHypo->bleuStats.resize(kBleuNgramOrder*2+1);
+ NgramCounter counts;
+ list<WordVec> openNgrams;
+ for (size_t i = 0; i < bestHypo->text.size(); ++i) {
+ const Vocab::Entry* entry = bestHypo->text[i];
+ if (graph.IsBoundary(entry)) continue;
+ openNgrams.push_front(WordVec());
+ for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
+ k->push_back(entry);
+ ++counts[*k];
+ }
+ if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
+ }
+ for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+ size_t order = ngi->first.size();
+ size_t count = ngi->second;
+ bestHypo->bleuStats[(order-1)*2 + 1] += count;
+ bestHypo->bleuStats[(order-1) * 2] += min(count, references.NgramMatches(sentenceId,ngi->first,true));
+ }
+ bestHypo->bleuStats[kBleuNgramOrder*2] = references.Length(sentenceId);
+}
+
+
+};
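
Stripped of BLEU scoring and the dead-end handling, the dynamic program in Viterbi() reduces to a bottom-up pass over a topologically sorted hypergraph: each edge scores as its own weight plus the best scores of its child vertices, and each vertex remembers its best incoming edge as a back pointer. A self-contained sketch under those assumptions (SimpleEdge and SimpleVertex are illustrative stand-ins for the Moses types):

#include <cstddef>
#include <limits>
#include <vector>

struct SimpleEdge {
  float score;                        // feature score under current weights
  std::vector<std::size_t> children;  // tail vertices, all with smaller ids
};

struct SimpleVertex {
  std::vector<const SimpleEdge*> incoming;
};

// Best-derivation score per vertex plus the winning edge as a back pointer;
// vertices must be topologically sorted, which Viterbi() above asserts.
void SimpleViterbi(const std::vector<SimpleVertex>& vertices,
                   std::vector<const SimpleEdge*>* backPointers,
                   std::vector<float>* bestScores) {
  const float kMin = -std::numeric_limits<float>::max();
  backPointers->assign(vertices.size(), NULL);
  bestScores->assign(vertices.size(), kMin);
  for (std::size_t v = 0; v < vertices.size(); ++v) {
    for (std::size_t e = 0; e < vertices[v].incoming.size(); ++e) {
      const SimpleEdge* edge = vertices[v].incoming[e];
      float score = edge->score;      // leaf edges have no children
      for (std::size_t c = 0; c < edge->children.size(); ++c)
        score += (*bestScores)[edge->children[c]];  // children already final
      if (score >= (*bestScores)[v]) {
        (*bestScores)[v] = score;
        (*backPointers)[v] = edge;
      }
    }
  }
}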
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
new file mode 100644
index 000000000..900275b74
--- /dev/null
+++ b/mert/ForestRescore.h
@@ -0,0 +1,120 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifndef MERT_FOREST_RESCORE_H
+#define MERT_FOREST_RESCORE_H
+
+#include <valarray>
+#include <vector>
+
+#include <boost/unordered_set.hpp>
+
+#include "BleuScorer.h"
+#include "Hypergraph.h"
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
+
+struct NgramHash : public std::unary_function<const WordVec&, std::size_t> {
+ std::size_t operator()(const WordVec& ngram) const {
+ return util::MurmurHashNative(&(ngram[0]), ngram.size() * sizeof(WordVec::value_type));
+ }
+};
+
+struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&, bool> {
+ bool operator()(const WordVec& first, const WordVec& second) const {
+ if (first.size() != second.size()) return false;
+ return memcmp(&(first[0]), &(second[0]), first.size() * sizeof(WordVec::value_type)) == 0;
+ }
+};
+
+typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
+
+
+class ReferenceSet {
+
+
+public:
+
+ void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
+
+ void Load(const std::vector<std::string>& files, Vocab& vocab);
+
+ size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
+
+ size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+
+private:
+ //ngrams to (clipped,unclipped) counts
+ typedef boost::unordered_map<WordVec, std::pair<std::size_t,std::size_t>, NgramHash,NgramEquals> NgramMap;
+ std::vector<NgramMap> ngramCounts_;
+ std::vector<size_t> lengths_;
+
+};
+
+struct VertexState {
+ VertexState();
+
+ std::vector<FeatureStatsType> bleuStats;
+ WordVec leftContext;
+ WordVec rightContext;
+ size_t targetLength;
+};
+
+/**
+ * Used to score a rule (i.e. edge) when we are applying it.
+**/
+class HgBleuScorer {
+ public:
+ HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+ references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
+ backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+ vertexStates_.resize(graph.VertexSize());
+ totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+ }
+
+ FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+
+ void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+
+
+ private:
+ const ReferenceSet& references_;
+ std::vector<VertexState> vertexStates_;
+ size_t sentenceId_;
+ size_t totalSourceLength_;
+ const Graph& graph_;
+ std::vector<FeatureStatsType> backgroundBleu_;
+ FeatureStatsType backgroundRefLength_;
+
+ void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+ size_t GetTargetLength(const Edge& edge) const;
+};
+
+struct HgHypothesis {
+ SparseVector featureVector;
+ WordVec text;
+ std::vector<FeatureStatsType> bleuStats;
+};
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo);
+
+};
+
+#endif
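
NgramHash and NgramEquals treat an n-gram as the raw bytes of its Vocab::Entry pointers. That is sound only because Vocab interns every string: equal words always share a single Entry, so pointer equality doubles as word equality. A hypothetical usage sketch (CountBigrams is not part of the patch):

#include <cstddef>
#include "ForestRescore.h"
using namespace MosesTuning;

// Count all bigrams of an interned sentence into an NgramCounter.
void CountBigrams(const WordVec& sentence, NgramCounter* counts) {
  for (std::size_t i = 0; i + 1 < sentence.size(); ++i) {
    WordVec bigram(sentence.begin() + i, sentence.begin() + i + 2);
    ++(*counts)[bigram];  // operator[] default-constructs the count to 0
  }
}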
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
new file mode 100644
index 000000000..86975d3a5
--- /dev/null
+++ b/mert/ForestRescoreTest.cpp
@@ -0,0 +1,246 @@
+#include <iostream>
+
+#include "ForestRescore.h"
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+ for (size_t i = 0; i < 9; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,5);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+ e0->AddFeature(f1, 2.0);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1, 1.0);
+ e1->AddFeature(f2, 3.0);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2, 2.5);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1, -1);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+ e3->AddFeature(f2, 0.5);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+
+ ReferenceSet references;
+ references.AddLine(0, "a b c k e f o", vocab);
+ HgHypothesis modelHypo;
+ vector<FeatureStatsType> bg(kBleuNgramOrder*2+1);
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+ Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+ BOOST_CHECK_CLOSE(2.0,modelHypo.featureVector.get(f1), 0.0001);
+ BOOST_CHECK_CLOSE(6.0,modelHypo.featureVector.get(f2), 0.0001);
+
+ BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[4]->first, modelHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[5]->first, modelHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[6]->first, modelHypo.text[5]->first);
+ BOOST_CHECK_EQUAL(words[7]->first, modelHypo.text[6]->first);
+ BOOST_CHECK_EQUAL(words[8]->first, modelHypo.text[7]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[8]->first);
+}
+
+
+
+BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ for (size_t i = 0; i < 13; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,8);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1,1);
+ e1->AddFeature(f2,1);
+ Edge* e5 = graph.NewEdge();
+ e5->AddWord(NULL);
+ e5->AddChild(0);
+ e5->AddWord(words[9]);
+ e5->AddWord(words[10]);
+ e5->AddFeature(f1,2);
+ e5->AddFeature(f2,-2);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+ v1->AddEdge(e5);
+ v1->SetSourceCovered(1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2,3);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+ v2->SetSourceCovered(3);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1,1);
+ Edge* e6 = graph.NewEdge();
+ e6->AddWord(NULL);
+ e6->AddChild(2);
+ e6->AddWord(words[9]);
+ e6->AddWord(words[12]);
+ e6->AddFeature(f2,1);
+ Edge* e7 = graph.NewEdge();
+ e7->AddWord(NULL);
+ e7->AddChild(1);
+ e7->AddWord(words[11]);
+ e7->AddWord(words[12]);
+ e7->AddFeature(f1,2);
+ e7->AddFeature(f2,3);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+ v3->AddEdge(e6);
+ v3->AddEdge(e7);
+ v3->SetSourceCovered(5);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+ v4->SetSourceCovered(6);
+
+ /*Paths || foo || bar || s(2,1)
+ ab cd hk || 1 || 5 || 7
+ hi cd hk || 2 || 2 || 6
+ ab jk || 3 || 4 || 10
+ hi jk || 4 || 1 || 9
+ ab cd efg || 2 || 4 || 8
+ hi cd efg || 3 || 1 || 7
+ */
+
+ ReferenceSet references;
+ references.AddLine(0, "a b c d h k", vocab);
+ HgHypothesis modelHypo;
+ vector<FeatureStatsType> bg(kBleuNgramOrder*2+1, 0.1);
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+ Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+ BOOST_CHECK_CLOSE(3.0,modelHypo.featureVector.get(f1), 0.0001);
+ BOOST_CHECK_CLOSE(4.0,modelHypo.featureVector.get(f2), 0.0001);
+
+ BOOST_CHECK_EQUAL(6, modelHypo.text.size());
+
+ //expect ab jk
+ BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[11]->first, modelHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, modelHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[5]->first);
+
+
+ HgHypothesis hopeHypo;
+ Viterbi(graph, weights, 1, references, 0, bg, &hopeHypo);
+ //expect abcdhk
+ BOOST_CHECK_EQUAL(8, hopeHypo.text.size());
+
+ BOOST_CHECK_EQUAL(words[0]->first, hopeHypo.text[0]->first);
+ BOOST_CHECK_EQUAL(words[2]->first, hopeHypo.text[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, hopeHypo.text[2]->first);
+ BOOST_CHECK_EQUAL(words[4]->first, hopeHypo.text[3]->first);
+ BOOST_CHECK_EQUAL(words[5]->first, hopeHypo.text[4]->first);
+ BOOST_CHECK_EQUAL(words[9]->first, hopeHypo.text[5]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, hopeHypo.text[6]->first);
+ BOOST_CHECK_EQUAL(words[1]->first, hopeHypo.text[7]->first);
+
+ BOOST_CHECK_EQUAL(kBleuNgramOrder*2+1, hopeHypo.bleuStats.size());
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[0]);
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[1]);
+ BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[2]);
+ BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[3]);
+ BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[4]);
+ BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[5]);
+ BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[6]);
+ BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[7]);
+ BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
+}
+
+
+
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
new file mode 100644
index 000000000..5f5c59966
--- /dev/null
+++ b/mert/HopeFearDecoder.cpp
@@ -0,0 +1,328 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cmath>
+#include <iterator>
+
+#include <boost/filesystem.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+
+#include "BleuScorer.h"
+#include "HopeFearDecoder.h"
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+namespace MosesTuning {
+
+static const ValType BLEU_RATIO = 5;
+
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+ vector<ValType> stats(kBleuNgramOrder*2+1,0);
+ for(reset(); !finished(); next()) {
+ vector<ValType> sent;
+ MaxModel(wv,&sent);
+ for(size_t i=0; i<sent.size(); i++) {
+ stats[i]+=sent[i];
+ }
+ }
+ return unsmoothedBleu(stats);
+}
+
+NbestHopeFearDecoder::NbestHopeFearDecoder(
+ const vector<string>& featureFiles,
+ const vector<string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope
+ ) : safe_hope_(safe_hope) {
+ if (streaming) {
+ train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
+ } else {
+ train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
+ }
+}
+
+
+void NbestHopeFearDecoder::next() {
+ train_->next();
+}
+
+bool NbestHopeFearDecoder::finished() {
+ return train_->finished();
+}
+
+void NbestHopeFearDecoder::reset() {
+ train_->reset();
+}
+
+void NbestHopeFearDecoder::HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) {
+
+
+ // Hope / fear decode
+ ValType hope_scale = 1.0;
+ size_t hope_index=0, fear_index=0, model_index=0;
+ ValType hope_score=0, fear_score=0, model_score=0;
+ for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+ ValType hope_bleu, hope_model;
+ for(size_t i=0; i< train_->cur_size(); i++) {
+ const MiraFeatureVector& vec=train_->featuresAt(i);
+ ValType score = wv.score(vec);
+ ValType bleu = sentenceLevelBackgroundBleu(train_->scoresAt(i),backgroundBleu);
+ // Hope
+ if(i==0 || (hope_scale*score + bleu) > hope_score) {
+ hope_score = hope_scale*score + bleu;
+ hope_index = i;
+ hope_bleu = bleu;
+ hope_model = score;
+ }
+ // Fear
+ if(i==0 || (score - bleu) > fear_score) {
+ fear_score = score - bleu;
+ fear_index = i;
+ }
+ // Model
+ if(i==0 || score > model_score) {
+ model_score = score;
+ model_index = i;
+ }
+ }
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // where model score is having far more influence than BLEU
+ hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ hope_scale = abs(hope_bleu) / abs(hope_model);
+ else break;
+ }
+ hopeFear->modelFeatures = train_->featuresAt(model_index);
+ hopeFear->hopeFeatures = train_->featuresAt(hope_index);
+ hopeFear->fearFeatures = train_->featuresAt(fear_index);
+
+ hopeFear->hopeStats = train_->scoresAt(hope_index);
+ hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+ const vector<float>& fear_stats = train_->scoresAt(fear_index);
+ hopeFear->fearBleu = sentenceLevelBackgroundBleu(fear_stats, backgroundBleu);
+
+ hopeFear->modelStats = train_->scoresAt(model_index);
+ hopeFear->hopeFearEqual = (hope_index == fear_index);
+}
+
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+ // Find max model
+ size_t max_index=0;
+ ValType max_score=0;
+ for(size_t i=0; i<train_->cur_size(); i++) {
+ MiraFeatureVector vec(train_->featuresAt(i));
+ ValType score = wv.score(vec);
+ if(i==0 || score > max_score) {
+ max_index = i;
+ max_score = score;
+ }
+ }
+ *stats = train_->scoresAt(max_index);
+}
+
+
+
+HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
+ (
+ const string& hypergraphDir,
+ const vector<string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv
+ ) :
+ num_dense_(num_dense) {
+
+ UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
+ UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
+ UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
+ references_.Load(referenceFiles, vocab_);
+
+ SparseVector weights;
+ wv.ToSparse(&weights);
+
+ static const string kWeights = "weights";
+ fs::directory_iterator dend;
+ size_t fileCount = 0;
+ cerr << "Reading hypergraphs" << endl;
+ for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
+ if (di->path().filename() == kWeights) continue;
+ Graph graph(vocab_);
+ size_t id = boost::lexical_cast<size_t>(di->path().stem().string());
+ util::FilePiece file(di->path().string().c_str());
+ ReadGraph(file,graph);
+
+ //cerr << "ref length " << references_.Length(id) << endl;
+ size_t edgeCount = hg_pruning * references_.Length(id);
+ boost::shared_ptr<Graph> prunedGraph;
+ prunedGraph.reset(new Graph(vocab_));
+ graph.Prune(prunedGraph.get(), weights, edgeCount);
+ graphs_[id] = prunedGraph;
+ //cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
+ ++fileCount;
+ if (fileCount % 10 == 0) cerr << ".";
+ if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
+ }
+ cerr << endl << "Done" << endl;
+
+
+}
+
+void HypergraphHopeFearDecoder::reset() {
+ graphIter_ = graphs_.begin();
+}
+
+void HypergraphHopeFearDecoder::next() {
+ ++graphIter_;
+}
+
+bool HypergraphHopeFearDecoder::finished() {
+ return graphIter_ == graphs_.end();
+}
+
+void HypergraphHopeFearDecoder::HopeFear(
+ const vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) {
+ size_t sentenceId = graphIter_->first;
+ SparseVector weights;
+ wv.ToSparse(&weights);
+ const Graph& graph = *(graphIter_->second);
+
+ ValType hope_scale = 1.0;
+ HgHypothesis hopeHypo, fearHypo, modelHypo;
+ for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+
+ //hope decode
+ Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);
+
+ //fear decode
+ Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);
+
+ //Model decode
+ Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
+
+
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // where model score is having far more influence than BLEU
+ // hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ // hope_scale = abs(hope_bleu) / abs(hope_model);
+ // else break;
+ //TODO: Don't currently get model and bleu so commented this out for now.
+ break;
+ }
+ //modelFeatures, hopeFeatures and fearFeatures
+ hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
+ hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
+ hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);
+
+ //Need to know which are to be mapped to dense features!
+
+ //Only C++11
+ //hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
+ vector<ValType> fearStats(kBleuNgramOrder*2+1);
+ hopeFear->hopeStats.reserve(kBleuNgramOrder*2+1);
+ hopeFear->modelStats.reserve(kBleuNgramOrder*2+1);
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
+ hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);
+
+ fearStats[i] = fearHypo.bleuStats[i];
+ }
+ /*
+ cerr << "hope" << endl;;
+ for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
+ cerr << hopeHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << hopeHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ cerr << "fear";
+ for (size_t i = 0; i < fearHypo.text.size(); ++i) {
+ cerr << fearHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << fearHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ cerr << "model";
+ for (size_t i = 0; i < modelHypo.text.size(); ++i) {
+ cerr << modelHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ cerr << modelHypo.bleuStats[i] << " ";
+ }
+ cerr << endl;
+ */
+ hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+ hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);
+
+ //If fv and bleu stats are equal, then assume equal
+ hopeFear->hopeFearEqual = true; //(hopeFear->hopeBleu - hopeFear->fearBleu) >= 1e-8;
+ if (hopeFear->hopeFearEqual) {
+ for (size_t i = 0; i < fearStats.size(); ++i) {
+ if (fearStats[i] != hopeFear->hopeStats[i]) {
+ hopeFear->hopeFearEqual = false;
+ break;
+ }
+ }
+ }
+ hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
+}
+
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+ assert(!finished());
+ HgHypothesis bestHypo;
+ size_t sentenceId = graphIter_->first;
+ SparseVector weights;
+ wv.ToSparse(&weights);
+ vector<ValType> bg(kBleuNgramOrder*2+1);
+ Viterbi(*(graphIter_->second), weights, 0, references_, sentenceId, bg, &bestHypo);
+ stats->resize(bestHypo.bleuStats.size());
+ /*
+ for (size_t i = 0; i < bestHypo.text.size(); ++i) {
+ cerr << bestHypo.text[i]->first << " ";
+ }
+ cerr << endl;
+ */
+ for (size_t i = 0; i < bestHypo.bleuStats.size(); ++i) {
+ (*stats)[i] = bestHypo.bleuStats[i];
+ }
+}
+
+
+
+};
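
Both decoders implement the same selection rule: the hope hypothesis maximises model score plus BLEU, the fear hypothesis maximises model score minus BLEU, and the model hypothesis maximises score alone. A minimal sketch of that rule over parallel score arrays (illustrative; the hope_scale/safe_hope rescaling loop above is omitted):

#include <cstddef>
#include <vector>

struct HopeFearIndices { std::size_t hope, fear, model; };

HopeFearIndices SelectHopeFear(const std::vector<double>& modelScores,
                               const std::vector<double>& bleuScores) {
  HopeFearIndices out = {0, 0, 0};
  for (std::size_t i = 1; i < modelScores.size(); ++i) {
    if (modelScores[i] + bleuScores[i] >
        modelScores[out.hope] + bleuScores[out.hope]) out.hope = i;   // hope
    if (modelScores[i] - bleuScores[i] >
        modelScores[out.fear] - bleuScores[out.fear]) out.fear = i;   // fear
    if (modelScores[i] > modelScores[out.model]) out.model = i;       // model
  }
  return out;
}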
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
new file mode 100644
index 000000000..e8323fc76
--- /dev/null
+++ b/mert/HopeFearDecoder.h
@@ -0,0 +1,151 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#ifndef MERT_HOPEFEARDECODER_H
+#define MERT_HOPEFEARDECODER_H
+
+#include <vector>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "ForestRescore.h"
+#include "Hypergraph.h"
+#include "HypPackEnumerator.h"
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+//
+// Used by batch mira to get the hope, fear and model hypotheses. This wraps
+// the n-best list and lattice/hypergraph implementations.
+//
+
+namespace MosesTuning {
+
+/** To be filled in by the decoder */
+struct HopeFearData {
+ MiraFeatureVector modelFeatures;
+ MiraFeatureVector hopeFeatures;
+ MiraFeatureVector fearFeatures;
+
+ std::vector<float> modelStats;
+ std::vector<float> hopeStats;
+
+ ValType hopeBleu;
+ ValType fearBleu;
+
+ bool hopeFearEqual;
+};
+
+//Abstract base class
+class HopeFearDecoder {
+public:
+ //iterator methods
+ virtual void reset() = 0;
+ virtual void next() = 0;
+ virtual bool finished() = 0;
+
+ /**
+ * Calculate hope, fear and model hypotheses
+ **/
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ ) = 0;
+
+ /** Max score decoding */
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
+ = 0;
+
+ /** Calculate bleu on training set */
+ ValType Evaluate(const AvgWeightVector& wv);
+
+};
+
+
+/** Gets hope-fear from nbest lists */
+class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+ NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
+ const std::vector<std::string>& scoreFiles,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope
+ );
+
+ virtual void reset();
+ virtual void next();
+ virtual bool finished();
+
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
+
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+ boost::scoped_ptr<HypPackEnumerator> train_;
+ bool safe_hope_;
+
+};
+
+
+
+/** Gets hope-fear from hypergraphs */
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+ HypergraphHopeFearDecoder(
+ const std::string& hypergraphDir,
+ const std::vector<std::string>& referenceFiles,
+ size_t num_dense,
+ bool streaming,
+ bool no_shuffle,
+ bool safe_hope,
+ size_t hg_pruning,
+ const MiraWeightVector& wv
+ );
+
+ virtual void reset();
+ virtual void next();
+ virtual bool finished();
+
+ virtual void HopeFear(
+ const std::vector<ValType>& backgroundBleu,
+ const MiraWeightVector& wv,
+ HopeFearData* hopeFear
+ );
+
+ virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+ size_t num_dense_;
+ //maps sentence Id to graph ptr
+ typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
+ GraphColl graphs_;
+ GraphColl::const_iterator graphIter_;
+ ReferenceSet references_;
+ Vocab vocab_;
+};
+
+};
+
+#endif
+
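
The reset/next/finished trio makes both implementations look like a flat stream of sentences, which is exactly how Evaluate() drives them. A hypothetical outer loop in the same shape (ForEachSentence is not part of the patch; the update step is only indicated):

#include "HopeFearDecoder.h"
using namespace MosesTuning;

void ForEachSentence(HopeFearDecoder* decoder,
                     const std::vector<ValType>& backgroundBleu,
                     const MiraWeightVector& wv) {
  HopeFearData hopeFear;
  for (decoder->reset(); !decoder->finished(); decoder->next()) {
    decoder->HopeFear(backgroundBleu, wv, &hopeFear);
    // kbmira would apply a MIRA-style weight update here whenever
    // hopeFear.hopeFearEqual is false.
  }
}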
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
new file mode 100644
index 000000000..d80a5c12c
--- /dev/null
+++ b/mert/Hypergraph.cpp
@@ -0,0 +1,286 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+#include <iostream>
+#include <set>
+
+#include <boost/lexical_cast.hpp>
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "Hypergraph.h"
+
+using namespace std;
+static const string kBOS = "<s>";
+static const string kEOS = "</s>";
+
+namespace MosesTuning {
+
+StringPiece NextLine(util::FilePiece& from) {
+ StringPiece line;
+ while ((line = from.ReadLine()).starts_with("#"));
+ return line;
+}
+
+Vocab::Vocab() : eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+}
+
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+#if BOOST_VERSION >= 104200
+ Map::const_iterator i= map_.find(str, Hash(), Equals());
+#else
+ std::string copied_str(str.data(), str.size());
+ Map::const_iterator i = map_.find(copied_str.c_str());
+#endif
+ if (i != map_.end()) return *i;
+ char *copied = static_cast<char*>(piece_backing_.Allocate(str.size() + 1));
+ memcpy(copied, str.data(), str.size());
+ copied[str.size()] = 0;
+ return *map_.insert(Entry(copied, map_.size())).first;
+}
+
+double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+
+/**
+ * Reads an incoming edge. Returns edge and source words covered.
+**/
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+ Edge* edge = graph.NewEdge();
+ StringPiece line = NextLine(from);
+ util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
+ //Target
+ for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+ StringPiece got = *i;
+ if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) {
+ // non-terminal
+ char *end_ptr;
+ unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10);
+ UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, HypergraphException, "Bad non-terminal" << got);
+ UTIL_THROW_IF(child >= graph.VertexSize(), HypergraphException, "Reference to vertex " << child << " but we only have " << graph.VertexSize() << " vertices. Is the file in bottom-up format?");
+ edge->AddWord(NULL);
+ edge->AddChild(child);
+ } else {
+ const Vocab::Entry &found = graph.MutableVocab().FindOrAdd(got);
+ edge->AddWord(&found);
+ }
+ }
+
+ //Features
+ ++pipes;
+ for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+ StringPiece fv = *i;
+ if (!fv.size()) break;
+ size_t equals = fv.find_last_of("=");
+ UTIL_THROW_IF(equals == fv.npos, HypergraphException, "Failed to parse feature '" << fv << "'");
+ StringPiece name = fv.substr(0,equals);
+ StringPiece value = fv.substr(equals+1);
+ int processed;
+ float score = converter.StringToFloat(value.data(), value.length(), &processed);
+ UTIL_THROW_IF(isnan(score), HypergraphException, "Failed to parse weight '" << value << "'");
+ edge->AddFeature(name,score);
+ }
+ //Covered words
+ ++pipes;
+ size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
+ return pair<Edge*,size_t>(edge,sourceCovered);
+}
+
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+
+ Graph& newGraph = *pNewGraph;
+ //TODO: Optimise case where no pruning required
+
+ //For debug
+
+ /*
+ map<const Edge*, string> edgeIds;
+ for (size_t i = 0; i < edges_.Size(); ++i) {
+ stringstream str;
+ for (size_t j = 0; j < edges_[i].Words().size(); ++j) {
+ if (edges_[i].Words()[j]) str << edges_[i].Words()[j]->first;
+ }
+ edgeIds[&(edges_[i])] = str.str();
+ }*/
+
+ //end For debug
+
+ map<const Edge*, FeatureStatsType> edgeBackwardScores;
+ map<const Edge*, size_t> edgeHeads;
+ vector<FeatureStatsType> vertexBackwardScores(vertices_.Size(), kMinScore);
+ vector<vector<const Edge*> > outgoing(vertices_.Size());
+
+ //Compute backward scores
+ for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
+ //cerr << "Vertex " << vi << endl;
+ const Vertex& vertex = vertices_[vi];
+ const vector<const Edge*>& incoming = vertex.GetIncoming();
+ if (!incoming.size()) {
+ vertexBackwardScores[vi] = 0;
+ } else {
+ for (size_t ei = 0; ei < incoming.size(); ++ei) {
+ //cerr << "Edge " << edgeIds[incoming[ei]] << endl;
+ edgeHeads[incoming[ei]]= vi;
+ FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+ for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+ // cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
+ size_t childId = incoming[ei]->Children()[i];
+ UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
+ HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+ outgoing[childId].push_back(incoming[ei]);
+ incomingScore += vertexBackwardScores[childId];
+ }
+ edgeBackwardScores[incoming[ei]]= incomingScore;
+ // cerr << "Backward score: " << incomingScore << endl;
+ if (incomingScore > vertexBackwardScores[vi]) vertexBackwardScores[vi] = incomingScore;
+ }
+ }
+ }
+
+ //Compute forward scores
+ vector<FeatureStatsType> vertexForwardScores(vertices_.Size(), kMinScore);
+ map<const Edge*, FeatureStatsType> edgeForwardScores;
+ for (size_t i = 1; i <= vertices_.Size(); ++i) {
+ size_t vi = vertices_.Size() - i;
+ //cerr << "Vertex " << vi << endl;
+ if (!outgoing[vi].size()) {
+ vertexForwardScores[vi] = 0;
+ } else {
+ for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
+ //cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
+ FeatureStatsType outgoingScore = 0;
+ //sum scores of siblings
+ for (size_t i = 0; i < outgoing[vi][ei]->Children().size(); ++i) {
+ size_t siblingId = outgoing[vi][ei]->Children()[i];
+ if (siblingId != vi) {
+ // cerr << "\tSibling " << siblingId << endl;
+ outgoingScore += vertexBackwardScores[siblingId];
+ }
+ }
+ //add score of head
+ outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
+ //cerr << "Forward score " << outgoingScore << endl;
+ edgeForwardScores[outgoing[vi][ei]] = outgoingScore;
+ outgoingScore += outgoing[vi][ei]->GetScore(weights);
+ if (outgoingScore > vertexForwardScores[vi]) vertexForwardScores[vi] = outgoingScore;
+ }
+ }
+ }
+
+
+
+ multimap<FeatureStatsType, const Edge*> edgeScores;
+ for (size_t i = 0; i < edges_.Size(); ++i) {
+ const Edge* edge = &(edges_[i]);
+ if (edgeForwardScores.find(edge) == edgeForwardScores.end()) {
+ //This edge has no children, so didn't get a forward score. Its forward score
+ //is that of its head
+ edgeForwardScores[edge] = vertexForwardScores[edgeHeads[edge]];
+ }
+ FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
+ edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
+ // cerr << edgeIds[edge] << " " << score << endl;
+ }
+
+
+
+ multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
+ size_t edgeCount = 1;
+ while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
+ ++ei;
+ ++edgeCount;
+ }
+ multimap<FeatureStatsType, const Edge*>::const_iterator lowest = edgeScores.begin();
+ if (ei != edgeScores.rend()) lowest = edgeScores.lower_bound(ei->first);
+
+ //cerr << "Retained edges" << endl;
+ set<size_t> retainedVertices;
+ set<const Edge*> retainedEdges;
+ for (; lowest != edgeScores.end(); ++lowest) {
+ //cerr << lowest->first << " " << edgeIds[lowest->second] << endl;
+ retainedEdges.insert(lowest->second);
+ retainedVertices.insert(edgeHeads[lowest->second]);
+ for (size_t i = 0; i < lowest->second->Children().size(); ++i) {
+ retainedVertices.insert(lowest->second->Children()[i]);
+ }
+ }
+ newGraph.SetCounts(retainedVertices.size(), retainedEdges.size());
+
+ //cerr << "Retained vertices" << endl;
+ map<size_t,size_t> oldIdToNew;
+ size_t vi = 0;
+ for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
+ //cerr << *i << endl;
+ oldIdToNew[*i] = vi;
+ Vertex* vertex = newGraph.NewVertex();
+ vertex->SetSourceCovered(vertices_[*i].SourceCovered());
+ }
+
+ for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
+ Edge* newEdge = newGraph.NewEdge();
+ const Edge* oldEdge = *i;
+ for (size_t j = 0; j < oldEdge->Words().size(); ++j) {
+ newEdge->AddWord(oldEdge->Words()[j]);
+ }
+ for (size_t j = 0; j < oldEdge->Children().size(); ++j) {
+ newEdge->AddChild(oldIdToNew[oldEdge->Children()[j]]);
+ }
+ newEdge->SetFeatures(oldEdge->Features());
+ Vertex& newHead = newGraph.vertices_[oldIdToNew[edgeHeads[oldEdge]]];
+ newHead.AddEdge(newEdge);
+ }
+
+
+
+
+}
+
+/**
+ * Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
+**/
+void ReadGraph(util::FilePiece &from, Graph &graph) {
+
+ //First line should contain field names
+ StringPiece line = from.ReadLine();
+ UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
+ line = NextLine(from);
+
+ //Then expect numbers of vertices
+ util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
+ unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
+ ++i;
+ unsigned long int edges = boost::lexical_cast<unsigned long int>(*i);
+ graph.SetCounts(vertices, edges);
+ //cerr << "vertices: " << vertices << "; edges: " << edges << endl;
+ for (size_t i = 0; i < vertices; ++i) {
+ line = NextLine(from);
+ unsigned long int edge_count = boost::lexical_cast<unsigned long int>(line);
+ Vertex* vertex = graph.NewVertex();
+ for (unsigned long int e = 0; e < edge_count; ++e) {
+ pair<Edge*,size_t> edge = ReadEdge(from, graph);
+ vertex->AddEdge(edge.first);
+ //Note: the file format attaches this to the edge, but it's really a property
+ //of the vertex.
+ if (!e) {vertex->SetSourceCovered(edge.second);}
+ }
+ }
+}
+
+};
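
Graph::Prune ranks every edge by the score of the best complete derivation through it, i.e. its backward (inside) score plus its forward (outside) score, then keeps the highest-ranked minEdgeCount edges together with their endpoint vertices (ties with the cut-off score included, via the lower_bound on the multimap). A sketch of just the ranking step, assuming the two score arrays are already computed (TopEdges is hypothetical):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Indices of the best minEdgeCount edges by inside + outside score.
// Unlike the Moses version, this keeps exactly minEdgeCount, dropping ties.
std::vector<std::size_t> TopEdges(const std::vector<float>& insideScore,
                                  const std::vector<float>& outsideScore,
                                  std::size_t minEdgeCount) {
  std::vector<std::pair<float, std::size_t> > ranked;
  for (std::size_t e = 0; e < insideScore.size(); ++e)
    ranked.push_back(std::make_pair(insideScore[e] + outsideScore[e], e));
  std::sort(ranked.rbegin(), ranked.rend());  // descending by score
  std::vector<std::size_t> kept;
  for (std::size_t i = 0; i < ranked.size() && i < minEdgeCount; ++i)
    kept.push_back(ranked[i].second);
  return kept;
}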
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
new file mode 100644
index 000000000..b6ee6c3f8
--- /dev/null
+++ b/mert/Hypergraph.h
@@ -0,0 +1,251 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef MERT_HYPERGRAPH_H
+#define MERT_HYPERGRAPH_H
+
+#include <cassert>
+#include <climits>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include <boost/noncopyable.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/functional/hash/hash.hpp>
+#include <boost/unordered_map.hpp>
+
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
+#include "FeatureStats.h"
+
+namespace MosesTuning {
+
+typedef unsigned int WordIndex;
+const WordIndex kMaxWordIndex = UINT_MAX;
+const FeatureStatsType kMinScore = -std::numeric_limits<FeatureStatsType>::max();
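+// Note: -max() is the lowest finite FeatureStatsType, a convenient initial
+// value when maximising scores.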
+
+template <class T> class FixedAllocator : boost::noncopyable {
+ public:
+ FixedAllocator() : current_(NULL), end_(NULL) {}
+
+ void Init(std::size_t count) {
+ assert(!current_);
+ array_.reset(new T[count]);
+ current_ = array_.get();
+ end_ = current_ + count;
+ }
+
+ T &operator[](std::size_t idx) {
+ return array_.get()[idx];
+ }
+ const T &operator[](std::size_t idx) const {
+ return array_.get()[idx];
+ }
+
+ T *New() {
+ T *ret = current_++;
+ UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
+ return ret;
+ }
+
+ std::size_t Capacity() const {
+ return end_ - array_.get();
+ }
+
+ std::size_t Size() const {
+ return current_ - array_.get();
+ }
+
+ private:
+ boost::scoped_array<T> array_;
+ T *current_, *end_;
+};
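+// Usage sketch (illustrative, not part of the API): capacity is fixed at
+// Init() time and New() hands out consecutive slots until it runs out.
+//   FixedAllocator<Edge> pool;
+//   pool.Init(8);          // room for exactly 8 edges
+//   Edge* e = pool.New();  // the 9th call throws util::Exception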
+
+
+class Vocab {
+ public:
+ Vocab();
+
+ typedef std::pair<const char *const, WordIndex> Entry;
+
+ const Entry &FindOrAdd(const StringPiece &str);
+
+ const Entry& Bos() const {return bos_;}
+
+ const Entry& Eos() const {return eos_;}
+
+ private:
+ util::Pool piece_backing_;
+
+ struct Hash : public std::unary_function<const char *, std::size_t> {
+ std::size_t operator()(StringPiece str) const {
+ return util::MurmurHashNative(str.data(), str.size());
+ }
+ };
+
+ struct Equals : public std::binary_function<const char *, const char *, bool> {
+ bool operator()(StringPiece first, StringPiece second) const {
+ return first == second;
+ }
+ };
+
+ typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+ Map map_;
+ Entry eos_;
+ Entry bos_;
+
+};
+
+typedef std::vector<const Vocab::Entry*> WordVec;
+
+class Vertex;
+
+//Use shared pointer to save copying when we prune
+typedef boost::shared_ptr<SparseVector> FeaturePtr;
+
+/**
+ * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
+**/
+class Edge {
+ public:
+ Edge() {features_.reset(new SparseVector());}
+
+ void AddWord(const Vocab::Entry *word) {
+ words_.push_back(word);
+ }
+
+ void AddChild(size_t child) {
+ children_.push_back(child);
+ }
+
+ void AddFeature(const StringPiece& name, FeatureStatsType value) {
+ //TODO StringPiece interface
+ features_->set(name.as_string(),value);
+ }
+
+ const WordVec &Words() const {
+ return words_;
+ }
+
+ const FeaturePtr& Features() const {
+ return features_;
+ }
+
+ void SetFeatures(const FeaturePtr& features) {
+ features_ = features;
+ }
+
+ const std::vector<size_t>& Children() const {
+ return children_;
+ }
+
+ FeatureStatsType GetScore(const SparseVector& weights) const {
+ return inner_product(*(features_.get()), weights);
+ }
+
+ private:
+ // NULL for non-terminals.
+ std::vector<const Vocab::Entry*> words_;
+ std::vector<size_t> children_;
+ boost::shared_ptr<SparseVector> features_;
+};
+
+/**
+ * A vertex has 0..n incoming edges
+ **/
+class Vertex {
+ public:
+ Vertex() : sourceCovered_(0) {}
+
+ void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
+
+ void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
+
+ const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
+
+ size_t SourceCovered() const {return sourceCovered_;}
+
+ private:
+ std::vector<const Edge*> incoming_;
+ size_t sourceCovered_;
+};
+
+
+class Graph : boost::noncopyable {
+ public:
+ Graph(Vocab& vocab) : vocab_(vocab) {}
+
+ void SetCounts(std::size_t vertices, std::size_t edges) {
+ vertices_.Init(vertices);
+ edges_.Init(edges);
+ }
+
+ Vocab &MutableVocab() { return vocab_; }
+
+ Edge *NewEdge() {
+ return edges_.New();
+ }
+
+ Vertex *NewVertex() {
+ return vertices_.New();
+ }
+
+ const Vertex &GetVertex(std::size_t index) const {
+ return vertices_[index];
+ }
+
+ Edge &GetEdge(std::size_t index) {
+ return edges_[index];
+ }
+
+  /* Create a pruned copy of this graph containing at least minEdgeCount
+     edges (all edges, if the graph has fewer). Edges are ranked by their
+     scores in the max-product semiring, as suggested by Colin Cherry. */
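+  // Illustrative call (cf. HypergraphTest.cpp): graph.Prune(&pruned, weights, 5)
+  // keeps the five best-scoring edges (more if scores tie) and their vertices.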
+ void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
+
+ std::size_t VertexSize() const { return vertices_.Size(); }
+ std::size_t EdgeSize() const { return edges_.Size(); }
+
+ bool IsBoundary(const Vocab::Entry* word) const {
+ return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+ }
+
+ private:
+ FixedAllocator<Edge> edges_;
+ FixedAllocator<Vertex> vertices_;
+ Vocab& vocab_;
+};
+
+class HypergraphException : public util::Exception {
+ public:
+ HypergraphException() {}
+ ~HypergraphException() throw() {}
+};
+
+
+void ReadGraph(util::FilePiece &from, Graph &graph);
+
+
+};
+
+#endif
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
new file mode 100644
index 000000000..345a445f0
--- /dev/null
+++ b/mert/HypergraphTest.cpp
@@ -0,0 +1,151 @@
+#include <iostream>
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+#include "Hypergraph.h"
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(prune)
+{
+ Vocab vocab;
+ WordVec words;
+ string wordStrings[] =
+ {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+ for (size_t i = 0; i < 13; ++i) {
+ words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+ }
+
+ const string f1 = "foo";
+ const string f2 = "bar";
+ Graph graph(vocab);
+ graph.SetCounts(5,8);
+
+ Edge* e0 = graph.NewEdge();
+ e0->AddWord(words[0]);
+
+ Vertex* v0 = graph.NewVertex();
+ v0->AddEdge(e0);
+
+ Edge* e1 = graph.NewEdge();
+ e1->AddWord(NULL);
+ e1->AddChild(0);
+ e1->AddWord(words[2]);
+ e1->AddWord(words[3]);
+ e1->AddFeature(f1,1);
+ e1->AddFeature(f2,1);
+ Edge* e5 = graph.NewEdge();
+ e5->AddWord(NULL);
+ e5->AddChild(0);
+ e5->AddWord(words[9]);
+ e5->AddWord(words[10]);
+ e5->AddFeature(f1,2);
+ e5->AddFeature(f2,-2);
+
+ Vertex* v1 = graph.NewVertex();
+ v1->AddEdge(e1);
+ v1->AddEdge(e5);
+ v1->SetSourceCovered(1);
+
+ Edge* e2 = graph.NewEdge();
+ e2->AddWord(NULL);
+ e2->AddChild(1);
+ e2->AddWord(words[4]);
+ e2->AddWord(words[5]);
+ e2->AddFeature(f2,3);
+
+ Vertex* v2 = graph.NewVertex();
+ v2->AddEdge(e2);
+ v2->SetSourceCovered(3);
+
+ Edge* e3 = graph.NewEdge();
+ e3->AddWord(NULL);
+ e3->AddChild(2);
+ e3->AddWord(words[6]);
+ e3->AddWord(words[7]);
+ e3->AddWord(words[8]);
+ e3->AddFeature(f1,1);
+ Edge* e6 = graph.NewEdge();
+ e6->AddWord(NULL);
+ e6->AddChild(2);
+ e6->AddWord(words[9]);
+ e6->AddWord(words[12]);
+ e6->AddFeature(f2,1);
+ Edge* e7 = graph.NewEdge();
+ e7->AddWord(NULL);
+ e7->AddChild(1);
+ e7->AddWord(words[11]);
+ e7->AddWord(words[12]);
+ e7->AddFeature(f1,2);
+ e7->AddFeature(f2,3);
+
+ Vertex* v3 = graph.NewVertex();
+ v3->AddEdge(e3);
+ v3->AddEdge(e6);
+ v3->AddEdge(e7);
+ v3->SetSourceCovered(5);
+
+ Edge* e4 = graph.NewEdge();
+ e4->AddWord(NULL);
+ e4->AddChild(3);
+ e4->AddWord(words[1]);
+
+ Vertex* v4 = graph.NewVertex();
+ v4->AddEdge(e4);
+ v4->SetSourceCovered(6);
+
+ SparseVector weights;
+ weights.set(f1,2);
+ weights.set(f2,1);
+
+ Graph pruned(vocab);
+ graph.Prune(&pruned, weights, 5);
+
+ BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
+ BOOST_CHECK_EQUAL(4, pruned.VertexSize());
+
+ //edges retained should be best path (<s> ab jk </s>) and hi
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
+ BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(2).GetIncoming().size());
+ BOOST_CHECK_EQUAL(1, pruned.GetVertex(3).GetIncoming().size());
+
+ const Edge* edge;
+
+ edge = pruned.GetVertex(0).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(1, edge->Words().size());
+ BOOST_CHECK_EQUAL(words[0], edge->Words()[0]);
+
+ edge = pruned.GetVertex(1).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(1).GetIncoming()[1];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(2).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(3, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first);
+ BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
+
+ edge = pruned.GetVertex(3).GetIncoming()[0];
+ BOOST_CHECK_EQUAL(2, edge->Words().size());
+ BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+ BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first);
+
+}
diff --git a/mert/Jamfile b/mert/Jamfile
index 34c640b06..d848c258f 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -15,6 +15,9 @@ FeatureStats.cpp
FeatureArray.cpp
FeatureData.cpp
FeatureDataIterator.cpp
+ForestRescore.cpp
+HopeFearDecoder.cpp
+Hypergraph.cpp
MiraFeatureVector.cpp
MiraWeightVector.cpp
HypPackEnumerator.cpp
@@ -62,13 +65,15 @@ exe sentence-bleu : sentence-bleu.cpp mert_lib ;
exe pro : pro.cpp mert_lib ..//boost_program_options ;
-exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
+exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index dea9b9b83..347ad488e 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,18 +9,17 @@ namespace MosesTuning
{
-MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
- : m_dense(vec.dense)
-{
- vector<size_t> sparseFeats = vec.sparse.feats();
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+ vector<size_t> sparseFeats = sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
m_sparseFeats.reserve(sparseFeats.size());
m_sparseVals.reserve(sparseFeats.size());
for(size_t i=0; i<sparseFeats.size(); i++) {
+ if (sparseFeats[i] < ignoreLimit) continue;
size_t feat = m_dense.size() + sparseFeats[i];
m_sparseFeats.push_back(feat);
- m_sparseVals.push_back(vec.sparse.get(sparseFeats[i]));
+ m_sparseVals.push_back(sparse.get(sparseFeats[i]));
// Check ordered property
if(bFirst) {
@@ -35,6 +34,21 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
}
}
+MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
+ : m_dense(vec.dense)
+{
+ InitSparse(vec.sparse);
+}
+
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+ m_dense.resize(num_dense);
+ //Assume that features with id [0,num_dense) are the dense features
+ for (size_t id = 0; id < num_dense; ++id) {
+ m_dense[id] = sparse.get(id);
+ }
+ InitSparse(sparse,num_dense);
+}
+
MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
: m_dense(other.m_dense),
m_sparseFeats(other.m_sparseFeats),
@@ -148,6 +162,22 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+  ValType eps = 1e-8;
+  //dense features
+  if (a.m_dense.size() != b.m_dense.size()) return false;
+  for (size_t i = 0; i < a.m_dense.size(); ++i) {
+    if (fabs(a.m_dense[i]-b.m_dense[i]) > eps) return false;
+  }
+  //sparse features
+  if (a.m_sparseFeats.size() != b.m_sparseFeats.size()) return false;
+  for (size_t i = 0; i < a.m_sparseFeats.size(); ++i) {
+    if (a.m_sparseFeats[i] != b.m_sparseFeats[i]) return false;
+    if (fabs(a.m_sparseVals[i] - b.m_sparseVals[i]) > eps) return false;
+  }
+  return true;
+}
+
ostream& operator<<(ostream& o, const MiraFeatureVector& e)
{
for(size_t i=0; i<e.size(); i++) {
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index cb2b1c87d..48aa496b5 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -26,7 +26,10 @@ typedef FeatureStatsType ValType;
class MiraFeatureVector
{
public:
+ MiraFeatureVector() {}
MiraFeatureVector(const FeatureDataItem& vec);
+ //Assumes that features in sparse with id < num_dense are dense features
+ MiraFeatureVector(const SparseVector& sparse, size_t num_dense);
MiraFeatureVector(const MiraFeatureVector& other);
MiraFeatureVector(const std::vector<ValType>& dense,
const std::vector<std::size_t>& sparseFeats,
@@ -42,7 +45,12 @@ public:
friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
+ friend bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b);
+
private:
+ //Ignore any sparse features with id < ignoreLimit
+ void InitSparse(const SparseVector& sparse, size_t ignoreLimit = 0);
+
std::vector<ValType> m_dense;
std::vector<std::size_t> m_sparseFeats;
std::vector<ValType> m_sparseVals;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index e23804cbf..3b7b1780c 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,6 +93,14 @@ void MiraWeightVector::update(size_t index, ValType delta)
m_lastUpdated[index] = m_numUpdates;
}
+void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+ for (size_t i = 0; i < m_weights.size(); ++i) {
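+    // Treat weights within 1e-8 of zero as zero so the output stays sparse.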
+ if(abs(m_weights[i])>1e-8) {
+ sparse->set(i,m_weights[i]);
+ }
+ }
+}
+
/**
* Make sure everyone's total is up-to-date
*/
@@ -163,6 +171,15 @@ size_t AvgWeightVector::size() const
return m_wv.m_weights.size();
}
+void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+ for (size_t i = 0; i < size(); ++i) {
+ ValType w = weight(i);
+ if(abs(w)>1e-8) {
+ sparse->set(i,w);
+ }
+ }
+}
+
// --Emacs trickery--
// Local Variables:
// mode:c++
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index eb27e8a6d..bbc28704b 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -63,6 +63,11 @@ public:
*/
AvgWeightVector avg();
+ /**
+ * Convert to sparse vector, interpreting all features as sparse.
+ **/
+ void ToSparse(SparseVector* sparse) const;
+
friend class AvgWeightVector;
friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
@@ -99,12 +104,12 @@ public:
ValType score(const MiraFeatureVector& fv) const;
ValType weight(std::size_t index) const;
std::size_t size() const;
+ void ToSparse(SparseVector* sparse) const;
private:
const MiraWeightVector& m_wv;
};
-#endif // MERT_WEIGHT_VECTOR_H
// --Emacs trickery--
// Local Variables:
@@ -113,3 +118,4 @@ private:
// End:
}
+#endif // MERT_WEIGHT_VECTOR_H
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index a2665ac13..da552fa36 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -39,8 +39,10 @@ de recherches du Canada
#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>
+#include "util/exception.hh"
+
#include "BleuScorer.h"
-#include "HypPackEnumerator.h"
+#include "HopeFearDecoder.h"
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"
@@ -49,38 +51,16 @@ using namespace MosesTuning;
namespace po = boost::program_options;
-ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
-{
- vector<ValType> stats(kBleuNgramOrder*2+1,0);
- for(train->reset(); !train->finished(); train->next()) {
- // Find max model
- size_t max_index=0;
- ValType max_score=0;
- for(size_t i=0; i<train->cur_size(); i++) {
- MiraFeatureVector vec(train->featuresAt(i));
- ValType score = wv.score(vec);
- if(i==0 || score > max_score) {
- max_index = i;
- max_score = score;
- }
- }
- // Update stats
- const vector<float>& sent = train->scoresAt(max_index);
- for(size_t i=0; i<sent.size(); i++) {
- stats[i]+=sent[i];
- }
- }
- return unsmoothedBleu(stats);
-}
-
int main(int argc, char** argv)
{
- const ValType BLEU_RATIO = 5;
bool help;
string denseInitFile;
string sparseInitFile;
+ string type = "nbest";
vector<string> scoreFiles;
vector<string> featureFiles;
+ vector<string> referenceFiles; //for hg mira
+ string hgDir;
int seed;
string outputFile;
float c = 0.01; // Step-size cap C
@@ -91,25 +71,30 @@ int main(int argc, char** argv)
bool model_bg = false; // Use model for background corpus
bool verbose = false; // Verbose updates
bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
+ size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
desc.add_options()
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+ ("type,t", po::value<string>(&type), "Either nbest or hypergraph")
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+ ("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
+ ("reference,R", po::value<vector<string> > (&referenceFiles), "Reference files, only required for hypergraph mira")
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
("output-file,o", po::value<string>(&outputFile), "Output file")
("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
- ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
+ ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features. This should have 'name= value' on each line, or (legacy) should be the Moses mert 'init.opt' format.")
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
+ ("hg-prune", po::value<size_t>(&hgPruning), "Prune hypergraphs to have this many edges per reference word")
;
po::options_description cmdline_options;
@@ -145,12 +130,56 @@ int main(int argc, char** argv)
cerr << "could not open dense initfile: " << denseInitFile << endl;
exit(3);
}
+ if (verbose) cerr << "Reading dense features:" << endl;
parameter_t val;
getline(opt,buffer);
- istringstream strstrm(buffer);
- while(strstrm >> val) {
- initParams.push_back(val);
+ if (buffer.find_first_of("=") == buffer.npos) {
+ UTIL_THROW_IF(type == "hypergraph", util::Exception, "For hypergraph version, require dense features in 'name= value' format");
+ cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
+ istringstream strstrm(buffer);
+ while(strstrm >> val) {
+ initParams.push_back(val);
+ if(verbose) cerr << val << endl;
+ }
+ } else {
+ vector<string> names;
+ string last_name = "";
+ size_t feature_ctr = 0;
+ do {
+ size_t equals = buffer.find_last_of("=");
+ UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
+ << buffer << "'");
+ string name = buffer.substr(0,equals);
+ names.push_back(name);
+ initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
+
+ //Names for features with several values need to have their id added
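+      //e.g. three consecutive 'tm= ...' lines become tm0, tm1, tm2 (the
+      //first occurrence is renamed retroactively when the second appears)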
+ if (name != last_name) feature_ctr = 0;
+ last_name = name;
+ if (feature_ctr) {
+ stringstream namestr;
+ namestr << names.back() << feature_ctr;
+ names[names.size()-1] = namestr.str();
+ if (feature_ctr == 1) {
+ stringstream namestr;
+ namestr << names[names.size()-2] << (feature_ctr-1);
+ names[names.size()-2] = namestr.str();
+ }
+ }
+ ++feature_ctr;
+
+ } while(getline(opt,buffer));
+
+ //Make sure that SparseVector encodes dense feature names as 0..n-1.
+ for (size_t i = 0; i < names.size(); ++i) {
+ size_t id = SparseVector::encode(names[i]);
+ assert(id == i);
+ if (verbose) cerr << names[i] << " " << initParams[i] << endl;
+ }
+
}
+
opt.close();
}
size_t initDenseSize = initParams.size();
@@ -188,82 +217,45 @@ int main(int argc, char** argv)
}
bg.push_back(kBleuNgramOrder);
+ boost::scoped_ptr<HopeFearDecoder> decoder;
+ if (type == "nbest") {
+ decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope));
+ } else if (type == "hypergraph") {
+ decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv));
+ } else {
+ UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
+ }
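+  // Both decoder types expose the same reset/next/HopeFear/Evaluate
+  // interface, so the MIRA loop below is agnostic to the hypothesis source.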
+
// Training loop
- boost::scoped_ptr<HypPackEnumerator> train;
- if(streaming)
- train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
- else
- train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
- cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
+ cerr << "Initial BLEU = " << decoder->Evaluate(wv.avg()) << endl;
ValType bestBleu = 0;
for(int j=0; j<n_iters; j++) {
// MIRA train for one epoch
- int iNumHyps = 0;
int iNumExamples = 0;
int iNumUpdates = 0;
ValType totalLoss = 0.0;
- for(train->reset(); !train->finished(); train->next()) {
- // Hope / fear decode
- ValType hope_scale = 1.0;
- size_t hope_index=0, fear_index=0, model_index=0;
- ValType hope_score=0, fear_score=0, model_score=0;
- int iNumHypsBackup = iNumHyps;
- for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
- iNumHyps = iNumHypsBackup;
- ValType hope_bleu, hope_model;
- for(size_t i=0; i< train->cur_size(); i++) {
- const MiraFeatureVector& vec=train->featuresAt(i);
- ValType score = wv.score(vec);
- ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
- // Hope
- if(i==0 || (hope_scale*score + bleu) > hope_score) {
- hope_score = hope_scale*score + bleu;
- hope_index = i;
- hope_bleu = bleu;
- hope_model = score;
- }
- // Fear
- if(i==0 || (score - bleu) > fear_score) {
- fear_score = score - bleu;
- fear_index = i;
- }
- // Model
- if(i==0 || score > model_score) {
- model_score = score;
- model_index = i;
- }
- iNumHyps++;
- }
- // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
- // where model score is having far more influence than BLEU
- hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
- if(safe_hope && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
- hope_scale = abs(hope_bleu) / abs(hope_model);
- else break;
- }
+ size_t sentenceIndex = 0;
+ for(decoder->reset();!decoder->finished(); decoder->next()) {
+ HopeFearData hfd;
+ decoder->HopeFear(bg,wv,&hfd);
+
// Update weights
- if(hope_index!=fear_index) {
+ if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
// Vector difference
- const MiraFeatureVector& hope=train->featuresAt(hope_index);
- const MiraFeatureVector& fear=train->featuresAt(fear_index);
- MiraFeatureVector diff = hope - fear;
+ MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
// Bleu difference
- const vector<float>& hope_stats = train->scoresAt(hope_index);
- ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
- const vector<float>& fear_stats = train->scoresAt(fear_index);
- ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
- assert(hopeBleu + 1e-8 >= fearBleu);
- ValType delta = hopeBleu - fearBleu;
+ //assert(hfd.hopeBleu + 1e-8 >= hfd.fearBleu);
+ ValType delta = hfd.hopeBleu - hfd.fearBleu;
// Loss and update
ValType diff_score = wv.score(diff);
ValType loss = delta - diff_score;
if(verbose) {
- cerr << "Updating sent " << train->cur_id() << endl;
+ cerr << "Updating sent " << sentenceIndex << endl;
cerr << "Wght: " << wv << endl;
- cerr << "Hope: " << hope << " BLEU:" << hopeBleu << " Score:" << wv.score(hope) << endl;
- cerr << "Fear: " << fear << " BLEU:" << fearBleu << " Score:" << wv.score(fear) << endl;
+ cerr << "Hope: " << hfd.hopeFeatures << " BLEU:" << hfd.hopeBleu << " Score:" << wv.score(hfd.hopeFeatures) << endl;
+ cerr << "Fear: " << hfd.fearFeatures << " BLEU:" << hfd.fearBleu << " Score:" << wv.score(hfd.fearFeatures) << endl;
cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
- cerr << "Loss: " << loss << " Scale: " << hope_scale << endl;
+ cerr << "Loss: " << loss << " Scale: " << 1 << endl;
cerr << endl;
}
if(loss > 0) {
@@ -273,16 +265,16 @@ int main(int argc, char** argv)
iNumUpdates++;
}
// Update BLEU statistics
- const vector<float>& model_stats = train->scoresAt(model_index);
for(size_t k=0; k<bg.size(); k++) {
bg[k]*=decay;
if(model_bg)
- bg[k]+=model_stats[k];
+ bg[k]+=hfd.modelStats[k];
else
- bg[k]+=hope_stats[k];
+ bg[k]+=hfd.hopeStats[k];
}
}
iNumExamples++;
+ ++sentenceIndex;
}
// Training Epoch summary
cerr << iNumUpdates << "/" << iNumExamples << " updates"
@@ -291,15 +283,16 @@ int main(int argc, char** argv)
// Evaluate current average weights
AvgWeightVector avg = wv.avg();
- ValType bleu = evaluate(train.get(), avg);
+ ValType bleu = decoder->Evaluate(avg);
cerr << ", BLEU = " << bleu << endl;
if(bleu > bestBleu) {
+ /*
size_t num_dense = train->num_dense();
if(initDenseSize>0 && initDenseSize!=num_dense) {
cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
<< initDenseSize << "!=" << num_dense << endl;
exit(1);
- }
+ }*/
// Write to a file
ostream* out;
ofstream outFile;
@@ -314,11 +307,11 @@ int main(int argc, char** argv)
out = &cout;
}
for(size_t i=0; i<avg.size(); i++) {
- if(i<num_dense)
+ if(i<initDenseSize)
*out << "F" << i << " " << avg.weight(i) << endl;
else {
if(abs(avg.weight(i))>1e-8)
- *out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
+ *out << SparseVector::decode(i-initDenseSize) << " " << avg.weight(i) << endl;
}
}
outFile.close();