Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: Barry Haddow <barry.haddow@gmail.com> 2014-08-04 23:51:45 +0400
committer: Barry Haddow <barry.haddow@gmail.com> 2014-08-04 23:51:45 +0400
commit: 05455eb0c116a979224f0ed397e30f7cfada4909 (patch)
tree: 4ef25d49e4ad12331a414aca0b4f8fb807312a89 /mert
parent: e2e07940ae4803ad3486560d857f81ef00304214 (diff)
Implement shuffling correctly
Diffstat (limited to 'mert')
-rw-r--r-- mert/HopeFearDecoder.cpp 38
-rw-r--r-- mert/HopeFearDecoder.h 3
2 files changed, 20 insertions, 21 deletions
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index ab7c1a644..23ba1e099 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -173,21 +173,14 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
static const string kWeights = "weights";
fs::directory_iterator dend;
size_t fileCount = 0;
- vector<fs::path> hypergraphFiles;
- for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
- if (di->path().filename() == kWeights) continue;
- hypergraphFiles.push_back(di->path());
- }
-
- if (!no_shuffle) {
- random_shuffle(hypergraphFiles.begin(), hypergraphFiles.end());
- }
- cerr << "Reading " << hypergraphFiles.size() << " hypergraphs" << endl;
- for (vector<fs::path>::const_iterator di = hypergraphFiles.begin(); di != hypergraphFiles.end(); ++di) {
+ cerr << "Reading hypergraphs" << endl;
+ for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
+ const fs::path& hgpath = di->path();
+ if (hgpath.filename() == kWeights) continue;
Graph graph(vocab_);
- size_t id = boost::lexical_cast<size_t>(di->stem().string());
- util::scoped_fd fd(util::OpenReadOrThrow(di->string().c_str()));
+ size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
+ util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
//util::FilePiece file(di->path().string().c_str());
util::FilePiece file(fd.release());
ReadGraph(file,graph);
@@ -205,19 +198,24 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
}
cerr << endl << "Done" << endl;
+ sentenceIds_.resize(graphs_.size());
+ for (size_t i = 0; i < graphs_.size(); ++i) sentenceIds_[i] = i;
+ if (!no_shuffle) {
+ random_shuffle(sentenceIds_.begin(), sentenceIds_.end());
+ }
}
void HypergraphHopeFearDecoder::reset() {
- graphIter_ = graphs_.begin();
+ sentenceIdIter_ = sentenceIds_.begin();
}
void HypergraphHopeFearDecoder::next() {
- ++graphIter_;
+ sentenceIdIter_++;
}
bool HypergraphHopeFearDecoder::finished() {
- return graphIter_ == graphs_.end();
+ return sentenceIdIter_ == sentenceIds_.end();
}
void HypergraphHopeFearDecoder::HopeFear(
@@ -225,10 +223,10 @@ void HypergraphHopeFearDecoder::HopeFear(
const MiraWeightVector& wv,
HopeFearData* hopeFear
) {
- size_t sentenceId = graphIter_->first;
+ size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
- const Graph& graph = *(graphIter_->second);
+ const Graph& graph = *(graphs_[sentenceId]);
ValType hope_scale = 1.0;
HgHypothesis hopeHypo, fearHypo, modelHypo;
@@ -319,11 +317,11 @@ void HypergraphHopeFearDecoder::HopeFear(
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
assert(!finished());
HgHypothesis bestHypo;
- size_t sentenceId = graphIter_->first;
+ size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
vector<ValType> bg(kBleuNgramOrder*2+1);
- Viterbi(*(graphIter_->second), weights, 0, references_, sentenceId, bg, &bestHypo);
+ Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
stats->resize(bestHypo.bleuStats.size());
/*
for (size_t i = 0; i < bestHypo.text.size(); ++i) {
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
index e8323fc76..694a3217e 100644
--- a/mert/HopeFearDecoder.h
+++ b/mert/HopeFearDecoder.h
@@ -140,7 +140,8 @@ private:
//maps sentence Id to graph ptr
typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
GraphColl graphs_;
- GraphColl::const_iterator graphIter_;
+ std::vector<size_t> sentenceIds_;
+ std::vector<size_t>::const_iterator sentenceIdIter_;
ReferenceSet references_;
Vocab vocab_;
};