/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2014- University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>

#define BOOST_FILESYSTEM_VERSION 3
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>

#include "util/exception.hh"
#include "util/file_piece.hh"

#include "Scorer.h"
#include "HopeFearDecoder.h"

using namespace std;
namespace fs = boost::filesystem;

namespace MosesTuning
{

static const ValType BLEU_RATIO = 5;

std::pair<MiraWeightVector*, size_t> InitialiseWeights(const string& denseInitFile, const string& sparseInitFile,
    const string& type, bool verbose)
{
  // Dense
  vector<parameter_t> initParams;
  if(!denseInitFile.empty()) {
    ifstream opt(denseInitFile.c_str());
    string buffer;
    if (opt.fail()) {
      cerr << "could not open dense initfile: " << denseInitFile << endl;
      exit(3);
    }
    if (verbose) cerr << "Reading dense features:" << endl;
    parameter_t val;
    getline(opt,buffer);
    if (buffer.find_first_of("=") == buffer.npos) {
      UTIL_THROW_IF(type == "hypergraph", util::Exception, "The hypergraph version requires dense features in 'name= value' format");
      cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
      istringstream strstrm(buffer);
      while(strstrm >> val) {
        initParams.push_back(val);
        if(verbose) cerr << val << endl;
      }
    } else {
      vector<string> names;
      string last_name = "";
      size_t feature_ctr = 1;
      do {
        size_t equals = buffer.find_last_of("=");
        UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '" << buffer << "'");
        string name = buffer.substr(0,equals);
        names.push_back(name);
        initParams.push_back(boost::lexical_cast<parameter_t>(buffer.substr(equals+2)));

        //Names for features with several values need to have their id added
        if (name != last_name) feature_ctr = 1;
        last_name = name;
        if (feature_ctr>1) {
          stringstream namestr;
          namestr << names.back() << "_" << feature_ctr;
          names[names.size()-1] = namestr.str();
          if (feature_ctr == 2) {
            stringstream namestr;
            namestr << names[names.size()-2] << "_" << (feature_ctr-1);
            names[names.size()-2] = namestr.str();
          }
        }
        ++feature_ctr;
      } while(getline(opt,buffer));
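      // Example (hypothetical file contents): three consecutive "tm= ..." lines
      // are renamed tm_1, tm_2, tm_3 by the loop above, so every dense value
      // carries a unique feature name before it is encoded below.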
      //Make sure that SparseVector encodes dense feature names as 0..n-1
      for (size_t i = 0; i < names.size(); ++i) {
        size_t id = SparseVector::encode(names[i]);
        assert(id == i);
        if (verbose) cerr << names[i] << " " << initParams[i] << endl;
      }
    }

    opt.close();
  }
  size_t initDenseSize = initParams.size();

  // Sparse
  if(!sparseInitFile.empty()) {
    if(initDenseSize==0) {
      cerr << "sparse initialization requires dense initialization" << endl;
      exit(3);
    }
    ifstream opt(sparseInitFile.c_str());
    if(opt.fail()) {
      cerr << "could not open sparse initfile: " << sparseInitFile << endl;
      exit(3);
    }
    int sparseCount=0;
    parameter_t val;
    std::string name;
    while(opt >> name >> val) {
      size_t id = SparseVector::encode(name) + initDenseSize;
      while(initParams.size()<=id) initParams.push_back(0.0);
      initParams[id] = val;
      sparseCount++;
    }
    cerr << "Found " << sparseCount << " initial sparse features" << endl;
    opt.close();
  }

  return pair<MiraWeightVector*, size_t>(new MiraWeightVector(initParams), initDenseSize);
}

ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
{
  vector<ValType> stats(scorer_->NumberOfScores(),0);
  for(reset(); !finished(); next()) {
    vector<ValType> sent;
    MaxModel(wv,&sent);
    for(size_t i=0; i<sent.size(); i++) {
      stats[i] += sent[i];
    }
  }
  return scorer_->calculateScore(stats);
}

NbestHopeFearDecoder::NbestHopeFearDecoder(
  const vector<string>& featureFiles,
  const vector<string>& scoreFiles,
  bool streaming,
  bool no_shuffle,
  bool safe_hope,
  Scorer* scorer
) : safe_hope_(safe_hope)
{
  scorer_ = scorer;
  if (streaming) {
    train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
  } else {
    train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
  }
}

void NbestHopeFearDecoder::next()
{
  train_->next();
}

bool NbestHopeFearDecoder::finished()
{
  return train_->finished();
}

void NbestHopeFearDecoder::reset()
{
  train_->reset();
}

void NbestHopeFearDecoder::HopeFear(
  const std::vector<ValType>& backgroundBleu,
  const MiraWeightVector& wv,
  HopeFearData* hopeFear
)
{
  // Hope / fear decode
  ValType hope_scale = 1.0;
  size_t hope_index=0, fear_index=0, model_index=0;
  ValType hope_score=0, fear_score=0, model_score=0;
  for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
    ValType hope_bleu=0, hope_model=0;
    for(size_t i=0; i<train_->cur_size(); i++) {
      const MiraFeatureVector& vec=train_->featuresAt(i);
      ValType score = wv.score(vec);
      ValType bleu = scorer_->calculateSentenceLevelBackgroundScore(train_->scoresAt(i),backgroundBleu);
      // Hope: maximise model score plus BLEU
      if(i==0 || (hope_scale*score + bleu) > hope_score) {
        hope_score = hope_scale*score + bleu;
        hope_index = i;
        hope_bleu = bleu;
        hope_model = score;
      }
      // Fear: maximise model score minus BLEU
      if(i==0 || (score - bleu) > fear_score) {
        fear_score = score - bleu;
        fear_index = i;
      }
      // Model: maximise model score alone
      if(i==0 || score > model_score) {
        model_score = score;
        model_index = i;
      }
    }
    // The outer loop rescales the contribution of the model score to 'hope' in antagonistic cases
    // where the model score has far more influence than BLEU
    hope_bleu *= BLEU_RATIO; // We only care about cases where the model has MUCH more influence than BLEU
    if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
      hope_scale = abs(hope_bleu) / abs(hope_model);
    else break;
  }
  hopeFear->modelFeatures = train_->featuresAt(model_index);
  hopeFear->hopeFeatures = train_->featuresAt(hope_index);
  hopeFear->fearFeatures = train_->featuresAt(fear_index);

  hopeFear->hopeStats = train_->scoresAt(hope_index);
  hopeFear->hopeBleu = scorer_->calculateSentenceLevelBackgroundScore(hopeFear->hopeStats, backgroundBleu);
  const vector<float>& fear_stats = train_->scoresAt(fear_index);
  hopeFear->fearBleu = scorer_->calculateSentenceLevelBackgroundScore(fear_stats, backgroundBleu);

  hopeFear->modelStats = train_->scoresAt(model_index);
  hopeFear->hopeFearEqual = (hope_index == fear_index);
}
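// Pick the hypothesis with the highest model score under the averaged weights
// and return its sufficient statistics (for BLEU, the n-gram match counts and
// lengths that the scorer accumulates).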
void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
{
  // Find max model
  size_t max_index=0;
  ValType max_score=0;
  for(size_t i=0; i<train_->cur_size(); i++) {
    MiraFeatureVector vec(train_->featuresAt(i));
    ValType score = wv.score(vec);
    if(i==0 || score > max_score) {
      max_index = i;
      max_score = score;
    }
  }
  *stats = train_->scoresAt(max_index);
}

HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
(
  const string& hypergraphDir,
  const vector<string>& referenceFiles,
  size_t num_dense,
  bool streaming,
  bool no_shuffle,
  bool safe_hope,
  size_t hg_pruning,
  const MiraWeightVector& wv,
  Scorer* scorer
) :
  num_dense_(num_dense)
{
  UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
  UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
  UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
  references_.Load(referenceFiles, vocab_);

  SparseVector weights;
  wv.ToSparse(&weights, num_dense_);
  scorer_ = scorer;

  static const string kWeights = "weights";
  fs::directory_iterator dend;
  size_t fileCount = 0;

  cerr << "Reading hypergraphs" << endl;
  for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
    const fs::path& hgpath = di->path();
    if (hgpath.filename() == kWeights) continue;
    // cerr << "Reading " << hgpath.filename() << endl;
    Graph graph(vocab_);
    size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
    util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
    //util::FilePiece file(di->path().string().c_str());
    util::FilePiece file(fd.release());
    ReadGraph(file,graph);

    //cerr << "ref length " << references_.Length(id) << endl;
    // Prune each hypergraph to an edge budget proportional to the reference length
    size_t edgeCount = hg_pruning * references_.Length(id);
    boost::shared_ptr<Graph> prunedGraph;
    prunedGraph.reset(new Graph(vocab_));
    graph.Prune(prunedGraph.get(), weights, edgeCount);
    graphs_[id] = prunedGraph;
    // cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
    ++fileCount;
    if (fileCount % 10 == 0) cerr << ".";
    if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
  }
  cerr << endl << "Done" << endl;

  sentenceIds_.resize(graphs_.size());
  for (size_t i = 0; i < graphs_.size(); ++i) sentenceIds_[i] = i;
  if (!no_shuffle) {
    random_shuffle(sentenceIds_.begin(), sentenceIds_.end());
  }
}

void HypergraphHopeFearDecoder::reset()
{
  sentenceIdIter_ = sentenceIds_.begin();
}

void HypergraphHopeFearDecoder::next()
{
  sentenceIdIter_++;
}

bool HypergraphHopeFearDecoder::finished()
{
  return sentenceIdIter_ == sentenceIds_.end();
}

void HypergraphHopeFearDecoder::HopeFear(
  const vector<ValType>& backgroundBleu,
  const MiraWeightVector& wv,
  HopeFearData* hopeFear
)
{
  size_t sentenceId = *sentenceIdIter_;
  SparseVector weights;
  wv.ToSparse(&weights, num_dense_);
  const Graph& graph = *(graphs_[sentenceId]);

  // ValType hope_scale = 1.0;
  HgHypothesis hopeHypo, fearHypo, modelHypo;
  for(size_t safe_loop=0; safe_loop<2; safe_loop++) {

    //hope decode
    Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);

    //fear decode
    Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);

    //model decode
    Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
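    // The third Viterbi argument weights the sentence-level BLEU term added to
    // the model score during the search: +1 favours high BLEU (hope), -1 favours
    // low BLEU (fear), and 0 ignores BLEU entirely (pure model decode).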
    // The outer loop would rescale the contribution of the model score to 'hope' in
    // antagonistic cases where the model score has far more influence than BLEU:
    // hope_bleu *= BLEU_RATIO; // We only care about cases where the model has MUCH more influence than BLEU
    // if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
    //   hope_scale = abs(hope_bleu) / abs(hope_model);
    // else break;
    //TODO: The model and BLEU scores are not currently available here, so the rescaling is disabled.
    break;
  }
  hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
  hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
  hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);

  //Need to know which are to be mapped to dense features!

  //Only C++11
  //hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
  vector<ValType> fearStats(scorer_->NumberOfScores());
  hopeFear->hopeStats.reserve(scorer_->NumberOfScores());
  hopeFear->modelStats.reserve(scorer_->NumberOfScores());
  for (size_t i = 0; i < fearStats.size(); ++i) {
    hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
    hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);
    fearStats[i] = fearHypo.bleuStats[i];
  }
  /*
  cerr << "hope" << endl;
  for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
    cerr << hopeHypo.text[i]->first << " ";
  }
  cerr << endl;
  for (size_t i = 0; i < fearStats.size(); ++i) {
    cerr << hopeHypo.bleuStats[i] << " ";
  }
  cerr << endl;
  cerr << "fear";
  for (size_t i = 0; i < fearHypo.text.size(); ++i) {
    cerr << fearHypo.text[i]->first << " ";
  }
  cerr << endl;
  for (size_t i = 0; i < fearStats.size(); ++i) {
    cerr << fearHypo.bleuStats[i] << " ";
  }
  cerr << endl;
  cerr << "model";
  for (size_t i = 0; i < modelHypo.text.size(); ++i) {
    cerr << modelHypo.text[i]->first << " ";
  }
  cerr << endl;
  for (size_t i = 0; i < fearStats.size(); ++i) {
    cerr << modelHypo.bleuStats[i] << " ";
  }
  cerr << endl;
  */
  hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
  hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);

  //If the feature vectors and BLEU stats are equal, then assume the hypotheses are equal
  hopeFear->hopeFearEqual = true; //(hopeFear->hopeBleu - hopeFear->fearBleu) >= 1e-8;
  if (hopeFear->hopeFearEqual) {
    for (size_t i = 0; i < fearStats.size(); ++i) {
      if (fearStats[i] != hopeFear->hopeStats[i]) {
        hopeFear->hopeFearEqual = false;
        break;
      }
    }
  }
  hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
}

void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
{
  assert(!finished());
  HgHypothesis bestHypo;
  size_t sentenceId = *sentenceIdIter_;
  SparseVector weights;
  wv.ToSparse(&weights, num_dense_);
  vector<ValType> bg(scorer_->NumberOfScores());
  //cerr << "Calculating bleu on " << sentenceId << endl;
  Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
  stats->resize(bestHypo.bleuStats.size());
  /*
  for (size_t i = 0; i < bestHypo.text.size(); ++i) {
    cerr << bestHypo.text[i]->first << " ";
  }
  cerr << endl;
  */
  for (size_t i = 0; i < bestHypo.bleuStats.size(); ++i) {
    (*stats)[i] = bestHypo.bleuStats[i];
  }
}

};
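/* Usage sketch (a hypothetical driver loop, loosely modelled on the actual
   caller in mert/kbmira.cpp; names such as 'decoder' and 'hfd' are
   illustrative only):

     HopeFearData hfd;
     for (decoder->reset(); !decoder->finished(); decoder->next()) {
       decoder->HopeFear(backgroundBleu, wv, &hfd);
       // MIRA-style update towards hfd.hopeFeatures and away from
       // hfd.fearFeatures, skipped when hfd.hopeFearEqual, followed by an
       // update of the background BLEU statistics.
     }
     ValType bleu = decoder->Evaluate(wv.avg());
*/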