From d122605c0d1a44d92526cccaeff640eee695aa62 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Tue, 12 May 2015 02:28:58 +0100 Subject: Code reorganization with respect to hypergraph output. --- moses/BaseManager.cpp | 69 +++++++++++++++++++- moses/BaseManager.h | 7 +- moses/ChartManager.cpp | 22 ++++--- moses/ChartManager.h | 2 +- moses/HypergraphOutput.cpp | 157 +++++++-------------------------------------- moses/IOWrapper.cpp | 62 +++++++++++++----- moses/IOWrapper.h | 7 ++ moses/Incremental.h | 7 +- moses/Manager.cpp | 21 +++--- moses/Manager.h | 2 +- moses/Syntax/Manager.h | 8 ++- moses/TranslationTask.cpp | 7 +- 12 files changed, 193 insertions(+), 178 deletions(-) diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp index 83d48e6e4..70bacdede 100644 --- a/moses/BaseManager.cpp +++ b/moses/BaseManager.cpp @@ -1,11 +1,17 @@ -#include - -#include "StaticData.h" #include "BaseManager.h" +#include "StaticData.h" #include "moses/FF/StatelessFeatureFunction.h" #include "moses/FF/StatefulFeatureFunction.h" #include "moses/TranslationTask.h" +#include +#include +#include +#include +#include +#include +#include + using namespace std; namespace Moses @@ -21,6 +27,63 @@ BaseManager::GetSource() const return m_source; } +void +BaseManager:: +OutputSearchGraphAsHypergraph(std::ostream& out) const +{ + // This virtual function that may not be implemented everywhere, but it should for + // derived classes that use it + UTIL_THROW2("Not implemented."); +} + +void +BaseManager:: +OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const +{ + std::string odir = boost::filesystem::path(fname).parent_path().string(); + if (! boost::filesystem::exists(odir)) + boost::filesystem::create_directory(odir); + UTIL_THROW_IF2(!boost::filesystem::is_directory(odir), + "Cannot output hypergraphs to " << odir + << " because that path exists but is not a directory."); + + // not clear why we need to output the weights every time we dump a search + // graph into a file again, but that's what the old code did. + + string weightsFile = odir + "/weights"; + TRACE_ERR("The weights file is " << weightsFile << "\n"); + ofstream weightsOut; + weightsOut.open(weightsFile.c_str()); + weightsOut.setf(std::ios::fixed); + weightsOut.precision(6); + // just temporarily, till we've implemented weight scoring in the manager + // (or the translation task) + StaticData::Instance().GetAllWeights().Save(weightsOut); + weightsOut.close(); + + boost::iostreams::filtering_ostream file; + if (boost::ends_with(fname, ".gz")) + file.push(boost::iostreams::gzip_compressor()); + else if (boost::ends_with(fname, ".bz2")) + file.push( boost::iostreams::bzip2_compressor() ); + file.push( boost::iostreams::file_sink(fname, ios_base::out) ); + if (file.is_complete() && file.good()) + { + file.setf(std::ios::fixed); + file.precision(precision); + this->OutputSearchGraphAsHypergraph(file); + file.flush(); + } + else + { + TRACE_ERR("Cannot output hypergraph for line " + << this->GetSource().GetTranslationId() + << " because the output file " << fname + << " is not open or not ready for writing" + << std::endl); + } + file.pop(); +} diff --git a/moses/BaseManager.h b/moses/BaseManager.h index d7a25e7fd..c24e41ad3 100644 --- a/moses/BaseManager.h +++ b/moses/BaseManager.h @@ -63,13 +63,16 @@ public: virtual void OutputSearchGraph(OutputCollector *collector) const = 0; virtual void OutputUnknowns(OutputCollector *collector) const = 0; virtual void OutputSearchGraphSLF() const = 0; - virtual void OutputSearchGraphHypergraph() const = 0; + // virtual void OutputSearchGraphHypergraph() const = 0; + virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const; + virtual void OutputSearchGraphAsHypergraph(std::string const& fname, + size_t const precision) const; /*** * to be called after processing a sentence */ virtual void CalcDecoderStatistics() const = 0; - + }; } diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index a0b39167a..9170b2a7c 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -291,9 +291,11 @@ void ChartManager::FindReachableHypotheses( } } -void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const +void +ChartManager:: +OutputSearchGraphAsHypergraph(std::ostream& out) const { - ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream); + ChartSearchGraphWriterHypergraph writer(&out); WriteSearchGraph(writer); } @@ -812,14 +814,14 @@ void ChartManager::OutputDetailedAllTranslationReport( collector->Write(translationId, out.str()); } -void ChartManager::OutputSearchGraphHypergraph() const -{ - const StaticData &staticData = StaticData::Instance(); - if (staticData.GetOutputSearchGraphHypergraph()) { - HypergraphOutput hypergraphOutputChart(PRECISION); - hypergraphOutputChart.Write(*this); - } -} +// void ChartManager::OutputSearchGraphHypergraph() const +// { +// const StaticData &staticData = StaticData::Instance(); +// if (staticData.GetOutputSearchGraphHypergraph()) { +// HypergraphOutput hypergraphOutputChart(PRECISION); +// hypergraphOutputChart.Write(*this); +// } +// } void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const { diff --git a/moses/ChartManager.h b/moses/ChartManager.h index bf5851806..9d5485fb9 100644 --- a/moses/ChartManager.h +++ b/moses/ChartManager.h @@ -154,7 +154,7 @@ public: void OutputSearchGraph(OutputCollector *collector) const; void OutputSearchGraphSLF() const { } - void OutputSearchGraphHypergraph() const; + // void OutputSearchGraphHypergraph() const; }; diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp index 6b353a83b..412f92578 100644 --- a/moses/HypergraphOutput.cpp +++ b/moses/HypergraphOutput.cpp @@ -44,128 +44,13 @@ using namespace std; namespace Moses { -template -HypergraphOutput::HypergraphOutput(size_t precision) : - m_precision(precision) -{ - const StaticData& staticData = StaticData::Instance(); - vector hypergraphParameters; - const PARAM_VEC *params = staticData.GetParameter().GetParam("output-search-graph-hypergraph"); - if (params) { - hypergraphParameters = *params; - } - - if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") { - m_appendSuffix = true; - } else { - m_appendSuffix = false; - } - - string compression; - if (hypergraphParameters.size() > 1) { - m_compression = hypergraphParameters[1]; - } else { - m_compression = "txt"; - } - UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2", - util::Exception, "Unknown compression type: " << m_compression); - - if ( hypergraphParameters.size() > 2 ) { - m_hypergraphDir = hypergraphParameters[2]; - } else { - string nbestFile = staticData.GetNBestFilePath(); - if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) { - boost::filesystem::path nbestPath(nbestFile); - - // In the Boost filesystem API version 2, - // which was the default prior to Boost 1.46, - // the filename() method returned a string. - // - // In the Boost filesystem API version 3, - // which is the default starting with Boost 1.46, - // the filename() method returns a path object. - // - // To get a string from the path object, - // the native() method must be called. - // hypergraphDir = nbestPath.parent_path().filename() - //#if BOOST_VERSION >= 104600 - // .native() - //#endif - //; - - // Hopefully the following compiles under all versions of Boost. - // - // If this line gives you compile errors, - // contact Lane Schwartz on the Moses mailing list - m_hypergraphDir = nbestPath.parent_path().string(); - if (m_hypergraphDir.empty()) m_hypergraphDir="."; - - } else { - stringstream hypergraphDirName; - hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph"; - m_hypergraphDir = hypergraphDirName.str(); - } - } - - if ( ! boost::filesystem::exists(m_hypergraphDir) ) { - boost::filesystem::create_directory(m_hypergraphDir); - } - - UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir), - util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory"); - - - ofstream weightsOut; - stringstream weightsFilename; - weightsFilename << m_hypergraphDir << "/weights"; - - TRACE_ERR("The weights file is " << weightsFilename.str() << "\n"); - weightsOut.open(weightsFilename.str().c_str()); - weightsOut.setf(std::ios::fixed); - weightsOut.precision(6); - staticData.GetAllWeights().Save(weightsOut); - weightsOut.close(); -} - -template -void HypergraphOutput::Write(const M& manager) const -{ - - stringstream fileName; - fileName << m_hypergraphDir << "/" << manager.GetSource().GetTranslationId(); - if ( m_appendSuffix ) { - fileName << "." << m_compression; - } - boost::iostreams::filtering_ostream file; - - if ( m_compression == "gz" ) { - file.push( boost::iostreams::gzip_compressor() ); - } else if ( m_compression == "bz2" ) { - file.push( boost::iostreams::bzip2_compressor() ); - } - - file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) ); - - if (file.is_complete() && file.good()) { - file.setf(std::ios::fixed); - file.precision(m_precision); - manager.OutputSearchGraphAsHypergraph(file); - file.flush(); - } else { - TRACE_ERR("Cannot output hypergraph for line " << manager.GetSource().GetTranslationId() - << " because the output file " << fileName.str() - << " is not open or not ready for writing" - << std::endl); - } - file.pop(); -} - template class HypergraphOutput; template class HypergraphOutput; - -void ChartSearchGraphWriterMoses::WriteHypos -(const ChartHypothesisCollection& hypos, const map &reachable) const +void +ChartSearchGraphWriterMoses:: +WriteHypos(const ChartHypothesisCollection& hypos, + const map &reachable) const { ChartHypothesisCollection::const_iterator iter; @@ -177,28 +62,31 @@ void ChartSearchGraphWriterMoses::WriteHypos } const ChartArcList *arcList = mainHypo.GetArcList(); - if (arcList) { - ChartArcList::const_iterator iterArc; - for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) { - const ChartHypothesis &arc = **iterArc; - if (reachable.find(arc.GetId()) != reachable.end()) { - (*m_out) << m_lineNumber << " " << arc << endl; - } + if (arcList) + { + ChartArcList::const_iterator iterArc; + for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) + { + const ChartHypothesis &arc = **iterArc; + if (reachable.find(arc.GetId()) != reachable.end()) + (*m_out) << m_lineNumber << " " << arc << endl; + } } - } } - } -void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const -{ +void +ChartSearchGraphWriterHypergraph:: +WriteHeader(size_t winners, size_t losers) const +{ (*m_out) << "# target ||| features ||| source-covered" << endl; (*m_out) << winners << " " << (winners+losers) << endl; - } -void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos, - const map &reachable) const +void +ChartSearchGraphWriterHypergraph:: +WriteHypos(const ChartHypothesisCollection& hypos, + const map &reachable) const { ChartHypothesisCollection::const_iterator iter; @@ -225,7 +113,8 @@ void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollectio } } (*m_out) << edges.size() << endl; - for (vector::const_iterator ei = edges.begin(); ei != edges.end(); ++ei) { + for (vector::const_iterator ei = edges.begin(); + ei != edges.end(); ++ei) { const ChartHypothesis* hypo = *ei; const TargetPhrase& target = hypo->GetCurrTargetPhrase(); size_t ntIndex = 0; diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index 63c8ab5e0..3e7b916ca 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -64,30 +64,33 @@ POSSIBILITY OF SUCH DAMAGE. #include "IOWrapper.h" +#include +#include +#include +#include +#include +#include + using namespace std; namespace Moses { IOWrapper::IOWrapper() - :m_nBestStream(NULL) - - ,m_outputWordGraphStream(NULL) - ,m_outputSearchGraphStream(NULL) - ,m_detailedTranslationReportingStream(NULL) - ,m_unknownsStream(NULL) - ,m_alignmentInfoStream(NULL) - ,m_latticeSamplesStream(NULL) - - ,m_surpressSingleBestOutput(false) - + : m_nBestStream(NULL) + , m_outputWordGraphStream(NULL) + , m_outputSearchGraphStream(NULL) + , m_detailedTranslationReportingStream(NULL) + , m_unknownsStream(NULL) + , m_alignmentInfoStream(NULL) + , m_latticeSamplesStream(NULL) + , m_surpressSingleBestOutput(false) , m_look_ahead(0) , m_look_back(0) , m_buffered_ahead(0) - - ,spe_src(NULL) - ,spe_trg(NULL) - ,spe_aln(NULL) + , spe_src(NULL) + , spe_trg(NULL) + , spe_aln(NULL) { const StaticData &staticData = StaticData::Instance(); @@ -214,6 +217,26 @@ IOWrapper::IOWrapper() m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout)); } + // setup file pattern for hypergraph output + char const* key = "output-search-graph-hypergraph"; + PARAM_VEC const* p = staticData.GetParameter().GetParam(key); + std::string& fmt = m_hypergraph_output_filepattern; + // first, determine the output directory + if (p && p->size() > 2) fmt = p->at(2); + else if (nBestFilePath.size() && nBestFilePath != "-" && + ! boost::starts_with(nBestFilePath, "/dev/stdout")) + { + fmt = boost::filesystem::path(nBestFilePath).parent_path().string(); + if (fmt.empty()) fmt = "."; + } + else fmt = boost::filesystem::current_path().string() + "/hypergraph"; + if (*fmt.rbegin() != '/') fmt += "/"; + std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt")); + UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2", + "Unknown compression type '" << extension + << "' for hypergraph output!"); + fmt += string("%d.") + extension; + if (staticData.GetParameter().GetParam("spe-src")) { spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str()); spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str()); @@ -333,5 +356,14 @@ set_context_for(InputType& source) } + +std::string +IOWrapper:: +GetHypergraphOutputFileName(size_t const id) const +{ + return str(boost::format(m_hypergraph_output_filepattern) % id); +} + + } // namespace diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index b15fd631c..07acf6e77 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -63,6 +63,8 @@ POSSIBILITY OF SUCH DAMAGE. #include "search/applied.hh" +#include + namespace Moses { class ScoreComponentCollection; @@ -119,6 +121,9 @@ protected: size_t m_buffered_ahead; /// number of words buffered ahead // For context-sensitive decoding: // Number of context words ahead and before the current sentence. + + std::string m_hypergraph_output_filepattern; + public: IOWrapper(); ~IOWrapper(); @@ -173,6 +178,8 @@ public: m_inputStream = &input; } + std::string GetHypergraphOutputFileName(size_t const id) const; + // post editing std::ifstream *spe_src, *spe_trg, *spe_aln; diff --git a/moses/Incremental.h b/moses/Incremental.h index 91b5dc5a0..3f679d57c 100644 --- a/moses/Incremental.h +++ b/moses/Incremental.h @@ -56,8 +56,11 @@ public: } void OutputSearchGraphSLF() const { } - void OutputSearchGraphHypergraph() const { - } + + void + OutputSearchGraphAsHypergraph + ( std::string const& fname, size_t const precision ) const + { } private: diff --git a/moses/Manager.cpp b/moses/Manager.cpp index bb27e368b..9d56d4cfa 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -1974,14 +1974,19 @@ void Manager::OutputSearchGraphSLF() const } -void Manager::OutputSearchGraphHypergraph() const -{ - const StaticData &staticData = StaticData::Instance(); - if (staticData.GetOutputSearchGraphHypergraph()) { - HypergraphOutput hypergraphOutput(PRECISION); - hypergraphOutput.Write(*this); - } -} +// void Manager::OutputSearchGraphHypergraph() const +// { +// const StaticData &staticData = StaticData::Instance(); +// if (!staticData.GetOutputSearchGraphHypergraph()) return; + +// static char const* key = "output-search-graph-hypergraph"; +// PARAM_VEC const* p = staticData.GetParameter().GetParam(key); +// ScoreComponentCollection const& weights = staticData.GetAllWeights(); +// string const& nBestFile = staticData.GetNBestFilePath(); +// HypergraphOutput hypergraphOutput(PRECISION, p, nBestFile, weights); +// hypergraphOutput.Write(*this); + +// } void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector& solutions,long translationId) const { diff --git a/moses/Manager.h b/moses/Manager.h index 398d456c6..25d84a784 100644 --- a/moses/Manager.h +++ b/moses/Manager.h @@ -209,7 +209,7 @@ public: void OutputWordGraph(OutputCollector *collector) const; void OutputSearchGraph(OutputCollector *collector) const; void OutputSearchGraphSLF() const; - void OutputSearchGraphHypergraph() const; + // void OutputSearchGraphHypergraph() const; }; diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h index ed36c7c1d..8e26e0679 100644 --- a/moses/Syntax/Manager.h +++ b/moses/Syntax/Manager.h @@ -29,7 +29,13 @@ public: OutputCollector *collector) const {} void OutputLatticeSamples(OutputCollector *collector) const {} void OutputSearchGraph(OutputCollector *collector) const {} - void OutputSearchGraphHypergraph() const {} + // void OutputSearchGraphHypergraph() const {} + + void + OutputSearchGraphAsHypergraph + ( std::string const& fname, size_t const precision ) const + { } + void OutputSearchGraphSLF() const {} void OutputWordGraph(OutputCollector *collector) const {} void OutputDetailedTranslationReport(OutputCollector *collector) const {} diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 3794d35e7..0b425ef23 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -178,7 +178,12 @@ void TranslationTask::Run() // Output search graph in hypergraph format for Kenneth Heafield's // lazy hypergraph decoder; writes to stderr - manager->OutputSearchGraphHypergraph(); + if (StaticData::Instance().GetOutputSearchGraphHypergraph()) + { + size_t transId = manager->GetSource().GetTranslationId(); + string fname = io->GetHypergraphOutputFileName(transId); + manager->OutputSearchGraphAsHypergraph(fname, PRECISION); + } additionalReportingTime.stop(); -- cgit v1.2.3