Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-05-12 04:28:58 +0300
committer Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-05-12 04:28:58 +0300
commit d122605c0d1a44d92526cccaeff640eee695aa62 (patch)
tree 74b2de0c05adf515aa684fc7713993f5d618aa1b
parent f087fce65eae426fa38efdea55405753992f10f7 (diff)
Code reorganization with respect to hypergraph output.
-rw-r--r-- moses/BaseManager.cpp 69
-rw-r--r-- moses/BaseManager.h 7
-rw-r--r-- moses/ChartManager.cpp 22
-rw-r--r-- moses/ChartManager.h 2
-rw-r--r-- moses/HypergraphOutput.cpp 157
-rw-r--r-- moses/IOWrapper.cpp 62
-rw-r--r-- moses/IOWrapper.h 7
-rw-r--r-- moses/Incremental.h 7
-rw-r--r-- moses/Manager.cpp 21
-rw-r--r-- moses/Manager.h 2
-rw-r--r-- moses/Syntax/Manager.h 8
-rw-r--r-- moses/TranslationTask.cpp 7
12 files changed, 193 insertions, 178 deletions
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
index 83d48e6e4..70bacdede 100644
--- a/moses/BaseManager.cpp
+++ b/moses/BaseManager.cpp
@@ -1,11 +1,17 @@
-#include <vector>
-
-#include "StaticData.h"
#include "BaseManager.h"
+#include "StaticData.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/TranslationTask.h"
+#include <vector>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/filesystem.hpp>
+
using namespace std;
namespace Moses
@@ -21,6 +27,63 @@ BaseManager::GetSource() const
return m_source;
}
+void
+BaseManager::
+OutputSearchGraphAsHypergraph(std::ostream& out) const
+{
+ // This virtual function may not be implemented everywhere, but it should be
+ // for derived classes that use it.
+ UTIL_THROW2("Not implemented.");
+}
+
+void
+BaseManager::
+OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision) const
+{
+ std::string odir = boost::filesystem::path(fname).parent_path().string();
+ if (! boost::filesystem::exists(odir))
+ boost::filesystem::create_directory(odir);
+ UTIL_THROW_IF2(!boost::filesystem::is_directory(odir),
+ "Cannot output hypergraphs to " << odir
+ << " because that path exists but is not a directory.");
+
+ // not clear why we need to output the weights every time we dump a search
+ // graph into a file again, but that's what the old code did.
+
+ string weightsFile = odir + "/weights";
+ TRACE_ERR("The weights file is " << weightsFile << "\n");
+ ofstream weightsOut;
+ weightsOut.open(weightsFile.c_str());
+ weightsOut.setf(std::ios::fixed);
+ weightsOut.precision(6);
+ // just temporarily, till we've implemented weight scoring in the manager
+ // (or the translation task)
+ StaticData::Instance().GetAllWeights().Save(weightsOut);
+ weightsOut.close();
+
+ boost::iostreams::filtering_ostream file;
+ if (boost::ends_with(fname, ".gz"))
+ file.push(boost::iostreams::gzip_compressor());
+ else if (boost::ends_with(fname, ".bz2"))
+ file.push( boost::iostreams::bzip2_compressor() );
+ file.push( boost::iostreams::file_sink(fname, ios_base::out) );
+ if (file.is_complete() && file.good())
+ {
+ file.setf(std::ios::fixed);
+ file.precision(precision);
+ this->OutputSearchGraphAsHypergraph(file);
+ file.flush();
+ }
+ else
+ {
+ TRACE_ERR("Cannot output hypergraph for line "
+ << this->GetSource().GetTranslationId()
+ << " because the output file " << fname
+ << " is not open or not ready for writing"
+ << std::endl);
+ }
+ file.pop();
+}
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
index d7a25e7fd..c24e41ad3 100644
--- a/moses/BaseManager.h
+++ b/moses/BaseManager.h
@@ -63,13 +63,16 @@ public:
virtual void OutputSearchGraph(OutputCollector *collector) const = 0;
virtual void OutputUnknowns(OutputCollector *collector) const = 0;
virtual void OutputSearchGraphSLF() const = 0;
- virtual void OutputSearchGraphHypergraph() const = 0;
+ // virtual void OutputSearchGraphHypergraph() const = 0;
+ virtual void OutputSearchGraphAsHypergraph(std::ostream& out) const;
+ virtual void OutputSearchGraphAsHypergraph(std::string const& fname,
+ size_t const precision) const;
/***
* to be called after processing a sentence
*/
virtual void CalcDecoderStatistics() const = 0;
-
+
};
}
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index a0b39167a..9170b2a7c 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -291,9 +291,11 @@ void ChartManager::FindReachableHypotheses(
}
}
-void ChartManager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
+void
+ChartManager::
+OutputSearchGraphAsHypergraph(std::ostream& out) const
{
- ChartSearchGraphWriterHypergraph writer(&outputSearchGraphStream);
+ ChartSearchGraphWriterHypergraph writer(&out);
WriteSearchGraph(writer);
}
@@ -812,14 +814,14 @@ void ChartManager::OutputDetailedAllTranslationReport(
collector->Write(translationId, out.str());
}
-void ChartManager::OutputSearchGraphHypergraph() const
-{
- const StaticData &staticData = StaticData::Instance();
- if (staticData.GetOutputSearchGraphHypergraph()) {
- HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
- hypergraphOutputChart.Write(*this);
- }
-}
+// void ChartManager::OutputSearchGraphHypergraph() const
+// {
+// const StaticData &staticData = StaticData::Instance();
+// if (staticData.GetOutputSearchGraphHypergraph()) {
+// HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION);
+// hypergraphOutputChart.Write(*this);
+// }
+// }
void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const
{
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index bf5851806..9d5485fb9 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -154,7 +154,7 @@ public:
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const {
}
- void OutputSearchGraphHypergraph() const;
+ // void OutputSearchGraphHypergraph() const;
};
diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp
index 6b353a83b..412f92578 100644
--- a/moses/HypergraphOutput.cpp
+++ b/moses/HypergraphOutput.cpp
@@ -44,128 +44,13 @@ using namespace std;
namespace Moses
{
-template<class M>
-HypergraphOutput<M>::HypergraphOutput(size_t precision) :
- m_precision(precision)
-{
- const StaticData& staticData = StaticData::Instance();
- vector<string> hypergraphParameters;
- const PARAM_VEC *params = staticData.GetParameter().GetParam("output-search-graph-hypergraph");
- if (params) {
- hypergraphParameters = *params;
- }
-
- if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
- m_appendSuffix = true;
- } else {
- m_appendSuffix = false;
- }
-
- string compression;
- if (hypergraphParameters.size() > 1) {
- m_compression = hypergraphParameters[1];
- } else {
- m_compression = "txt";
- }
- UTIL_THROW_IF(m_compression != "txt" && m_compression != "gz" && m_compression != "bz2",
- util::Exception, "Unknown compression type: " << m_compression);
-
- if ( hypergraphParameters.size() > 2 ) {
- m_hypergraphDir = hypergraphParameters[2];
- } else {
- string nbestFile = staticData.GetNBestFilePath();
- if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
- boost::filesystem::path nbestPath(nbestFile);
-
- // In the Boost filesystem API version 2,
- // which was the default prior to Boost 1.46,
- // the filename() method returned a string.
- //
- // In the Boost filesystem API version 3,
- // which is the default starting with Boost 1.46,
- // the filename() method returns a path object.
- //
- // To get a string from the path object,
- // the native() method must be called.
- // hypergraphDir = nbestPath.parent_path().filename()
- //#if BOOST_VERSION >= 104600
- // .native()
- //#endif
- //;
-
- // Hopefully the following compiles under all versions of Boost.
- //
- // If this line gives you compile errors,
- // contact Lane Schwartz on the Moses mailing list
- m_hypergraphDir = nbestPath.parent_path().string();
- if (m_hypergraphDir.empty()) m_hypergraphDir=".";
-
- } else {
- stringstream hypergraphDirName;
- hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
- m_hypergraphDir = hypergraphDirName.str();
- }
- }
-
- if ( ! boost::filesystem::exists(m_hypergraphDir) ) {
- boost::filesystem::create_directory(m_hypergraphDir);
- }
-
- UTIL_THROW_IF(!boost::filesystem::is_directory(m_hypergraphDir),
- util::Exception, "Cannot output hypergraphs to " << m_hypergraphDir << " because that path exists, but is not a directory");
-
-
- ofstream weightsOut;
- stringstream weightsFilename;
- weightsFilename << m_hypergraphDir << "/weights";
-
- TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
- weightsOut.open(weightsFilename.str().c_str());
- weightsOut.setf(std::ios::fixed);
- weightsOut.precision(6);
- staticData.GetAllWeights().Save(weightsOut);
- weightsOut.close();
-}
-
-template<class M>
-void HypergraphOutput<M>::Write(const M& manager) const
-{
-
- stringstream fileName;
- fileName << m_hypergraphDir << "/" << manager.GetSource().GetTranslationId();
- if ( m_appendSuffix ) {
- fileName << "." << m_compression;
- }
- boost::iostreams::filtering_ostream file;
-
- if ( m_compression == "gz" ) {
- file.push( boost::iostreams::gzip_compressor() );
- } else if ( m_compression == "bz2" ) {
- file.push( boost::iostreams::bzip2_compressor() );
- }
-
- file.push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
-
- if (file.is_complete() && file.good()) {
- file.setf(std::ios::fixed);
- file.precision(m_precision);
- manager.OutputSearchGraphAsHypergraph(file);
- file.flush();
- } else {
- TRACE_ERR("Cannot output hypergraph for line " << manager.GetSource().GetTranslationId()
- << " because the output file " << fileName.str()
- << " is not open or not ready for writing"
- << std::endl);
- }
- file.pop();
-}
-
template class HypergraphOutput<Manager>;
template class HypergraphOutput<ChartManager>;
-
-void ChartSearchGraphWriterMoses::WriteHypos
-(const ChartHypothesisCollection& hypos, const map<unsigned, bool> &reachable) const
+void
+ChartSearchGraphWriterMoses::
+WriteHypos(const ChartHypothesisCollection& hypos,
+ const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@@ -177,28 +62,31 @@ void ChartSearchGraphWriterMoses::WriteHypos
}
const ChartArcList *arcList = mainHypo.GetArcList();
- if (arcList) {
- ChartArcList::const_iterator iterArc;
- for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) {
- const ChartHypothesis &arc = **iterArc;
- if (reachable.find(arc.GetId()) != reachable.end()) {
- (*m_out) << m_lineNumber << " " << arc << endl;
- }
+ if (arcList)
+ {
+ ChartArcList::const_iterator iterArc;
+ for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc)
+ {
+ const ChartHypothesis &arc = **iterArc;
+ if (reachable.find(arc.GetId()) != reachable.end())
+ (*m_out) << m_lineNumber << " " << arc << endl;
+ }
}
- }
}
-
}
-void ChartSearchGraphWriterHypergraph::WriteHeader(size_t winners, size_t losers) const
-{
+void
+ChartSearchGraphWriterHypergraph::
+WriteHeader(size_t winners, size_t losers) const
+{
(*m_out) << "# target ||| features ||| source-covered" << endl;
(*m_out) << winners << " " << (winners+losers) << endl;
-
}
-void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollection& hypos,
- const map<unsigned, bool> &reachable) const
+void
+ChartSearchGraphWriterHypergraph::
+WriteHypos(const ChartHypothesisCollection& hypos,
+ const map<unsigned, bool> &reachable) const
{
ChartHypothesisCollection::const_iterator iter;
@@ -225,7 +113,8 @@ void ChartSearchGraphWriterHypergraph::WriteHypos(const ChartHypothesisCollectio
}
}
(*m_out) << edges.size() << endl;
- for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin(); ei != edges.end(); ++ei) {
+ for (vector<const ChartHypothesis*>::const_iterator ei = edges.begin();
+ ei != edges.end(); ++ei) {
const ChartHypothesis* hypo = *ei;
const TargetPhrase& target = hypo->GetCurrTargetPhrase();
size_t ntIndex = 0;
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index 63c8ab5e0..3e7b916ca 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -64,30 +64,33 @@ POSSIBILITY OF SUCH DAMAGE.
#include "IOWrapper.h"
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+
using namespace std;
namespace Moses
{
IOWrapper::IOWrapper()
- :m_nBestStream(NULL)
-
- ,m_outputWordGraphStream(NULL)
- ,m_outputSearchGraphStream(NULL)
- ,m_detailedTranslationReportingStream(NULL)
- ,m_unknownsStream(NULL)
- ,m_alignmentInfoStream(NULL)
- ,m_latticeSamplesStream(NULL)
-
- ,m_surpressSingleBestOutput(false)
-
+ : m_nBestStream(NULL)
+ , m_outputWordGraphStream(NULL)
+ , m_outputSearchGraphStream(NULL)
+ , m_detailedTranslationReportingStream(NULL)
+ , m_unknownsStream(NULL)
+ , m_alignmentInfoStream(NULL)
+ , m_latticeSamplesStream(NULL)
+ , m_surpressSingleBestOutput(false)
, m_look_ahead(0)
, m_look_back(0)
, m_buffered_ahead(0)
-
- ,spe_src(NULL)
- ,spe_trg(NULL)
- ,spe_aln(NULL)
+ , spe_src(NULL)
+ , spe_trg(NULL)
+ , spe_aln(NULL)
{
const StaticData &staticData = StaticData::Instance();
@@ -214,6 +217,26 @@ IOWrapper::IOWrapper()
m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
}
+ // setup file pattern for hypergraph output
+ char const* key = "output-search-graph-hypergraph";
+ PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
+ std::string& fmt = m_hypergraph_output_filepattern;
+ // first, determine the output directory
+ if (p && p->size() > 2) fmt = p->at(2);
+ else if (nBestFilePath.size() && nBestFilePath != "-" &&
+ ! boost::starts_with(nBestFilePath, "/dev/stdout"))
+ {
+ fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
+ if (fmt.empty()) fmt = ".";
+ }
+ else fmt = boost::filesystem::current_path().string() + "/hypergraph";
+ if (*fmt.rbegin() != '/') fmt += "/";
+ std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
+ UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
+ "Unknown compression type '" << extension
+ << "' for hypergraph output!");
+ fmt += string("%d.") + extension;
+
if (staticData.GetParameter().GetParam("spe-src")) {
spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
@@ -333,5 +356,14 @@ set_context_for(InputType& source)
}
+
+std::string
+IOWrapper::
+GetHypergraphOutputFileName(size_t const id) const
+{
+ return str(boost::format(m_hypergraph_output_filepattern) % id);
+}
+
+
} // namespace
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index b15fd631c..07acf6e77 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -63,6 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "search/applied.hh"
+#include <boost/format.hpp>
+
namespace Moses
{
class ScoreComponentCollection;
@@ -119,6 +121,9 @@ protected:
size_t m_buffered_ahead; /// number of words buffered ahead
// For context-sensitive decoding:
// Number of context words ahead and before the current sentence.
+
+ std::string m_hypergraph_output_filepattern;
+
public:
IOWrapper();
~IOWrapper();
@@ -173,6 +178,8 @@ public:
m_inputStream = &input;
}
+ std::string GetHypergraphOutputFileName(size_t const id) const;
+
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 91b5dc5a0..3f679d57c 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -56,8 +56,11 @@ public:
}
void OutputSearchGraphSLF() const {
}
- void OutputSearchGraphHypergraph() const {
- }
+
+ void
+ OutputSearchGraphAsHypergraph
+ ( std::string const& fname, size_t const precision ) const
+ { }
private:
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index bb27e368b..9d56d4cfa 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -1974,14 +1974,19 @@ void Manager::OutputSearchGraphSLF() const
}
-void Manager::OutputSearchGraphHypergraph() const
-{
- const StaticData &staticData = StaticData::Instance();
- if (staticData.GetOutputSearchGraphHypergraph()) {
- HypergraphOutput<Manager> hypergraphOutput(PRECISION);
- hypergraphOutput.Write(*this);
- }
-}
+// void Manager::OutputSearchGraphHypergraph() const
+// {
+// const StaticData &staticData = StaticData::Instance();
+// if (!staticData.GetOutputSearchGraphHypergraph()) return;
+
+// static char const* key = "output-search-graph-hypergraph";
+// PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
+// ScoreComponentCollection const& weights = staticData.GetAllWeights();
+// string const& nBestFile = staticData.GetNBestFilePath();
+// HypergraphOutput<Manager> hypergraphOutput(PRECISION, p, nBestFile, weights);
+// hypergraphOutput.Write(*this);
+
+// }
void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const
{
diff --git a/moses/Manager.h b/moses/Manager.h
index 398d456c6..25d84a784 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -209,7 +209,7 @@ public:
void OutputWordGraph(OutputCollector *collector) const;
void OutputSearchGraph(OutputCollector *collector) const;
void OutputSearchGraphSLF() const;
- void OutputSearchGraphHypergraph() const;
+ // void OutputSearchGraphHypergraph() const;
};
diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h
index ed36c7c1d..8e26e0679 100644
--- a/moses/Syntax/Manager.h
+++ b/moses/Syntax/Manager.h
@@ -29,7 +29,13 @@ public:
OutputCollector *collector) const {}
void OutputLatticeSamples(OutputCollector *collector) const {}
void OutputSearchGraph(OutputCollector *collector) const {}
- void OutputSearchGraphHypergraph() const {}
+ // void OutputSearchGraphHypergraph() const {}
+
+ void
+ OutputSearchGraphAsHypergraph
+ ( std::string const& fname, size_t const precision ) const
+ { }
+
void OutputSearchGraphSLF() const {}
void OutputWordGraph(OutputCollector *collector) const {}
void OutputDetailedTranslationReport(OutputCollector *collector) const {}
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 3794d35e7..0b425ef23 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -178,7 +178,12 @@ void TranslationTask::Run()
// Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes to stderr
- manager->OutputSearchGraphHypergraph();
+ if (StaticData::Instance().GetOutputSearchGraphHypergraph())
+ {
+ size_t transId = manager->GetSource().GetTranslationId();
+ string fname = io->GetHypergraphOutputFileName(transId);
+ manager->OutputSearchGraphAsHypergraph(fname, PRECISION);
+ }
additionalReportingTime.stop();