From f6ddc452241755733c947723a8618aab7245c8f1 Mon Sep 17 00:00:00 2001 From: Phil Williams Date: Thu, 4 Jun 2015 14:36:39 +0100 Subject: Ongoing moses/phrase-extract refactoring --- phrase-extract/pcfg-common/Jamfile | 1 - phrase-extract/pcfg-common/pcfg.cc | 108 ------------------------- phrase-extract/pcfg-common/pcfg.h | 63 --------------- phrase-extract/pcfg-common/tool.cc | 82 ------------------- phrase-extract/pcfg-common/tool.h | 93 --------------------- phrase-extract/pcfg-common/typedef.h | 38 --------- phrase-extract/pcfg-extract/Jamfile | 2 +- phrase-extract/pcfg-extract/pcfg_extract.cc | 5 +- phrase-extract/pcfg-extract/pcfg_extract.h | 2 +- phrase-extract/pcfg-extract/rule_collection.cc | 2 +- phrase-extract/pcfg-extract/rule_collection.h | 2 +- phrase-extract/pcfg-extract/rule_extractor.h | 4 +- phrase-extract/pcfg-score/Jamfile | 2 +- phrase-extract/pcfg-score/pcfg_score.cc | 5 +- phrase-extract/pcfg-score/pcfg_score.h | 2 +- phrase-extract/pcfg-score/tree_scorer.h | 4 +- phrase-extract/syntax-common/pcfg.cc | 87 ++++++++++++++++++++ phrase-extract/syntax-common/pcfg.h | 38 +++++++++ phrase-extract/syntax-common/tool.cc | 82 +++++++++++++++++++ phrase-extract/syntax-common/tool.h | 93 +++++++++++++++++++++ phrase-extract/syntax-common/vocabulary.h | 13 +++ 21 files changed, 327 insertions(+), 401 deletions(-) delete mode 100644 phrase-extract/pcfg-common/Jamfile delete mode 100644 phrase-extract/pcfg-common/pcfg.cc delete mode 100644 phrase-extract/pcfg-common/pcfg.h delete mode 100644 phrase-extract/pcfg-common/tool.cc delete mode 100644 phrase-extract/pcfg-common/tool.h delete mode 100644 phrase-extract/pcfg-common/typedef.h create mode 100644 phrase-extract/syntax-common/pcfg.cc create mode 100644 phrase-extract/syntax-common/pcfg.h create mode 100644 phrase-extract/syntax-common/tool.cc create mode 100644 phrase-extract/syntax-common/tool.h create mode 100644 phrase-extract/syntax-common/vocabulary.h (limited to 'phrase-extract') diff --git a/phrase-extract/pcfg-common/Jamfile b/phrase-extract/pcfg-common/Jamfile deleted file mode 100644 index 5669b443e..000000000 --- a/phrase-extract/pcfg-common/Jamfile +++ /dev/null @@ -1 +0,0 @@ -lib pcfg_common : [ glob *.cc ] ..//syntax-common ..//deps : .. ; diff --git a/phrase-extract/pcfg-common/pcfg.cc b/phrase-extract/pcfg-common/pcfg.cc deleted file mode 100644 index 988367c9b..000000000 --- a/phrase-extract/pcfg-common/pcfg.cc +++ /dev/null @@ -1,108 +0,0 @@ -/*********************************************************************** - Moses - statistical machine translation system - Copyright (C) 2006-2012 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "pcfg.h" - -#include - -#include -#include - -#include "syntax-common/exception.h" - -namespace MosesTraining { -namespace Syntax { -namespace PCFG { - -void Pcfg::Add(const Key &key, double score) { - rules_[key] = score; -} - -bool Pcfg::Lookup(const Key &key, double &score) const { - Map::const_iterator p = rules_.find(key); - if (p == rules_.end()) { - return false; - } - score = p->second; - return true; -} - -void Pcfg::Read(std::istream &input, Vocabulary &vocab) { - std::string line; - std::string lhs_string; - std::vector rhs_strings; - std::string score_string; - Key key; - while (std::getline(input, line)) { - // Read LHS. - std::size_t pos = line.find("|||"); - if (pos == std::string::npos) { - throw Exception("missing first delimiter"); - } - lhs_string = line.substr(0, pos); - boost::trim(lhs_string); - - // Read RHS. - std::size_t begin = pos+3; - pos = line.find("|||", begin); - if (pos == std::string::npos) { - throw Exception("missing second delimiter"); - } - std::string rhs_text = line.substr(begin, pos-begin); - boost::trim(rhs_text); - rhs_strings.clear(); - boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(), - boost::algorithm::token_compress_on); - - // Read score. - score_string = line.substr(pos+3); - boost::trim(score_string); - - // Construct key. - key.clear(); - key.reserve(rhs_strings.size()+1); - key.push_back(vocab.Insert(lhs_string)); - for (std::vector::const_iterator p = rhs_strings.begin(); - p != rhs_strings.end(); ++p) { - key.push_back(vocab.Insert(*p)); - } - - // Add rule. - double score = boost::lexical_cast(score_string); - Add(key, score); - } -} - -void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const { - for (const_iterator p = begin(); p != end(); ++p) { - const Key &key = p->first; - double score = p->second; - std::vector::const_iterator q = key.begin(); - std::vector::const_iterator end = key.end(); - output << vocab.Lookup(*q++) << " |||"; - while (q != end) { - output << " " << vocab.Lookup(*q++); - } - output << " ||| " << score << std::endl; - } -} - -} // namespace PCFG -} // namespace Syntax -} // namespace MosesTraining diff --git a/phrase-extract/pcfg-common/pcfg.h b/phrase-extract/pcfg-common/pcfg.h deleted file mode 100644 index c5c04cba4..000000000 --- a/phrase-extract/pcfg-common/pcfg.h +++ /dev/null @@ -1,63 +0,0 @@ -/*********************************************************************** - Moses - statistical machine translation system - Copyright (C) 2006-2012 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once -#ifndef PCFG_PCFG_H_ -#define PCFG_PCFG_H_ - -#include -#include -#include -#include - -#include "typedef.h" - -namespace MosesTraining { -namespace Syntax { -namespace PCFG { - -class Pcfg { - public: - typedef std::vector Key; - typedef std::map Map; - typedef Map::iterator iterator; - typedef Map::const_iterator const_iterator; - - Pcfg() {} - - iterator begin() { return rules_.begin(); } - const_iterator begin() const { return rules_.begin(); } - - iterator end() { return rules_.end(); } - const_iterator end() const { return rules_.end(); } - - void Add(const Key &, double); - bool Lookup(const Key &, double &) const; - void Read(std::istream &, Vocabulary &); - void Write(const Vocabulary &, std::ostream &) const; - - private: - Map rules_; -}; - -} // namespace PCFG -} // namespace Syntax -} // namespace MosesTraining - -#endif diff --git a/phrase-extract/pcfg-common/tool.cc b/phrase-extract/pcfg-common/tool.cc deleted file mode 100644 index c41eaf9bd..000000000 --- a/phrase-extract/pcfg-common/tool.cc +++ /dev/null @@ -1,82 +0,0 @@ -/*********************************************************************** - Moses - statistical machine translation system - Copyright (C) 2006-2012 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#include "tool.h" - -#include - -namespace MosesTraining { -namespace Syntax { -namespace PCFG { - -std::istream &Tool::OpenInputOrDie(const std::string &filename) { - // TODO Check that function is only called once? - if (filename.empty() || filename == "-") { - input_ptr_ = &(std::cin); - } else { - input_file_stream_.open(filename.c_str()); - if (!input_file_stream_) { - std::ostringstream msg; - msg << "failed to open input file: " << filename; - Error(msg.str()); - } - input_ptr_ = &input_file_stream_; - } - return *input_ptr_; -} - -std::ostream &Tool::OpenOutputOrDie(const std::string &filename) { - // TODO Check that function is only called once? - if (filename.empty() || filename == "-") { - output_ptr_ = &(std::cout); - } else { - output_file_stream_.open(filename.c_str()); - if (!output_file_stream_) { - std::ostringstream msg; - msg << "failed to open output file: " << filename; - Error(msg.str()); - } - output_ptr_ = &output_file_stream_; - } - return *output_ptr_; -} - -void Tool::OpenNamedInputOrDie(const std::string &filename, - std::ifstream &stream) { - stream.open(filename.c_str()); - if (!stream) { - std::ostringstream msg; - msg << "failed to open input file: " << filename; - Error(msg.str()); - } -} - -void Tool::OpenNamedOutputOrDie(const std::string &filename, - std::ofstream &stream) { - stream.open(filename.c_str()); - if (!stream) { - std::ostringstream msg; - msg << "failed to open output file: " << filename; - Error(msg.str()); - } -} - -} // namespace PCFG -} // namespace Syntax -} // namespace MosesTraining diff --git a/phrase-extract/pcfg-common/tool.h b/phrase-extract/pcfg-common/tool.h deleted file mode 100644 index 2c903a11e..000000000 --- a/phrase-extract/pcfg-common/tool.h +++ /dev/null @@ -1,93 +0,0 @@ -/*********************************************************************** - Moses - statistical machine translation system - Copyright (C) 2006-2012 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once -#ifndef PCFG_TOOL_H_ -#define PCFG_TOOL_H_ - -#include -#include -#include -#include - -#include - -namespace MosesTraining { -namespace Syntax { -namespace PCFG { - -class Tool { - public: - virtual ~Tool() {} - - const std::string &name() const { return name_; } - - virtual int Main(int argc, char *argv[]) = 0; - - protected: - Tool(const std::string &name) : name_(name) {} - - // Returns the boost::program_options style that should be used by all tools. - static int CommonOptionStyle() { - namespace cls = boost::program_options::command_line_style; - return cls::default_style & (~cls::allow_guessing); - } - - void Warn(const std::string &msg) const { - std::cerr << name_ << ": warning: " << msg << std::endl; - } - - void Error(const std::string &msg) const { - std::cerr << name_ << ": error: " << msg << std::endl; - std::exit(1); - } - - // Initialises the tool's main input stream and returns a reference that is - // valid for the remainder of the tool's lifetime. If filename is empty or - // "-" then input is standard input; otherwise it is the named file. Calls - // Error() if the file cannot be opened for reading. - std::istream &OpenInputOrDie(const std::string &filename); - - // Initialises the tool's main output stream and returns a reference that is - // valid for the remainder of the tool's lifetime. If filename is empty or - // "-" then output is standard output; otherwise it is the named file. Calls - // Error() if the file cannot be opened for writing. - std::ostream &OpenOutputOrDie(const std::string &filename); - - // Opens the named input file using the supplied ifstream. Calls Error() if - // the file cannot be opened for reading. - void OpenNamedInputOrDie(const std::string &, std::ifstream &); - - // Opens the named output file using the supplied ofstream. Calls Error() if - // the file cannot be opened for writing. - void OpenNamedOutputOrDie(const std::string &, std::ofstream &); - - private: - std::string name_; - std::istream *input_ptr_; - std::ifstream input_file_stream_; - std::ostream *output_ptr_; - std::ofstream output_file_stream_; -}; - -} // namespace PCFG -} // namespace Syntax -} // namespace MosesTraining - -#endif diff --git a/phrase-extract/pcfg-common/typedef.h b/phrase-extract/pcfg-common/typedef.h deleted file mode 100644 index 1280b89cf..000000000 --- a/phrase-extract/pcfg-common/typedef.h +++ /dev/null @@ -1,38 +0,0 @@ -/*********************************************************************** - Moses - statistical machine translation system - Copyright (C) 2006-2012 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once -#ifndef PCFG_TYPEDEF_H_ -#define PCFG_TYPEDEF_H_ - -#include - -#include "syntax-common/numbered_set.h" - -namespace MosesTraining { -namespace Syntax { -namespace PCFG { - -typedef NumberedSet Vocabulary; - -} // namespace PCFG -} // namespace Syntax -} // namespace MosesTraining - -#endif diff --git a/phrase-extract/pcfg-extract/Jamfile b/phrase-extract/pcfg-extract/Jamfile index 2442b967a..2f4ae1e7d 100644 --- a/phrase-extract/pcfg-extract/Jamfile +++ b/phrase-extract/pcfg-extract/Jamfile @@ -1 +1 @@ -exe pcfg-extract : [ glob *.cc ] ..//syntax-common ..//pcfg-common ../..//boost_program_options : .. ; +exe pcfg-extract : [ glob *.cc ] ..//syntax-common ../..//boost_program_options : .. ; diff --git a/phrase-extract/pcfg-extract/pcfg_extract.cc b/phrase-extract/pcfg-extract/pcfg_extract.cc index 87419edb7..45eb9ff3d 100644 --- a/phrase-extract/pcfg-extract/pcfg_extract.cc +++ b/phrase-extract/pcfg-extract/pcfg_extract.cc @@ -32,13 +32,12 @@ #include #include "syntax-common/exception.h" +#include "syntax-common/pcfg.h" +#include "syntax-common/vocabulary.h" #include "syntax-common/xml_tree_parser.h" #include "SyntaxTree.h" -#include "pcfg-common/pcfg.h" -#include "pcfg-common/typedef.h" - #include "options.h" #include "rule_collection.h" #include "rule_extractor.h" diff --git a/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h index 5882e45da..3b084acbe 100644 --- a/phrase-extract/pcfg-extract/pcfg_extract.h +++ b/phrase-extract/pcfg-extract/pcfg_extract.h @@ -21,7 +21,7 @@ #ifndef PCFG_EXTRACT_PCFG_EXTRACT_H_ #define PCFG_EXTRACT_PCFG_EXTRACT_H_ -#include "pcfg-common/tool.h" +#include "syntax-common/tool.h" namespace MosesTraining { diff --git a/phrase-extract/pcfg-extract/rule_collection.cc b/phrase-extract/pcfg-extract/rule_collection.cc index 9db0ce9bf..a814f82d6 100644 --- a/phrase-extract/pcfg-extract/rule_collection.cc +++ b/phrase-extract/pcfg-extract/rule_collection.cc @@ -19,7 +19,7 @@ #include "rule_collection.h" -#include "pcfg-common/pcfg.h" +#include "syntax-common/pcfg.h" #include diff --git a/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h index 3d9a9f98b..3bbc32721 100644 --- a/phrase-extract/pcfg-extract/rule_collection.h +++ b/phrase-extract/pcfg-extract/rule_collection.h @@ -25,7 +25,7 @@ #include -#include "pcfg-common/pcfg.h" +#include "syntax-common/pcfg.h" namespace MosesTraining { diff --git a/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h index d32d76992..91014747c 100644 --- a/phrase-extract/pcfg-extract/rule_extractor.h +++ b/phrase-extract/pcfg-extract/rule_extractor.h @@ -23,7 +23,7 @@ #include "SyntaxTree.h" -#include "pcfg-common/typedef.h" +#include "syntax-common/vocabulary.h" #include "rule_collection.h" @@ -39,7 +39,7 @@ class RuleExtractor { public: RuleExtractor(Vocabulary &); - void Extract(const MosesTraining::SyntaxTree &, RuleCollection &) const; + void Extract(const SyntaxTree &, RuleCollection &) const; private: Vocabulary &non_term_vocab_; }; diff --git a/phrase-extract/pcfg-score/Jamfile b/phrase-extract/pcfg-score/Jamfile index 45d46492a..ca321d04c 100644 --- a/phrase-extract/pcfg-score/Jamfile +++ b/phrase-extract/pcfg-score/Jamfile @@ -1 +1 @@ -exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../..//boost_program_options : .. ; +exe pcfg-score : [ glob *.cc ] ..//syntax-common ../..//boost_program_options : .. ; diff --git a/phrase-extract/pcfg-score/pcfg_score.cc b/phrase-extract/pcfg-score/pcfg_score.cc index e11f73f70..cec84211a 100644 --- a/phrase-extract/pcfg-score/pcfg_score.cc +++ b/phrase-extract/pcfg-score/pcfg_score.cc @@ -36,12 +36,11 @@ #include "SyntaxTree.h" #include "syntax-common/exception.h" +#include "syntax-common/pcfg.h" +#include "syntax-common/vocabulary.h" #include "syntax-common/xml_tree_parser.h" #include "syntax-common/xml_tree_writer.h" -#include "pcfg-common/pcfg.h" -#include "pcfg-common/typedef.h" - namespace MosesTraining { namespace Syntax diff --git a/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h index b0b4a77cd..b691b107f 100644 --- a/phrase-extract/pcfg-score/pcfg_score.h +++ b/phrase-extract/pcfg-score/pcfg_score.h @@ -21,7 +21,7 @@ #ifndef PCFG_SCORE_PCFG_SCORE_H_ #define PCFG_SCORE_PCFG_SCORE_H_ -#include "pcfg-common/tool.h" +#include "syntax-common/tool.h" namespace MosesTraining { diff --git a/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h index cf9fdd1a3..b95d13ddb 100644 --- a/phrase-extract/pcfg-score/tree_scorer.h +++ b/phrase-extract/pcfg-score/tree_scorer.h @@ -23,8 +23,8 @@ #include "SyntaxTree.h" -#include "pcfg-common/pcfg.h" -#include "pcfg-common/typedef.h" +#include "syntax-common/vocabulary.h" +#include "syntax-common/pcfg.h" namespace MosesTraining { diff --git a/phrase-extract/syntax-common/pcfg.cc b/phrase-extract/syntax-common/pcfg.cc new file mode 100644 index 000000000..3efe04218 --- /dev/null +++ b/phrase-extract/syntax-common/pcfg.cc @@ -0,0 +1,87 @@ +#include "pcfg.h" + +#include + +#include +#include + +#include "syntax-common/exception.h" + +namespace MosesTraining { +namespace Syntax { + +void Pcfg::Add(const Key &key, double score) { + rules_[key] = score; +} + +bool Pcfg::Lookup(const Key &key, double &score) const { + Map::const_iterator p = rules_.find(key); + if (p == rules_.end()) { + return false; + } + score = p->second; + return true; +} + +void Pcfg::Read(std::istream &input, Vocabulary &vocab) { + std::string line; + std::string lhs_string; + std::vector rhs_strings; + std::string score_string; + Key key; + while (std::getline(input, line)) { + // Read LHS. + std::size_t pos = line.find("|||"); + if (pos == std::string::npos) { + throw Exception("missing first delimiter"); + } + lhs_string = line.substr(0, pos); + boost::trim(lhs_string); + + // Read RHS. + std::size_t begin = pos+3; + pos = line.find("|||", begin); + if (pos == std::string::npos) { + throw Exception("missing second delimiter"); + } + std::string rhs_text = line.substr(begin, pos-begin); + boost::trim(rhs_text); + rhs_strings.clear(); + boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(), + boost::algorithm::token_compress_on); + + // Read score. + score_string = line.substr(pos+3); + boost::trim(score_string); + + // Construct key. + key.clear(); + key.reserve(rhs_strings.size()+1); + key.push_back(vocab.Insert(lhs_string)); + for (std::vector::const_iterator p = rhs_strings.begin(); + p != rhs_strings.end(); ++p) { + key.push_back(vocab.Insert(*p)); + } + + // Add rule. + double score = boost::lexical_cast(score_string); + Add(key, score); + } +} + +void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const { + for (const_iterator p = begin(); p != end(); ++p) { + const Key &key = p->first; + double score = p->second; + std::vector::const_iterator q = key.begin(); + std::vector::const_iterator end = key.end(); + output << vocab.Lookup(*q++) << " |||"; + while (q != end) { + output << " " << vocab.Lookup(*q++); + } + output << " ||| " << score << std::endl; + } +} + +} // namespace Syntax +} // namespace MosesTraining diff --git a/phrase-extract/syntax-common/pcfg.h b/phrase-extract/syntax-common/pcfg.h new file mode 100644 index 000000000..0a731cc7a --- /dev/null +++ b/phrase-extract/syntax-common/pcfg.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include + +#include "vocabulary.h" + +namespace MosesTraining { +namespace Syntax { + +class Pcfg { + public: + typedef std::vector Key; + typedef std::map Map; + typedef Map::iterator iterator; + typedef Map::const_iterator const_iterator; + + Pcfg() {} + + iterator begin() { return rules_.begin(); } + const_iterator begin() const { return rules_.begin(); } + + iterator end() { return rules_.end(); } + const_iterator end() const { return rules_.end(); } + + void Add(const Key &, double); + bool Lookup(const Key &, double &) const; + void Read(std::istream &, Vocabulary &); + void Write(const Vocabulary &, std::ostream &) const; + + private: + Map rules_; +}; + +} // namespace Syntax +} // namespace MosesTraining diff --git a/phrase-extract/syntax-common/tool.cc b/phrase-extract/syntax-common/tool.cc new file mode 100644 index 000000000..c41eaf9bd --- /dev/null +++ b/phrase-extract/syntax-common/tool.cc @@ -0,0 +1,82 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2012 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "tool.h" + +#include + +namespace MosesTraining { +namespace Syntax { +namespace PCFG { + +std::istream &Tool::OpenInputOrDie(const std::string &filename) { + // TODO Check that function is only called once? + if (filename.empty() || filename == "-") { + input_ptr_ = &(std::cin); + } else { + input_file_stream_.open(filename.c_str()); + if (!input_file_stream_) { + std::ostringstream msg; + msg << "failed to open input file: " << filename; + Error(msg.str()); + } + input_ptr_ = &input_file_stream_; + } + return *input_ptr_; +} + +std::ostream &Tool::OpenOutputOrDie(const std::string &filename) { + // TODO Check that function is only called once? + if (filename.empty() || filename == "-") { + output_ptr_ = &(std::cout); + } else { + output_file_stream_.open(filename.c_str()); + if (!output_file_stream_) { + std::ostringstream msg; + msg << "failed to open output file: " << filename; + Error(msg.str()); + } + output_ptr_ = &output_file_stream_; + } + return *output_ptr_; +} + +void Tool::OpenNamedInputOrDie(const std::string &filename, + std::ifstream &stream) { + stream.open(filename.c_str()); + if (!stream) { + std::ostringstream msg; + msg << "failed to open input file: " << filename; + Error(msg.str()); + } +} + +void Tool::OpenNamedOutputOrDie(const std::string &filename, + std::ofstream &stream) { + stream.open(filename.c_str()); + if (!stream) { + std::ostringstream msg; + msg << "failed to open output file: " << filename; + Error(msg.str()); + } +} + +} // namespace PCFG +} // namespace Syntax +} // namespace MosesTraining diff --git a/phrase-extract/syntax-common/tool.h b/phrase-extract/syntax-common/tool.h new file mode 100644 index 000000000..2c903a11e --- /dev/null +++ b/phrase-extract/syntax-common/tool.h @@ -0,0 +1,93 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2012 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#pragma once +#ifndef PCFG_TOOL_H_ +#define PCFG_TOOL_H_ + +#include +#include +#include +#include + +#include + +namespace MosesTraining { +namespace Syntax { +namespace PCFG { + +class Tool { + public: + virtual ~Tool() {} + + const std::string &name() const { return name_; } + + virtual int Main(int argc, char *argv[]) = 0; + + protected: + Tool(const std::string &name) : name_(name) {} + + // Returns the boost::program_options style that should be used by all tools. + static int CommonOptionStyle() { + namespace cls = boost::program_options::command_line_style; + return cls::default_style & (~cls::allow_guessing); + } + + void Warn(const std::string &msg) const { + std::cerr << name_ << ": warning: " << msg << std::endl; + } + + void Error(const std::string &msg) const { + std::cerr << name_ << ": error: " << msg << std::endl; + std::exit(1); + } + + // Initialises the tool's main input stream and returns a reference that is + // valid for the remainder of the tool's lifetime. If filename is empty or + // "-" then input is standard input; otherwise it is the named file. Calls + // Error() if the file cannot be opened for reading. + std::istream &OpenInputOrDie(const std::string &filename); + + // Initialises the tool's main output stream and returns a reference that is + // valid for the remainder of the tool's lifetime. If filename is empty or + // "-" then output is standard output; otherwise it is the named file. Calls + // Error() if the file cannot be opened for writing. + std::ostream &OpenOutputOrDie(const std::string &filename); + + // Opens the named input file using the supplied ifstream. Calls Error() if + // the file cannot be opened for reading. + void OpenNamedInputOrDie(const std::string &, std::ifstream &); + + // Opens the named output file using the supplied ofstream. Calls Error() if + // the file cannot be opened for writing. + void OpenNamedOutputOrDie(const std::string &, std::ofstream &); + + private: + std::string name_; + std::istream *input_ptr_; + std::ifstream input_file_stream_; + std::ostream *output_ptr_; + std::ofstream output_file_stream_; +}; + +} // namespace PCFG +} // namespace Syntax +} // namespace MosesTraining + +#endif diff --git a/phrase-extract/syntax-common/vocabulary.h b/phrase-extract/syntax-common/vocabulary.h new file mode 100644 index 000000000..119767245 --- /dev/null +++ b/phrase-extract/syntax-common/vocabulary.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +#include "numbered_set.h" + +namespace MosesTraining { +namespace Syntax { + +typedef NumberedSet Vocabulary; + +} // namespace Syntax +} // namespace MosesTraining -- cgit v1.2.3