Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Phil Williams <philip.williams@mac.com>, 2015-06-02 17:23:41 +0300
committer: Phil Williams <philip.williams@mac.com>, 2015-06-02 17:23:41 +0300
commit: 2f04d4a56ebab78a97b9fa9ecf4b50ef845a1bdb (patch)
tree: a5e60d8efce2aece1c850ccb40adb5021387d1c4 /phrase-extract/syntax-common
parent: 5ece895ab4d7fafe32d76cb2dd7bd7995cd06c7c (diff)
Ongoing moses/phrase-extract refactoring
Diffstat (limited to 'phrase-extract/syntax-common')
-rw-r--r-- phrase-extract/syntax-common/xml_tree_parser.cc | 69
-rw-r--r-- phrase-extract/syntax-common/xml_tree_parser.h | 28
2 files changed, 58 insertions, 39 deletions
diff --git a/phrase-extract/syntax-common/xml_tree_parser.cc b/phrase-extract/syntax-common/xml_tree_parser.cc
index 2f8a904fa..bf3c6d87e 100644
--- a/phrase-extract/syntax-common/xml_tree_parser.cc
+++ b/phrase-extract/syntax-common/xml_tree_parser.cc
@@ -1,17 +1,27 @@
#include "xml_tree_parser.h"
+#include <cassert>
+#include <vector>
+
+#include "util/tokenize.hh"
+
+#include "SyntaxTree.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
-#include "util/tokenize.hh"
-
-#include <cassert>
-#include <vector>
namespace MosesTraining {
namespace Syntax {
-StringTree *XmlTreeParser::Parse(const std::string &line) {
+XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
+ std::map<std::string, int> &topLabelSet)
+ : label_set_(labelSet)
+ , top_label_set_(topLabelSet)
+{
+}
+
+std::auto_ptr<SyntaxTree> XmlTreeParser::Parse(const std::string &line)
+{
line_ = line;
node_collection_.Clear();
try {
@@ -22,38 +32,37 @@ StringTree *XmlTreeParser::Parse(const std::string &line) {
} catch (const XmlException &e) {
throw Exception(e.getMsg());
}
- node_collection_.ConnectNodes();
- SyntaxNode *root = node_collection_.GetTop();
- assert(root);
+ std::auto_ptr<SyntaxTree> root = node_collection_.ExtractTree();
words_ = util::tokenize(line_);
- return ConvertTree(*root, words_);
+ AttachWords(words_, *root);
+ return root;
}
-// Converts a SyntaxNode tree to a StringTree.
-StringTree *XmlTreeParser::ConvertTree(const SyntaxNode &tree,
- const std::vector<std::string> &words) {
- StringTree *root = new StringTree(tree.GetLabel());
- const std::vector<SyntaxNode*> &children = tree.GetChildren();
- if (children.empty()) {
- if (tree.GetStart() != tree.GetEnd()) {
+void XmlTreeParser::AttachWords(const std::vector<std::string> &words,
+ SyntaxTree &root)
+{
+ std::vector<SyntaxTree*> leaves;
+ leaves.reserve(words.size());
+ for (SyntaxTree::LeafIterator p(root); p != SyntaxTree::LeafIterator(); ++p) {
+ leaves.push_back(&*p);
+ }
+
+ std::vector<std::string>::const_iterator q = words.begin();
+ for (std::vector<SyntaxTree*>::iterator p = leaves.begin(); p != leaves.end();
+ ++p) {
+ SyntaxTree *leaf = *p;
+ const int start = leaf->value().GetStart();
+ const int end = leaf->value().GetEnd();
+ if (start != end) {
std::ostringstream msg;
- msg << "leaf node covers multiple words (" << tree.GetStart()
- << "-" << tree.GetEnd() << "): this is currently unsupported";
+ msg << "leaf node covers multiple words (" << start << "-" << end
+ << "): this is currently unsupported";
throw Exception(msg.str());
}
- StringTree *leaf = new StringTree(words[tree.GetStart()]);
- leaf->parent() = root;
- root->children().push_back(leaf);
- } else {
- for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
- p != children.end(); ++p) {
- assert(*p);
- StringTree *child = ConvertTree(**p, words);
- child->parent() = root;
- root->children().push_back(child);
- }
+ SyntaxTree *newLeaf = new SyntaxTree(SyntaxNode(start, end, *q++));
+ leaf->children().push_back(newLeaf);
+ newLeaf->parent() = leaf;
}
- return root;
}
} // namespace Syntax
diff --git a/phrase-extract/syntax-common/xml_tree_parser.h b/phrase-extract/syntax-common/xml_tree_parser.h
index c84ea25ec..e0b75c830 100644
--- a/phrase-extract/syntax-common/xml_tree_parser.h
+++ b/phrase-extract/syntax-common/xml_tree_parser.h
@@ -1,34 +1,44 @@
#pragma once
#include <map>
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
+#include "SyntaxTree.h"
#include "exception.h"
-#include "string_tree.h"
namespace MosesTraining {
namespace Syntax {
-// Parses a string in Moses' XML parse tree format and returns a StringTree
+// Parses a string in Moses' XML parse tree format and returns a SyntaxTree
// object. This is a wrapper around the ProcessAndStripXMLTags function.
class XmlTreeParser {
public:
- StringTree *Parse(const std::string &);
+ XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
- private:
- static StringTree *ConvertTree(const MosesTraining::SyntaxNode &,
- const std::vector<std::string> &);
+ std::auto_ptr<SyntaxTree> Parse(const std::string &);
+
+ const std::vector<std::string>& GetWords() {
+ return words_;
+ }
+
+ const SyntaxNodeCollection &GetNodeCollection() const {
+ return node_collection_;
+ }
- std::set<std::string> label_set_;
- std::map<std::string, int> top_label_set_;
+ private:
+ std::set<std::string> &label_set_;
+ std::map<std::string, int> &top_label_set_;
std::string line_;
- MosesTraining::SyntaxNodeCollection node_collection_;
+ SyntaxNodeCollection node_collection_;
std::vector<std::string> words_;
+
+ void AttachWords(const std::vector<std::string> &, SyntaxTree &);
};
} // namespace Syntax