Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'phrase-extract/extract-ghkm/AlignmentGraph.cpp')
-rw-r--r--phrase-extract/extract-ghkm/AlignmentGraph.cpp62
1 files changed, 42 insertions, 20 deletions
diff --git a/phrase-extract/extract-ghkm/AlignmentGraph.cpp b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
index 9dba71331..21708bdfc 100644
--- a/phrase-extract/extract-ghkm/AlignmentGraph.cpp
+++ b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
@@ -34,6 +34,8 @@
namespace MosesTraining
{
+namespace Syntax
+{
namespace GHKM
{
@@ -242,36 +244,24 @@ Node *AlignmentGraph::CopyParseTree(const SyntaxTree *root)
return p;
}
-// Finds the set of frontier nodes. The definition of a frontier node differs
-// from Galley et al's (2004) in the following ways:
-//
-// 1. A node with an empty span is not a frontier node (this excludes
-// unaligned target subtrees).
-// 2. Target word nodes are not frontier nodes.
-// 3. Source word nodes are not frontier nodes.
-// 4. Unless the --AllowUnary option is used, a node is not a frontier node if
-// it has the same span as its parent.
+// Recursively constructs the set of frontier nodes for the tree (or subtree)
+// rooted at the given node.
void AlignmentGraph::ComputeFrontierSet(Node *root,
const Options &options,
std::set<Node *> &frontierSet) const
{
- // Don't include word nodes or unaligned target subtrees.
+ // Non-tree nodes and unaligned target subtrees are not frontier nodes (nor
+ // are their descendants). See the comment for the function
+ // AlignmentGraph::IsFrontierNode().
if (root->GetType() != TREE || root->GetSpan().empty()) {
return;
}
- if (!SpansIntersect(root->GetComplementSpan(), Closure(root->GetSpan()))) {
- // Unless unary rules are explicitly allowed, we use Chung et al's (2011)
- // modified defintion of a frontier node to eliminate the production of
- // non-lexical unary rules.
- assert(root->GetParents().size() <= 1);
- if (options.allowUnary
- || root->GetParents().empty()
- || root->GetParents()[0]->GetSpan() != root->GetSpan()) {
- frontierSet.insert(root);
- }
+ if (IsFrontierNode(*root, options)) {
+ frontierSet.insert(root);
}
+ // Recursively check descendants.
const std::vector<Node *> &children = root->GetChildren();
for (std::vector<Node *>::const_iterator p(children.begin());
p != children.end(); ++p) {
@@ -279,6 +269,37 @@ void AlignmentGraph::ComputeFrontierSet(Node *root,
}
}
+// Determines whether the given node is a frontier node or not. The definition
+// of a frontier node differs from Galley et al.'s (2004) in the following ways:
+//
+// 1. A node with an empty span is not a frontier node (this is to exclude
+// unaligned target subtrees).
+// 2. Target word nodes are not frontier nodes.
+// 3. Source word nodes are not frontier nodes.
+// 4. Unless the --AllowUnary option is used, a node is not a frontier node if
+// it has the same span as its parent.
+bool AlignmentGraph::IsFrontierNode(const Node &n, const Options &options) const
+{
+ // Don't include word nodes or unaligned target subtrees.
+ if (n.GetType() != TREE || n.GetSpan().empty()) {
+ return false;
+ }
+ // This is the original GHKM definition of a frontier node.
+ if (SpansIntersect(n.GetComplementSpan(), Closure(n.GetSpan()))) {
+ return false;
+ }
+ // Unless unary rules are explicitly allowed, we use Chung et al.'s (2011)
+ // modified definition of a frontier node to eliminate the production of
+ // non-lexical unary rules.
+ assert(n.GetParents().size() <= 1);
+ if (!options.allowUnary &&
+ !n.GetParents().empty() &&
+ n.GetParents()[0]->GetSpan() == n.GetSpan()) {
+ return false;
+ }
+ return true;
+}
+
void AlignmentGraph::CalcComplementSpans(Node *root)
{
Span compSpan;
@@ -393,4 +414,5 @@ Node *AlignmentGraph::DetermineAttachmentPoint(int index)
}
} // namespace GHKM
+} // namespace Syntax
} // namespace MosesTraining