#include "StsgRule.h"

#include "Node.h"
#include "Subgraph.h"
#include "SyntaxTree.h"

// NOTE(review): the original system include had lost its header name;
// <algorithm> is required for std::sort and <cassert> for assert. The
// container headers are listed because this file names those types directly.
#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <utility>
#include <vector>

namespace Moses
{
namespace GHKM
{

// Builds an STSG rule from a subgraph fragment.
//
// The target side is initialised from the fragment itself.  The source side
// is the sequence of the fragment's leaves that have non-empty spans, sorted
// with PartitionOrderComp: TREE leaves become the non-terminal "X" and SOURCE
// leaves become terminals carrying the leaf's label.
//
// Two alignments are then filled in by walking the target side's leaves:
//   - m_alignment receives one (source index, target leaf index) pair for
//     every source position linked to a target leaf with a non-empty span;
//   - m_nonTermAlignment maps the n-th source-side non-terminal to the
//     position of the same node among the target-side TREE leaves that have
//     non-empty spans.
//
// NOTE(review): container element types below were reconstructed from how
// the values are used in this function (e.g. `const Node *sink = *p`);
// confirm them against the declarations in Subgraph.h and Node.h.
StsgRule::StsgRule(const Subgraph &fragment)
  : m_targetSide(fragment, true)
{
  // Source side

  const std::set<const Node *> &sinkNodes = fragment.GetLeaves();

  // Collect the subset of sink nodes that excludes target nodes with
  // empty spans.
  std::vector<const Node *> productiveSinks;
  productiveSinks.reserve(sinkNodes.size());
  for (std::set<const Node *>::const_iterator p = sinkNodes.begin();
       p != sinkNodes.end(); ++p) {
    const Node *sink = *p;
    if (!sink->GetSpan().empty()) {
      productiveSinks.push_back(sink);
    }
  }

  // Sort them into the order defined by their spans.
  std::sort(productiveSinks.begin(), productiveSinks.end(),
            PartitionOrderComp);

  // Build a map from target nodes to source-order indices, so that we
  // can construct the Alignment object later.
  std::map<const Node *, std::vector<int> > sinkToSourceIndices;
  // Maps each non-terminal (TREE) sink to its rank among the source-side
  // non-terminals.
  std::map<const Node *, int> nonTermSinkToSourceIndex;

  m_sourceSide.reserve(productiveSinks.size());
  int srcIndex = 0;
  int nonTermCount = 0;
  for (std::vector<const Node *>::const_iterator p = productiveSinks.begin();
       p != productiveSinks.end(); ++p, ++srcIndex) {
    const Node &sink = **p;
    if (sink.GetType() == TREE) {
      m_sourceSide.push_back(Symbol("X", NonTerminal));
      sinkToSourceIndices[&sink].push_back(srcIndex);
      nonTermSinkToSourceIndex[&sink] = nonTermCount++;
    } else {
      assert(sink.GetType() == SOURCE);
      m_sourceSide.push_back(Symbol(sink.GetLabel(), Terminal));
      // Add all aligned target words to the sinkToSourceIndices map
      // NOTE(review): element type assumed to be `Node *`; confirm against
      // the return type of Node::GetParents().
      const std::vector<Node *> &parents(sink.GetParents());
      for (std::vector<Node *>::const_iterator q = parents.begin();
           q != parents.end(); ++q) {
        if ((*q)->GetType() == TARGET) {
          sinkToSourceIndices[*q].push_back(srcIndex);
        }
      }
    }
  }

  // Alignment

  std::vector<const Node *> targetLeaves;
  m_targetSide.GetTargetLeaves(targetLeaves);

  m_alignment.reserve(targetLeaves.size());
  m_nonTermAlignment.resize(nonTermCount);

  // Convert the size once so the loop compares int with int; the index is
  // kept as int because it is stored directly in the alignment pairs.
  const int numTargetLeaves = static_cast<int>(targetLeaves.size());

  // i indexes every target leaf; j counts only the TREE leaves that have
  // non-empty spans.
  for (int i = 0, j = 0; i < numTargetLeaves; ++i) {
    const Node *leaf = targetLeaves[i];
    assert(leaf->GetType() != SOURCE);
    if (leaf->GetSpan().empty()) {
      continue;
    }
    std::map<const Node *, std::vector<int> >::const_iterator p =
      sinkToSourceIndices.find(leaf);
    assert(p != sinkToSourceIndices.end());
    const std::vector<int> &sourceNodes = p->second;
    for (std::vector<int>::const_iterator r = sourceNodes.begin();
         r != sourceNodes.end(); ++r) {
      m_alignment.push_back(std::make_pair(*r, i));
    }
    if (leaf->GetType() == TREE) {
      m_nonTermAlignment[nonTermSinkToSourceIndex[leaf]] = j++;
    }
  }
}

}  // namespace GHKM
}  // namespace Moses