diff options
Diffstat (limited to 'scripts/training/phrase-extract/SyntaxTree.cpp')
-rw-r--r-- | scripts/training/phrase-extract/SyntaxTree.cpp | 157 |
1 files changed, 157 insertions, 0 deletions
diff --git a/scripts/training/phrase-extract/SyntaxTree.cpp b/scripts/training/phrase-extract/SyntaxTree.cpp new file mode 100644 index 000000000..74088acd4 --- /dev/null +++ b/scripts/training/phrase-extract/SyntaxTree.cpp @@ -0,0 +1,157 @@ +// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $ +// vim:tabstop=2 + +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2009 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + + +#include "SyntaxTree.h" +#include <iostream> + +SyntaxTree::~SyntaxTree() +{ + // loop through all m_nodes, delete them + for(int i=0; i<m_nodes.size(); i++) + { + delete m_nodes[i]; + } +} + +void SyntaxTree::AddNode( int startPos, int endPos, std::string label ) +{ + SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label ); + m_nodes.push_back( newNode ); + m_index[ startPos ][ endPos ].push_back( newNode ); +} + +ParentNodes SyntaxTree::Parse() { + ParentNodes parents; + + int size = m_index.size(); + + // looping through all spans of size >= 2 + for( int length=2; length<=size; length++ ) + { + for( int startPos = 0; startPos <= size-length; startPos++ ) + { + if (HasNode( startPos, startPos+length-1 )) + { + // processing one (parent) span + + //std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":"; + SplitPoints splitPoints; + splitPoints.push_back( startPos ); + //std::cerr << " " << startPos; + + int first = 1; + int covered = 0; + while( covered < length ) + { + // find largest covering subspan (child) + // starting at last covered position + for( int midPos=length-first; midPos>covered; midPos-- ) + { + if( HasNode( startPos+covered, startPos+midPos-1 ) ) + { + covered = midPos; + splitPoints.push_back( startPos+covered ); + // std::cerr << " " << ( startPos+covered ); + first = 0; + } + } + } + // std::cerr << std::endl; + parents.push_back( splitPoints ); + } + } + } + return parents; +} + +bool SyntaxTree::HasNode( int startPos, int endPos ) const +{ + return GetNodes( startPos, endPos).size() > 0; +} + +const std::vector< SyntaxNode* >& SyntaxTree::GetNodes( int startPos, int endPos ) const +{ + SyntaxTreeIndexIterator startIndex = m_index.find( startPos ); + if (startIndex == m_index.end() ) + return m_emptyNode; + + SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos ); + if (endIndex == startIndex->second.end()) + return m_emptyNode; + + return endIndex->second; +} + +// for printing out tree +std::string SyntaxTree::ToString() const +{ + std::stringstream out; + out << *this; + return out.str(); +} + +void SyntaxTree::AddDefaultNonTerms() +{ + size_t phraseSize = GetNumWords(); + for (size_t startPos = 0; startPos < phraseSize; ++startPos) + { + for (size_t endPos = startPos; endPos < phraseSize; ++endPos) + { + const std::vector< SyntaxNode* > &nodes = GetNodes(startPos, endPos); + if (nodes.size() == 0) + { + AddNode(startPos, endPos, "X"); + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const SyntaxTree& t) +{ + int size = t.m_index.size(); + for(size_t length=1; length<=size; length++) + { + for(size_t space=0; space<length; space++) + { + os << " "; + } + for(size_t start=0; start<=size-length; start++) + { + + if (t.HasNode( start, start+(length-1) )) + { + std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######"; + + os << label.substr(0,7) << " "; + } + else + { + os << "------- "; + } + } + os << std::endl; + } + return os; +} + + |