diff options
author | Phil Williams <philip.williams@mac.com> | 2015-06-03 16:09:49 +0300 |
---|---|---|
committer | Phil Williams <philip.williams@mac.com> | 2015-06-03 16:09:49 +0300 |
commit | 9097fd8965e039f9c5c889d76a614dd4eda19651 (patch) | |
tree | c490c80024dedb9deda60bdf9a1e8d5deee886e9 /phrase-extract | |
parent | ed321791a75c6177b218a0098d184c308bc9c561 (diff) |
Ongoing moses/phrase-extract refactoring
Diffstat (limited to 'phrase-extract')
-rw-r--r-- | phrase-extract/SyntaxNodeCollection.cpp | 20 | ||||
-rw-r--r-- | phrase-extract/SyntaxNodeCollection.h | 44 |
2 files changed, 39 insertions, 25 deletions
diff --git a/phrase-extract/SyntaxNodeCollection.cpp b/phrase-extract/SyntaxNodeCollection.cpp index 7421cc0ed..356c49bf4 100644 --- a/phrase-extract/SyntaxNodeCollection.cpp +++ b/phrase-extract/SyntaxNodeCollection.cpp @@ -47,7 +47,7 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos, SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos); m_nodes.push_back( newNode ); m_index[ startPos ][ endPos ].push_back( newNode ); - m_size = std::max(endPos+1, m_size); + m_numWords = std::max(endPos+1, m_numWords); return newNode; } @@ -56,8 +56,8 @@ ParentNodes SyntaxNodeCollection::Parse() ParentNodes parents; // looping through all spans of size >= 2 - for( int length=2; length<=m_size; length++ ) { - for( int startPos = 0; startPos <= m_size-length; startPos++ ) { + for( int length=2; length<=m_numWords; length++ ) { + for( int startPos = 0; startPos <= m_numWords-length; startPos++ ) { if (HasNode( startPos, startPos+length-1 )) { // processing one (parent) span @@ -96,13 +96,14 @@ bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const return GetNodes( startPos, endPos).size() > 0; } -const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( int startPos, int endPos ) const +const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( + int startPos, int endPos ) const { - SyntaxTreeIndexIterator startIndex = m_index.find( startPos ); + NodeIndex::const_iterator startIndex = m_index.find( startPos ); if (startIndex == m_index.end() ) return m_emptyNode; - SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos ); + InnerNodeIndex::const_iterator endIndex = startIndex->second.find( endPos ); if (endIndex == startIndex->second.end()) return m_emptyNode; @@ -120,14 +121,15 @@ std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree() } // Connect the SyntaxTrees. - typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator; + typedef NodeIndex::const_iterator OuterIterator; + typedef InnerNodeIndex::const_reverse_iterator InnerIterator; SyntaxTree *root = 0; SyntaxNode *prevNode = 0; SyntaxTree *prevTree = 0; // Iterate over all start indices from lowest to highest. - for (SyntaxTreeIndexIterator p = m_index.begin(); p != m_index.end(); ++p) { - const SyntaxTreeIndex2 &inner = p->second; + for (OuterIterator p = m_index.begin(); p != m_index.end(); ++p) { + const InnerNodeIndex &inner = p->second; // Iterate over all end indices from highest to lowest. for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) { const std::vector<SyntaxNode*> &nodes = q->second; diff --git a/phrase-extract/SyntaxNodeCollection.h b/phrase-extract/SyntaxNodeCollection.h index c8ca67d3d..060192980 100644 --- a/phrase-extract/SyntaxNodeCollection.h +++ b/phrase-extract/SyntaxNodeCollection.h @@ -34,38 +34,50 @@ namespace MosesTraining typedef std::vector< int > SplitPoints; typedef std::vector< SplitPoints > ParentNodes; +/** A collection of SyntaxNodes organized by start and end position. + * + */ class SyntaxNodeCollection { -protected: - std::vector< SyntaxNode* > m_nodes; - - typedef std::map< int, std::vector< SyntaxNode* > > SyntaxTreeIndex2; - typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2; - typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex; - typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator; - SyntaxTreeIndex m_index; - int m_size; - std::vector< SyntaxNode* > m_emptyNode; - public: - SyntaxNodeCollection() : m_size(0) {} + SyntaxNodeCollection() : m_numWords(0) {} ~SyntaxNodeCollection(); + //! Construct and insert a new SyntaxNode. SyntaxNode *AddNode( int startPos, int endPos, const std::string &label ); + // TODO Rename (and move?) ParentNodes Parse(); + + //! Return true iff there are one or more SyntaxNodes with the given span. bool HasNode( int startPos, int endPos ) const; + + //! Lookup the SyntaxNodes for a given span. const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const; - const std::vector< SyntaxNode* >& GetAllNodes() { - return m_nodes; - }; + + //! Get a vector of pointers to all SyntaxNodes (unordered). + const std::vector< SyntaxNode* >& GetAllNodes() { return m_nodes; }; + size_t GetNumWords() const { - return m_size; + return m_numWords; } void Clear(); std::auto_ptr<SyntaxTree> ExtractTree(); + +private: + typedef std::map< int, std::vector< SyntaxNode* > > InnerNodeIndex; + typedef std::map< int, InnerNodeIndex > NodeIndex; + + // Not copyable. + SyntaxNodeCollection(const SyntaxNodeCollection &); + SyntaxNodeCollection &operator=(const SyntaxNodeCollection &); + + std::vector< SyntaxNode* > m_nodes; + NodeIndex m_index; + int m_numWords; + std::vector< SyntaxNode* > m_emptyNode; }; } // namespace MosesTraining |