Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Phil Williams <philip.williams@mac.com>, 2015-06-03 16:09:49 +0300
committer: Phil Williams <philip.williams@mac.com>, 2015-06-03 16:09:49 +0300
commit: 9097fd8965e039f9c5c889d76a614dd4eda19651 (patch)
tree: c490c80024dedb9deda60bdf9a1e8d5deee886e9 /phrase-extract
parent: ed321791a75c6177b218a0098d184c308bc9c561 (diff)
Ongoing moses/phrase-extract refactoring
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/SyntaxNodeCollection.cpp | 20
-rw-r--r-- phrase-extract/SyntaxNodeCollection.h | 44
2 files changed, 39 insertions, 25 deletions
diff --git a/phrase-extract/SyntaxNodeCollection.cpp b/phrase-extract/SyntaxNodeCollection.cpp
index 7421cc0ed..356c49bf4 100644
--- a/phrase-extract/SyntaxNodeCollection.cpp
+++ b/phrase-extract/SyntaxNodeCollection.cpp
@@ -47,7 +47,7 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
SyntaxNode* newNode = new SyntaxNode(label, startPos, endPos);
m_nodes.push_back( newNode );
m_index[ startPos ][ endPos ].push_back( newNode );
- m_size = std::max(endPos+1, m_size);
+ m_numWords = std::max(endPos+1, m_numWords);
return newNode;
}
@@ -56,8 +56,8 @@ ParentNodes SyntaxNodeCollection::Parse()
ParentNodes parents;
// looping through all spans of size >= 2
- for( int length=2; length<=m_size; length++ ) {
- for( int startPos = 0; startPos <= m_size-length; startPos++ ) {
+ for( int length=2; length<=m_numWords; length++ ) {
+ for( int startPos = 0; startPos <= m_numWords-length; startPos++ ) {
if (HasNode( startPos, startPos+length-1 )) {
// processing one (parent) span
@@ -96,13 +96,14 @@ bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
return GetNodes( startPos, endPos).size() > 0;
}
-const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( int startPos, int endPos ) const
+const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
+ int startPos, int endPos ) const
{
- SyntaxTreeIndexIterator startIndex = m_index.find( startPos );
+ NodeIndex::const_iterator startIndex = m_index.find( startPos );
if (startIndex == m_index.end() )
return m_emptyNode;
- SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos );
+ InnerNodeIndex::const_iterator endIndex = startIndex->second.find( endPos );
if (endIndex == startIndex->second.end())
return m_emptyNode;
@@ -120,14 +121,15 @@ std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree()
}
// Connect the SyntaxTrees.
- typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator;
+ typedef NodeIndex::const_iterator OuterIterator;
+ typedef InnerNodeIndex::const_reverse_iterator InnerIterator;
SyntaxTree *root = 0;
SyntaxNode *prevNode = 0;
SyntaxTree *prevTree = 0;
// Iterate over all start indices from lowest to highest.
- for (SyntaxTreeIndexIterator p = m_index.begin(); p != m_index.end(); ++p) {
- const SyntaxTreeIndex2 &inner = p->second;
+ for (OuterIterator p = m_index.begin(); p != m_index.end(); ++p) {
+ const InnerNodeIndex &inner = p->second;
// Iterate over all end indices from highest to lowest.
for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) {
const std::vector<SyntaxNode*> &nodes = q->second;
diff --git a/phrase-extract/SyntaxNodeCollection.h b/phrase-extract/SyntaxNodeCollection.h
index c8ca67d3d..060192980 100644
--- a/phrase-extract/SyntaxNodeCollection.h
+++ b/phrase-extract/SyntaxNodeCollection.h
@@ -34,38 +34,50 @@ namespace MosesTraining
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
+/** A collection of SyntaxNodes organized by start and end position.
+ *
+ */
class SyntaxNodeCollection
{
-protected:
- std::vector< SyntaxNode* > m_nodes;
-
- typedef std::map< int, std::vector< SyntaxNode* > > SyntaxTreeIndex2;
- typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
- typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
- typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
- SyntaxTreeIndex m_index;
- int m_size;
- std::vector< SyntaxNode* > m_emptyNode;
-
public:
- SyntaxNodeCollection() : m_size(0) {}
+ SyntaxNodeCollection() : m_numWords(0) {}
~SyntaxNodeCollection();
+ //! Construct and insert a new SyntaxNode.
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
+ // TODO Rename (and move?)
ParentNodes Parse();
+
+ //! Return true iff there are one or more SyntaxNodes with the given span.
bool HasNode( int startPos, int endPos ) const;
+
+ //! Lookup the SyntaxNodes for a given span.
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
- const std::vector< SyntaxNode* >& GetAllNodes() {
- return m_nodes;
- };
+
+ //! Get a vector of pointers to all SyntaxNodes (unordered).
+ const std::vector< SyntaxNode* >& GetAllNodes() { return m_nodes; };
+
size_t GetNumWords() const {
- return m_size;
+ return m_numWords;
}
void Clear();
std::auto_ptr<SyntaxTree> ExtractTree();
+
+private:
+ typedef std::map< int, std::vector< SyntaxNode* > > InnerNodeIndex;
+ typedef std::map< int, InnerNodeIndex > NodeIndex;
+
+ // Not copyable.
+ SyntaxNodeCollection(const SyntaxNodeCollection &);
+ SyntaxNodeCollection &operator=(const SyntaxNodeCollection &);
+
+ std::vector< SyntaxNode* > m_nodes;
+ NodeIndex m_index;
+ int m_numWords;
+ std::vector< SyntaxNode* > m_emptyNode;
};
} // namespace MosesTraining