Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Phil Williams <philip.williams@mac.com> 2015-06-03 16:20:00 +0300
committer Phil Williams <philip.williams@mac.com> 2015-06-03 16:20:00 +0300
commit 8653bd81590d1f9f658d9560458dc72d9556e197 (patch)
tree 6934b2416f01b35c20d43e2f0a5084d3f77951bb /phrase-extract
parent 9097fd8965e039f9c5c889d76a614dd4eda19651 (diff)
Ongoing moses/phrase-extract refactoring
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/SyntaxNodeCollection.cpp | 40
-rw-r--r-- phrase-extract/SyntaxNodeCollection.h | 6
-rw-r--r-- phrase-extract/relax-parse-main.cpp | 44
-rw-r--r-- phrase-extract/relax-parse.h | 10
4 files changed, 50 insertions, 50 deletions
diff --git a/phrase-extract/SyntaxNodeCollection.cpp b/phrase-extract/SyntaxNodeCollection.cpp
index 356c49bf4..0a344fcd7 100644
--- a/phrase-extract/SyntaxNodeCollection.cpp
+++ b/phrase-extract/SyntaxNodeCollection.cpp
@@ -51,46 +51,6 @@ SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
return newNode;
}
-ParentNodes SyntaxNodeCollection::Parse()
-{
- ParentNodes parents;
-
- // looping through all spans of size >= 2
- for( int length=2; length<=m_numWords; length++ ) {
- for( int startPos = 0; startPos <= m_numWords-length; startPos++ ) {
- if (HasNode( startPos, startPos+length-1 )) {
- // processing one (parent) span
-
- //std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
- SplitPoints splitPoints;
- splitPoints.push_back( startPos );
- //std::cerr << " " << startPos;
-
- int first = 1;
- int covered = 0;
- int found_somehing = 1; // break loop if nothing found
- while( covered < length && found_somehing ) {
- // find largest covering subspan (child)
- // starting at last covered position
- found_somehing = 0;
- for( int midPos=length-first; midPos>covered; midPos-- ) {
- if( HasNode( startPos+covered, startPos+midPos-1 ) ) {
- covered = midPos;
- splitPoints.push_back( startPos+covered );
- // std::cerr << " " << ( startPos+covered );
- first = 0;
- found_somehing = 1;
- }
- }
- }
- // std::cerr << std::endl;
- parents.push_back( splitPoints );
- }
- }
- }
- return parents;
-}
-
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;
diff --git a/phrase-extract/SyntaxNodeCollection.h b/phrase-extract/SyntaxNodeCollection.h
index 060192980..8de151c55 100644
--- a/phrase-extract/SyntaxNodeCollection.h
+++ b/phrase-extract/SyntaxNodeCollection.h
@@ -31,9 +31,6 @@
namespace MosesTraining
{
-typedef std::vector< int > SplitPoints;
-typedef std::vector< SplitPoints > ParentNodes;
-
/** A collection of SyntaxNodes organized by start and end position.
*
*/
@@ -47,9 +44,6 @@ public:
//! Construct and insert a new SyntaxNode.
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
- // TODO Rename (and move?)
- ParentNodes Parse();
-
//! Return true iff there are one or more SyntaxNodes with the given span.
bool HasNode( int startPos, int endPos ) const;
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index 4b5c2d573..f7a2a271b 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -50,7 +50,7 @@ int main(int argc, char* argv[])
// output tree
// cerr << "BEFORE:" << endl << tree;
- ParentNodes parents = tree.Parse();
+ ParentNodes parents = determineSplitPoints(tree);
// execute selected grammar relaxation schemes
if (leftBinarizeFlag)
@@ -271,3 +271,45 @@ void SAMT( SyntaxNodeCollection &tree, ParentNodes &parents )
tree.AddNode( nodes[i]->start, nodes[i]->end, nodes[i]->label);
}
}
+
+ParentNodes determineSplitPoints(const SyntaxNodeCollection &nodeColl)
+{
+ ParentNodes parents;
+
+ const std::size_t numWords = nodeColl.GetNumWords();
+
+ // looping through all spans of size >= 2
+ for( int length=2; length<=numWords; length++ ) {
+ for( int startPos = 0; startPos <= numWords-length; startPos++ ) {
+ if (nodeColl.HasNode( startPos, startPos+length-1 )) {
+ // processing one (parent) span
+
+ //std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
+ SplitPoints splitPoints;
+ splitPoints.push_back( startPos );
+ //std::cerr << " " << startPos;
+
+ int first = 1;
+ int covered = 0;
+ int found_somehing = 1; // break loop if nothing found
+ while( covered < length && found_somehing ) {
+ // find largest covering subspan (child)
+ // starting at last covered position
+ found_somehing = 0;
+ for( int midPos=length-first; midPos>covered; midPos-- ) {
+ if( nodeColl.HasNode( startPos+covered, startPos+midPos-1 ) ) {
+ covered = midPos;
+ splitPoints.push_back( startPos+covered );
+ // std::cerr << " " << ( startPos+covered );
+ first = 0;
+ found_somehing = 1;
+ }
+ }
+ }
+ // std::cerr << std::endl;
+ parents.push_back( splitPoints );
+ }
+ }
+ }
+ return parents;
+}
diff --git a/phrase-extract/relax-parse.h b/phrase-extract/relax-parse.h
index a00aa6deb..7c412646a 100644
--- a/phrase-extract/relax-parse.h
+++ b/phrase-extract/relax-parse.h
@@ -37,10 +37,14 @@ bool leftBinarizeFlag = false;
bool rightBinarizeFlag = false;
char SAMTLevel = 0;
+typedef std::vector< int > SplitPoints;
+typedef std::vector< SplitPoints > ParentNodes;
+
// functions
void init(int argc, char* argv[]);
+ParentNodes determineSplitPoints(const MosesTraining::SyntaxNodeCollection &);
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
-void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
-void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
-void SAMT( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
+void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
+void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );
+void SAMT( MosesTraining::SyntaxNodeCollection &tree, ParentNodes &parents );