Welcome to mirror list, hosted at ThFree Co, Russian Federation.

SyntaxTree.cpp « phrase-extract « training « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 2601cdb4c64f87e5262282662686d60303d1284b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2

/***********************************************************************
  Moses - factored phrase-based language decoder
  Copyright (C) 2009 University of Edinburgh

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/


#include "SyntaxTree.h"
#include <iostream>

// Destructor: releases every node that was allocated by AddNode().
// The pointers stored in m_index refer to the same objects held in m_nodes,
// so only m_nodes is walked here.
SyntaxTree::~SyntaxTree()
{
  // use an unsigned index so the comparison with size() is not a
  // signed/unsigned mix
  for(size_t i=0; i<m_nodes.size(); ++i) {
    delete m_nodes[i];
  }
}

void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
{
  SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label );
  m_nodes.push_back( newNode );
  m_index[ startPos ][ endPos ].push_back( newNode );
}

// Computes the child split points for every span of length >= 2 that has at
// least one node.
//
// For each such (parent) span a SplitPoints list is built: it starts with the
// span's start position, followed by the position just past each child span.
// Children are found greedily from left to right: starting at the last
// covered position, the longest sub-span that has a node is taken as the next
// child. For the first child the full parent span itself is excluded, so a
// parent is never reported as its own only child.
//
// If no child can be found for the remaining part of a parent span (e.g. the
// tree is incompletely annotated), the search for that span stops and the
// split points collected so far are still recorded. Without this guard the
// search loop would never terminate.
//
// Returns one SplitPoints entry per parent span, ordered by span length and
// then by start position.
ParentNodes SyntaxTree::Parse()
{
  ParentNodes parents;

  // the number of distinct start positions in the index bounds the span range
  const int size = static_cast<int>( m_index.size() );

  // looping through all spans of size >= 2
  for( int length=2; length<=size; length++ ) {
    for( int startPos = 0; startPos <= size-length; startPos++ ) {
      if (!HasNode( startPos, startPos+length-1 ))
        continue;

      // processing one (parent) span
      SplitPoints splitPoints;
      splitPoints.push_back( startPos );

      int first = 1;          // 1 until the first child is found: excludes the full span
      int covered = 0;        // number of positions of the parent already covered
      bool foundChild = true; // false once a pass over the candidates finds nothing
      while( covered < length && foundChild ) {
        foundChild = false;
        // find largest covering subspan (child)
        // starting at last covered position
        for( int midPos=length-first; midPos>covered; midPos-- ) {
          if( HasNode( startPos+covered, startPos+midPos-1 ) ) {
            covered = midPos;
            splitPoints.push_back( startPos+covered );
            first = 0;
            foundChild = true;
            break; // largest child found; continue from the new covered position
          }
        }
      }
      parents.push_back( splitPoints );
    }
  }
  return parents;
}

bool SyntaxTree::HasNode( int startPos, int endPos ) const
{
  return GetNodes( startPos, endPos).size() > 0;
}

const std::vector< SyntaxNode* >& SyntaxTree::GetNodes( int startPos, int endPos ) const
{
  SyntaxTreeIndexIterator startIndex = m_index.find( startPos );
  if (startIndex == m_index.end() )
    return m_emptyNode;

  SyntaxTreeIndexIterator2 endIndex = startIndex->second.find( endPos );
  if (endIndex == startIndex->second.end())
    return m_emptyNode;

  return endIndex->second;
}

// Renders the tree into a string, using the same layout that operator<<
// writes to a stream.
std::string SyntaxTree::ToString() const
{
  std::stringstream rendered;
  rendered << *this;
  return rendered.str();
}

std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
{
  int size = t.m_index.size();
  for(size_t length=1; length<=size; length++) {
    for(size_t space=0; space<length; space++) {
      os << "    ";
    }
    for(size_t start=0; start<=size-length; start++) {

      if (t.HasNode( start, start+(length-1) )) {
        std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######";

        os << label.substr(0,7) << " ";
      } else {
        os << "------- ";
      }
    }
    os << std::endl;
  }
  return os;
}