Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ComposedRule.cpp « extract-ghkm « phrase-extract « training « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 7a7fba106fe7a7a5b082cefc4789856ac95d6127 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/***********************************************************************
 Moses - statistical machine translation system
 Copyright (C) 2006-2011 University of Edinburgh
 
 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.
 
 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.
 
 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#include "ComposedRule.h"

#include "Node.h"
#include "Options.h"
#include "Subgraph.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <queue>
#include <set>
#include <vector>

namespace Moses {
namespace GHKM {

// Forms a 'trivial' composed rule from a single base rule.
//
// The depth, size and node count are copied from baseRule.  Every leaf of
// baseRule whose type is TREE is pushed onto m_openAttachmentPoints, in the
// iteration order of baseRule.GetLeaves().
//
// m_baseRule is initialised from the baseRule reference itself.
// NOTE(review): if m_baseRule is declared as a reference, baseRule must
// outlive this object -- confirm against the header.
ComposedRule::ComposedRule(const Subgraph &baseRule)
    : m_baseRule(baseRule)
    , m_depth(baseRule.GetDepth())
    , m_size(baseRule.GetSize())
    , m_nodeCount(baseRule.GetNodeCount())
{
  typedef std::set<const Node *> LeafSet;

  const LeafSet &leafSet = baseRule.GetLeaves();
  LeafSet::const_iterator it = leafSet.begin();
  const LeafSet::const_iterator last = leafSet.end();
  for (; it != last; ++it) {
    const Node *leaf = *it;
    // Only TREE leaves can have another rule attached beneath them.
    if (leaf->GetType() != TREE) {
      continue;
    }
    m_openAttachmentPoints.push(leaf);
  }
}

// Forms a new composed rule by attaching `rule` at the first open attachment
// point of `other`.
//
//   other - the composed rule being extended; its base rule, attached rules
//           and open attachment points are copied.  It must have at least one
//           open attachment point.
//   rule  - the rule being attached.  Only its address is stored, so it must
//           outlive this object.
//   depth - the depth of the resulting rule, as computed by the caller.
//
// The node count is other's count plus rule's count minus one.
// NOTE(review): the "-1" presumably accounts for rule's root coinciding with
// the attachment point node -- confirm.
ComposedRule::ComposedRule(const ComposedRule &other, const Subgraph &rule,
                           int depth)
    : m_baseRule(other.m_baseRule)
    , m_attachedRules(other.m_attachedRules)
    , m_openAttachmentPoints(other.m_openAttachmentPoints)
    , m_depth(depth)
    , m_size(other.m_size+rule.GetSize())
    , m_nodeCount(other.m_nodeCount+rule.GetNodeCount()-1)
{
  // Same precondition that CloseAttachmentPoint() checks: popping an empty
  // queue is undefined behaviour.
  assert(!m_openAttachmentPoints.empty());
  m_attachedRules.push_back(&rule);
  // The front attachment point is now occupied by `rule`.
  m_openAttachmentPoints.pop();
}

// Returns the first open attachment point, or 0 if none remain.
const Node *ComposedRule::GetOpenAttachmentPoint()
{
  if (m_openAttachmentPoints.empty()) {
    return 0;
  }
  return m_openAttachmentPoints.front();
}

void ComposedRule::CloseAttachmentPoint()
{
  assert(!m_openAttachmentPoints.empty());
  m_attachedRules.push_back(0);
  m_openAttachmentPoints.pop();
}

// Attempts to compose `rule` with this composed rule.
//
// Returns 0 if the resulting rule would exceed options.maxNodes,
// options.maxRuleSize or options.maxRuleDepth.  Otherwise returns a pointer
// to a ComposedRule allocated with `new`; nothing in this function releases
// it, so the caller receives a raw pointer to a heap object.
//
// Precondition (asserted): rule's root is a TREE node.  The depth computation
// walks from rule's root up through parent links until it reaches the base
// rule's root, asserting that each node on the way has exactly one parent.
ComposedRule *ComposedRule::AttemptComposition(const Subgraph &rule,
                                               const Options &options) const
{
  // The smallest possible rule fragment should be rooted at a tree node.
  // Note that this differs from the original GHKM definition.
  assert(rule.GetRoot()->GetType() == TREE);

  // Reject the composition if the combined node count is over the limit.
  if (m_nodeCount+rule.GetNodeCount()-1 > options.maxNodes) {
    return 0;
  }

  // Reject the composition if the combined size is over the limit.
  if (m_size+rule.GetSize() > options.maxRuleSize) {
    return 0;
  }

  // Count how many parent links separate rule's root from the base rule's
  // root; that is the depth at which `rule` would be attached.
  int stepsToBaseRoot = 0;
  for (const Node *cur = rule.GetRoot(); cur != m_baseRule.GetRoot();
       cur = cur->GetParents()[0]) {
    assert(cur->GetParents().size() == 1);
    ++stepsToBaseRoot;
  }

  // The composed depth is the larger of the current depth and the depth
  // reached by the newly attached rule.
  const int reachedDepth = stepsToBaseRoot+rule.GetDepth();
  const int composedDepth = (m_depth < reachedDepth) ? reachedDepth : m_depth;
  if (composedDepth > options.maxRuleDepth) {
    return 0;
  }

  return new ComposedRule(*this, rule, composedDepth);
}

// Constructs the Subgraph corresponding to this composed rule.
//
// The result is rooted at the base rule's root.  Its leaf set starts from the
// base rule's leaves; each TREE-type base leaf that has a non-null entry in
// m_attachedRules is replaced by the leaves of that attached rule.  Entries
// of m_attachedRules are matched to TREE-type base leaves positionally, in
// the iteration order of the base rule's leaf set.  A null entry (as recorded
// by CloseAttachmentPoint) or a TREE leaf beyond the end of m_attachedRules
// leaves the base leaf itself in the result.
Subgraph ComposedRule::CreateSubgraph()
{
  typedef std::set<const Node *> LeafSet;

  LeafSet leaves;
  const LeafSet &baseLeaves = m_baseRule.GetLeaves();
  // Unsigned index: avoids a signed/unsigned comparison against size().
  std::size_t nextAttachment = 0;
  for (LeafSet::const_iterator p = baseLeaves.begin();
       p != baseLeaves.end(); ++p) {
    const Node *baseLeaf = *p;
    if (baseLeaf->GetType() == TREE &&
        nextAttachment < m_attachedRules.size()) {
      const Subgraph *attachedRule = m_attachedRules[nextAttachment++];
      if (attachedRule) {
        // Bind the leaf set once so begin() and end() are guaranteed to refer
        // to the same container.
        const LeafSet &attachedLeaves = attachedRule->GetLeaves();
        leaves.insert(attachedLeaves.begin(), attachedLeaves.end());
        continue;
      }
    }
    // Non-TREE leaf, closed attachment point, or no entry recorded yet.
    leaves.insert(baseLeaf);
  }
  return Subgraph(m_baseRule.GetRoot(), leaves);
}

}  // namespace GHKM
}  // namespace Moses