Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ReorderingConstraint.cpp « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: aede7002d4f002b3f5a8994a914b99faf9ceadfe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// $Id$
// vim:tabstop=2

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2008 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#include "ReorderingConstraint.h"
#include "InputType.h"
#include "StaticData.h"

namespace Moses
{

//! Allocate and reset the per-position wall tables.
//! \param size number of positions to allocate; stored in m_size
//!
//! Both tables are obtained with malloc() and then cleared: no wall is set
//! and no position is assigned to a zone (NOT_A_ZONE).
//! NOTE(review): the matching release of these buffers is not visible in
//! this file -- presumably done with free() elsewhere in the class; confirm.
void ReorderingConstraint::InitializeWalls(size_t size)
{
  m_size = size;
  m_wall      = static_cast<bool*>( malloc(size * sizeof(bool)) );
  m_localWall = static_cast<size_t*>( malloc(size * sizeof(size_t)) );

  // no hard walls anywhere yet
  for (size_t idx = 0; idx < m_size; ++idx) {
    m_wall[idx] = false;
  }

  // no position carries a zone-local wall yet
  for (size_t idx = 0; idx < m_size; ++idx) {
    m_localWall[idx] = NOT_A_ZONE;
  }
}


//! Store the wall flag for one position and switch the constraint on.
//! \param pos   index into the wall table (not range-checked here)
//! \param value new wall flag for that position
void ReorderingConstraint::SetWall( size_t pos, bool value )
{
  VERBOSE(3,"SETTING reordering wall at position " << pos << std::endl);

  // any call activates constraint checking, even when value is false
  m_active = true;
  m_wall[pos] = value;
}

//! Convert walls that lie inside a zone into walls local to that zone.
//!
//! For every registered zone, each position from the zone start up to (but
//! not including) the zone end is inspected:
//!  - a set wall is cleared and recorded in m_localWall under this zone's index;
//!  - a position already recorded for another zone is re-assigned to this
//!    zone when that other zone starts earlier or ends later than this one,
//!    so that a local wall ends up attached to the innermost zone.
//! A wall at the zone's end position is left untouched (it is not local).
void ReorderingConstraint::FinalizeWalls()
{
  const size_t zoneCount = m_zone.size();
  for (size_t zoneId = 0; zoneId < zoneCount; ++zoneId) {
    const size_t zoneBegin = m_zone[zoneId][0];
    const size_t zoneEnd   = m_zone[zoneId][1];

    for (size_t pos = zoneBegin; pos < zoneEnd; ++pos) {
      // a wall inside the zone becomes a local wall of this zone
      if (m_wall[ pos ]) {
        m_localWall[ pos ] = zoneId;
        m_wall[ pos ] = false;
        VERBOSE(3,"SETTING local wall " << pos << std::endl);
        continue;
      }

      // nothing recorded at this position: nothing to re-assign
      const size_t ownerZone = m_localWall[ pos ];
      if (ownerZone == NOT_A_ZONE) {
        continue;
      }

      // the recorded zone reaches beyond this one on either side,
      // so this zone is the inner one and takes over the local wall
      const bool ownerStartsEarlier = m_zone[ownerZone][0] < zoneBegin;
      const bool ownerEndsLater     = m_zone[ownerZone][1] > zoneEnd;
      if (ownerStartsEarlier || ownerEndsLater) {
        m_localWall[ pos ] = zoneId;
      }
    }
  }
}

//! set walls based on "-monotone-at-punctuation" flag
void ReorderingConstraint::SetMonotoneAtPunctuation( const Phrase &sentence )
{
  for( size_t i=0; i<sentence.GetSize(); i++ ) {
    const Word& word = sentence.GetWord(i);
    if (word[0]->GetString() == "," ||
        word[0]->GetString() == "." ||
        word[0]->GetString() == "!" ||
        word[0]->GetString() == "?" ||
        word[0]->GetString() == ":" ||
        word[0]->GetString() == ";" ||
        word[0]->GetString() == "\"") {
      // set wall before and after punc, but not at sentence start, end
      if (i>0 && i<m_size-1) SetWall( i, true );
      if (i>1)               SetWall( i-1, true );
    }
  }
}

//! Register a reordering zone (once entered, it has to be finished).
//! \param startPos first position of the zone
//! \param endPos   last position of the zone
//!
//! The zone is appended to m_zone as the pair {startPos, endPos} and the
//! constraint is switched on.
void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
{
  VERBOSE(3,"SETTING zone " << startPos << "-" << endPos << std::endl);

  std::vector< size_t > bounds( 2 );
  bounds[0] = startPos;
  bounds[1] = endPos;

  m_zone.push_back( bounds );
  m_active = true;
}

//! check if the current hypothesis extension violates reordering constraints
//!
//! \param bitmap   coverage bitmap of the source words (presumably the state
//!                 before the phrase startPos-endPos is added -- TODO confirm)
//! \param startPos first source position of the phrase to be translated
//! \param endPos   last source position of the phrase; compared against zone
//!                 bounds as an inclusive position
//! \return true if no wall or zone constraint is violated (or the constraint
//!         is not active), false as soon as the first violation is found
//!
//! The checks run in this order: hard walls, a shortcut for monotone
//! extensions, then for every zone: activity of the zone, distortion-limit
//! dead ends, partial overlaps, and finally zone-local walls.
bool ReorderingConstraint::Check( const WordsBitmap &bitmap, size_t startPos, size_t endPos ) const
{
  // nothing to be checked, we are done
  if (! IsActive() ) return true;

  // no newline here: every return path below finishes this log line
  VERBOSE(3,"CHECK " << bitmap << " " << startPos << "-" << endPos);

  // check walls
  size_t firstGapPos = bitmap.GetFirstGapPos();
  // filling first gap -> no wall violation possible
  if (firstGapPos != startPos) {
    // if there is a wall before the last word,
    // we created a gap while moving through wall
    // -> violation
    // (positions firstGapPos .. endPos-1 are inspected; endPos itself is not)
    for( size_t pos = firstGapPos; pos < endPos; pos++ ) {
      if( GetWall( pos ) ) {
        VERBOSE(3," hitting wall " << pos << std::endl);
        return false;
      }
    }
  }

  // monotone -> no violation possible
  size_t lastPos = bitmap.GetLastPos();
  if ((lastPos == NOT_FOUND && startPos == 0) || // nothing translated
      (firstGapPos > lastPos &&  // no gaps
       firstGapPos == startPos)) { // translating first empty word
    VERBOSE(3," montone, fine." << std::endl);
    return true;
  }

  // check zones
  // each zone is stored as {start, end}; both bounds are treated as inclusive
  for(size_t z = 0; z < m_zone.size(); z++ ) {
    const size_t startZone = m_zone[z][0];
    const size_t endZone = m_zone[z][1];

    // fine, if translation has not reached zone yet and phrase outside zone
    if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
      continue;
    }

    // already completely translated zone, no violations possible
    if (firstGapPos > endZone) {
      continue;
    }

    // some words are translated beyond the start
    // let's look closer if some are in the zone
    size_t numWordsInZoneTranslated = 0;
    if (lastPos >= startZone) {
      for(size_t pos = startZone; pos <= endZone; pos++ ) {
        if( bitmap.GetValue( pos ) ) {
          numWordsInZoneTranslated++;
        }
      }
    }

    // all words in zone translated, no violation possible
    if (numWordsInZoneTranslated == endZone-startZone+1) {
      continue;
    }

    // flag if this is an active zone
    // (active = at least one, but not all, of its words are translated)
    bool activeZone = (numWordsInZoneTranslated > 0);

    // fine, if zone completely untranslated and phrase outside zone
    if (!activeZone && ( endPos < startZone || startPos > endZone ) ) {
      continue;
    }

    // violation, if phrase completely outside active zone
    if (activeZone && ( endPos < startZone || startPos > endZone ) ) {
      VERBOSE(3," outside active zone" << std::endl);
      return false;
    }

    // ok, this is what we know now:
    // * the phrase is in the zone (at least partially)
    // * either zone is already active, or it becomes active now


    // check, if we are setting us up for a dead end due to distortion limits
    // (firstGapPos <= endZone is guaranteed by the early 'continue' above,
    //  so the unsigned subtraction cannot wrap)
    // NOTE(review): the cast suggests GetMaxDistortion() returns a non-size_t,
    // possibly signed, value; a negative value would become a very large
    // limit here and effectively disable this test -- confirm
    size_t distortionLimit = (size_t)StaticData::Instance().GetMaxDistortion();
    if (startPos != firstGapPos && endZone-firstGapPos >= distortionLimit) {
      VERBOSE(3," dead end due to distortion limit" << std::endl);
      return false;
    }

    // let us check on phrases that are partially outside

    // phrase overlaps at the beginning, always ok
    if (startPos <= startZone) {
      continue;
    }

    // phrase goes beyond end, has to fill zone completely
    if (endPos > endZone) {
      if (endZone-startPos+1 < // num. words filled in by phrase
          endZone-startZone+1-numWordsInZoneTranslated) { // num. untranslated
        VERBOSE(3," overlap end, but not completing" << std::endl);
        return false;
      } else {
        continue;
      }
    }

    // now we are down to phrases that are completely inside the zone
    // we have to check local walls
    // (scan runs from the zone start up to, but excluding, the smaller of
    //  endZone and endPos)
    bool seenUntranslatedBeforeStartPos = false;
    for(size_t pos = startZone; pos < endZone && pos < endPos; pos++ ) {
      // be careful when there is a gap before phrase
      if( !bitmap.GetValue( pos ) // untranslated word
          && pos < startPos ) {   // before startPos
        seenUntranslatedBeforeStartPos = true;
      }
      // a local wall only matters once a gap has been left behind the phrase
      if( seenUntranslatedBeforeStartPos && GetLocalWall( pos, z ) ) {
        VERBOSE(3," local wall violation" << std::endl);
        return false;
      }
    }

    // passed all checks for this zone, on to the next one
  }

  // passed all checks, no violations
  VERBOSE(3," fine." << std::endl);
  return true;
}

}