Welcome to mirror list, hosted at ThFree Co, Russian Federation.

PhraseOrientation.h « extract-ghkm « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d826c127ca1d68bc18bf3edab690fe37ae6204a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

/***********************************************************************
 Moses - statistical machine translation system
 Copyright (C) 2006-2011 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#pragma once

#include "Alignment.h"
#include "moses/AlignmentInfo.h"

#include <map>
#include <set>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>

namespace Moses
{
namespace GHKM
{

// Ordered index-to-index-set map used in this header for the phrase corner
// collections (top/bottom, left/right). The key is the English (target-side)
// index; the mapped set holds the source-side indices recorded for that key.
typedef std::map<int, std::set<int> > HSentenceVertices;


class PhraseOrientation
{
public:

  enum REO_MODEL_TYPE {REO_MODEL_TYPE_MSD, REO_MODEL_TYPE_MSLR, REO_MODEL_TYPE_MONO};
  enum REO_CLASS {REO_CLASS_LEFT, REO_CLASS_RIGHT, REO_CLASS_DLEFT, REO_CLASS_DRIGHT, REO_CLASS_UNKNOWN};
  enum REO_DIR {REO_DIR_L2R, REO_DIR_R2L, REO_DIR_BIDIR};


  PhraseOrientation(int sourceSize,
                    int targetSize,
                    const Alignment &alignment);

  PhraseOrientation(int sourceSize,
                    int targetSize,
                    const AlignmentInfo &alignTerm,
                    const AlignmentInfo &alignNonTerm);

  REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
  REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
  const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=REO_DIR_BIDIR) const;
  const std::string GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction=REO_DIR_BIDIR) const;
  static const std::string GetOrientationString(const REO_CLASS orient, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
  static void WriteOrientation(std::ostream& out, const REO_CLASS orient, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
  void IncrementPriorCount(REO_DIR direction, REO_CLASS orient, float increment);
  static void WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType=REO_MODEL_TYPE_MSLR);
  bool SourceSpanIsAligned(int index1, int index2) const;
  bool TargetSpanIsAligned(int index1, int index2) const;

private:

  void Init(int sourceSize, int targetSize,
            const std::vector<std::vector<int> > &alignedToT,
            const std::vector<std::vector<int> > &alignedToS,
            const std::vector<int> &alignedCountS);

  void InsertVertex( HSentenceVertices & corners, int x, int y );

  void InsertPhraseVertices(HSentenceVertices & topLeft,
                            HSentenceVertices & topRight,
                            HSentenceVertices & bottomLeft,
                            HSentenceVertices & bottomRight,
                            int startF, int startE, int endF, int endE);

  REO_CLASS GetOrientHierModel(REO_MODEL_TYPE modelType,
                               int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
                               bool (*ge)(int, int), bool (*lt)(int, int),
                               const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;

  bool SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const;

  bool IsAligned(int fi, int ei) const;

  static bool ge(int first, int second) {
    return first >= second;
  };
  static bool le(int first, int second) {
    return first <= second;
  };
  static bool lt(int first, int second) {
    return first < second;
  };

  const int m_countF;
  const int m_countE;

  std::vector<std::vector<int> > m_alignedToT;

  HSentenceVertices m_topLeft;
  HSentenceVertices m_topRight;
  HSentenceVertices m_bottomLeft;
  HSentenceVertices m_bottomRight;

  boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToSourceSpan;
  boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToTargetSpan;

  static std::vector<float> m_l2rOrientationPriorCounts;
  static std::vector<float> m_r2lOrientationPriorCounts;
};

}  // namespace GHKM
}  // namespace Moses