Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ExtractionPhrasePair.h « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
blob: e0f5dc5fbedc38f78a5d5bba65e685548d89b596 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/***********************************************************************
  Moses - factored phrase-based language decoder
  Copyright (C) 2009 University of Edinburgh

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/

#pragma once
#include "tables-core.h"

#include <vector>
#include <set>
#include <map>
#include <boost/unordered_map.hpp>

namespace MosesTraining {


// Alignment container: a vector holding one set of size_t indices per element.
// NOTE(review): judging by the "targetToSourceAlignment" parameter names used
// with this type below, the vector is presumably indexed by target position
// and each set holds the aligned source positions -- confirm against callers.
typedef std::vector< std::set<size_t> > ALIGNMENT;


/**
 * Bookkeeping record for one source/target phrase pair: a count, a PCFG sum,
 * a set of target-to-source alignments with per-alignment float values, and
 * free-form string properties with per-value counts.
 *
 * Only the small accessors, GetProperty() and AddProperty() are defined in
 * this header; all other member functions are merely declared here.
 *
 * NOTE(review): the class holds raw pointers (phrases, ALIGNMENT map keys,
 * heap-allocated property maps created in AddProperty()) and declares a
 * destructor but neither a copy constructor nor a copy assignment operator.
 * Presumably instances are never copied and the destructor / Clear() release
 * the allocations -- confirm against the implementation file.
 */
class ExtractionPhrasePair {

protected:

  // Values observed for a single property key, each with its summed count.
  typedef std::map<std::string,float> PROPERTY_VALUES;
  // Iterator into a PROPERTY_VALUES map; used to remember the entry that was
  // touched most recently (see AddProperty()).
  typedef std::map<std::string,float>::iterator LAST_PROPERTY_VALUE;

  
  // Flag reported by IsValid(); maintained by the out-of-line members.
  bool m_isValid;

  // Phrases returned by GetSource() / GetTarget().
  // NOTE(review): ownership is not visible in this header.
  const PHRASE *m_phraseSource;
  const PHRASE *m_phraseTarget;

  // Totals returned by GetCount() / GetPcfgScore().
  float m_count;
  float m_pcfgSum;

  // Alignments seen for this phrase pair, each mapped to a float
  // (presumably its accumulated count -- confirm in the implementation).
  std::map<ALIGNMENT*,float> m_targetToSourceAlignments;
  // Property key -> (value/count map, iterator to the last touched value).
  // Both pointers are allocated with new in AddProperty().
  std::map<std::string, 
           std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;

  // NOTE(review): presumably the most recently added count / PCFG sum /
  // alignment entry, kept for IncrementPrevious() -- confirm in the
  // implementation file.
  float m_lastCount;
  float m_lastPcfgSum;
  std::map<ALIGNMENT*,float>::iterator m_lastTargetToSourceAlignment;

public:

  ExtractionPhrasePair( const PHRASE *phraseSource, 
                        const PHRASE *phraseTarget, 
                        ALIGNMENT *targetToSourceAlignment, 
                        float count, float pcfgSum );

  ~ExtractionPhrasePair();

  bool Add( ALIGNMENT *targetToSourceAlignment, 
            float count, float pcfgSum );

  void IncrementPrevious( float count, float pcfgSum );

  bool Matches( const PHRASE *otherPhraseSource,
                const PHRASE *otherPhraseTarget,
                ALIGNMENT *otherTargetToSourceAlignment ) const;

  bool Matches( const PHRASE *otherPhraseSource,
                const PHRASE *otherPhraseTarget,
                ALIGNMENT *otherTargetToSourceAlignment,
                bool &sourceMatch,
                bool &targetMatch,
                bool &alignmentMatch ) const;

  bool MatchesAlignment( ALIGNMENT *otherTargetToSourceAlignment ) const;

  void Clear();

  /** @return the current validity flag. */
  bool IsValid() const {
    return m_isValid;
  }


  /** @return the stored source phrase pointer. */
  const PHRASE *GetSource() const {
    return m_phraseSource;
  }
  
  /** @return the stored target phrase pointer. */
  const PHRASE *GetTarget() const {
    return m_phraseTarget;
  }

  /** @return the accumulated count. */
  float GetCount() const {
    return m_count;
  }

  /** @return the accumulated PCFG sum. */
  float GetPcfgScore() const {
    return m_pcfgSum;
  }

  /**
   * @return the number of distinct property keys stored.
   *
   * Returned as plain size_t: a top-level const on a by-value return type is
   * ignored by the language and only provokes compiler warnings.
   */
  size_t GetNumberOfProperties() const {
    return m_properties.size();
  }

  /**
   * Looks up the value/count map stored for a property key.
   *
   * @param key property key to look up
   * @return pointer to the value -> count map of that key, or NULL if the key
   *         has never been added. The map remains owned by this object.
   */
  const std::map<std::string,float> *GetProperty( const std::string &key ) const {
    std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::const_iterator iter = m_properties.find(key);
    if (iter == m_properties.end()) {
      return NULL;
    }
    return iter->second.first;
  }

  const ALIGNMENT *FindBestAlignmentTargetToSource() const;

  const std::string *FindBestPropertyValue(const std::string &key) const;

  std::string CollectAllPropertyValues(const std::string &key) const;

  std::string CollectAllLabelsSeparateLHSAndRHS(const std::string& propertyKey,
                                                std::set<std::string>& sourceLabelSet,
                                                boost::unordered_map<std::string,float>& sourceLHSCounts,
                                                boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts, 
                                                Vocabulary &vcbT) const;

  void CollectAllPhraseOrientations(const std::string &key, 
                                    const std::vector<float> &orientationClassPriorsL2R, 
                                    const std::vector<float> &orientationClassPriorsR2L, 
                                    double smoothingFactor, 
                                    std::ostream &out) const;

  void AddProperties( const std::string &str, float count );

  /**
   * Adds `count` to the tally of `value` under property `key`, creating the
   * key and/or the value entry on first use.
   *
   * A per-key iterator to the most recently touched value is kept so that a
   * run of identical (key, value) additions needs no lookup in the value map.
   *
   * @param key   property key
   * @param value property value observed for that key
   * @param count amount to add to the value's count
   */
  void AddProperty( const std::string &key, const std::string &value, float count ) 
  {
    typedef std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > PropertyEntry;

    std::map<std::string, PropertyEntry>::iterator iter = m_properties.find(key);

    if ( iter == m_properties.end() ) {
      // key not found: create its value map with this value as the only entry
      // and remember that entry as the last one touched
      PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
      std::pair<LAST_PROPERTY_VALUE,bool> insertedProperty = propertyValues->insert( std::pair<std::string,float>(value,count) );
      LAST_PROPERTY_VALUE *lastPropertyValue = new LAST_PROPERTY_VALUE(insertedProperty.first);
      m_properties[key] = PropertyEntry(propertyValues, lastPropertyValue);
      return;
    }

    LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second;
    if ( (*lastPropertyValue)->first == value ) {
      // same property key-value pair has been seen right before: add count
      (*lastPropertyValue)->second += count;
      return;
    }

    // property key exists, but the last touched value differs:
    // insert the value, or add to its count if it appeared earlier
    PROPERTY_VALUES *propertyValues = (iter->second).first;
    std::pair<LAST_PROPERTY_VALUE,bool> insertedProperty = propertyValues->insert( std::pair<std::string,float>(value,count) );
    if ( !insertedProperty.second ) {
      insertedProperty.first->second += count;
    }

    // Re-point the existing iterator holder instead of allocating a new one
    // and deleting the old one: same result, no heap traffic.
    *lastPropertyValue = insertedProperty.first;
  }

};

}