// mira/Optimiser.h

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/
#ifndef _MIRA_OPTIMISER_H_
#define _MIRA_OPTIMISER_H_

#include <vector>

#include "ScoreComponentCollection.h"


namespace Mira {
  
  /**
   * Abstract base class for the weight optimisers declared in this header.
   *
   * Derived classes implement updateWeightsHopeFear(). The class is meant to
   * be used polymorphically, so its destructor is virtual: destroying a
   * derived optimiser through an Optimiser pointer is then well defined.
   */
  class Optimiser {
    public:
      Optimiser() {}

      // Polymorphic base: without a virtual destructor, `delete` through an
      // Optimiser* that points at a derived object is undefined behaviour.
      virtual ~Optimiser() {}

      /**
       * Weight update driven by "hope" and "fear" candidates. The terms are
       * taken from the parameter names; their precise definition is up to the
       * implementations and callers, which are not visible in this header.
       *
       * The nested vectors are presumably indexed [sentence][candidate] --
       * TODO confirm against the implementations.
       *
       * @param currWeights        current weights (non-const reference)
       * @param weightUpdate       non-const reference; presumably receives the
       *                           computed update -- TODO confirm
       * @param featureValuesHope  feature values of the hope candidates
       * @param featureValuesFear  feature values of the fear candidates
       * @param bleuScoresHope     BLEU scores of the hope candidates
       * @param bleuScoresFear     BLEU scores of the fear candidates
       * @param modelScoresHope    model scores of the hope candidates
       * @param modelScoresFear    model scores of the fear candidates
       * @param learning_rate      learning rate for the update
       * @param rank               presumably the rank of the calling process
       *                           -- TODO confirm
       * @param epoch              current epoch
       * @param updatePosition     defaults to -1; its meaning is defined by the
       *                           implementations. Default arguments of virtual
       *                           functions are bound statically, so overrides
       *                           should repeat the same default.
       * @return a size_t whose meaning is defined by the implementations
       */
      virtual size_t updateWeightsHopeFear(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
          const std::vector<std::vector<float> >& bleuScoresHope,
          const std::vector<std::vector<float> >& bleuScoresFear,
          const std::vector<std::vector<float> >& modelScoresHope,
          const std::vector<std::vector<float> >& modelScoresFear,
          float learning_rate,
          size_t rank,
          size_t epoch,
          int updatePosition = -1) = 0;
  };
 
  /**
   * Optimiser subclass that overrides updateWeightsHopeFear().
   *
   * Only the declaration lives in this header; the update rule itself is
   * defined out of line and is not visible here.
   */
  class Perceptron : public Optimiser {
    public:
      /**
       * Override of Optimiser::updateWeightsHopeFear() with the same parameter
       * list and the same default (-1) for updatePosition.
       *
       * @return a size_t whose meaning is defined by the out-of-line
       *         implementation
       */
      virtual size_t updateWeightsHopeFear(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
          const std::vector<std::vector<float> >& bleuScoresHope,
          const std::vector<std::vector<float> >& bleuScoresFear,
          const std::vector<std::vector<float> >& modelScoresHope,
          const std::vector<std::vector<float> >& modelScoresFear,
          float learning_rate,
          size_t rank,
          size_t epoch,
          int updatePosition = -1);
  };

  /**
   * Optimiser subclass holding the MIRA configuration (slack, margin/update
   * scaling flags, boosting, margin normalisation) and declaring several
   * weight-update entry points. All update methods are defined out of line;
   * this header only shows their signatures.
   */
  class MiraOptimiser : public Optimiser {
    public:
      /**
       * Default constructor. Every member is given a definite value so that a
       * default-constructed optimiser never reads indeterminate data:
       * slack 0, all scaling/boost/normalise flags off, precision 1 (the same
       * value the parameterised constructor uses) and sigmoid parameter 1.
       * Use setSlack()/setPrecision() or the parameterised constructor to
       * configure the optimiser.
       */
      MiraOptimiser() :
        Optimiser(),
        m_slack(0.0f),
        m_scale_margin(false),
        m_scale_margin_precision(false),
        m_scale_update(false),
        m_scale_update_precision(false),
        m_precision(1),
        m_boost(false),
        m_normaliseMargin(false),
        m_sigmoidParam(1.0f) { }

      /**
       * Construct a fully configured optimiser. Each argument is stored in the
       * member of the same name; m_precision is not a parameter and starts
       * at 1.
       */
      MiraOptimiser(float slack, bool scale_margin, bool scale_margin_precision,
                    bool scale_update, bool scale_update_precision, bool boost,
                    bool normaliseMargin, float sigmoidParam) :
        Optimiser(),
        m_slack(slack),
        m_scale_margin(scale_margin),
        m_scale_margin_precision(scale_margin_precision),
        m_scale_update(scale_update),
        m_scale_update_precision(scale_update_precision),
        m_precision(1),
        m_boost(boost),
        m_normaliseMargin(normaliseMargin),
        m_sigmoidParam(sigmoidParam) { }

      /**
       * Weight update from candidate feature values, losses and scores
       * together with per-sentence oracle information (defined out of line).
       *
       * NOTE(review): oracleBleuScores and oracleModelScores are taken by
       * value, so each call copies both vectors. They are left unchanged here
       * because the out-of-line definition must match this declaration;
       * switching both to const references would avoid the copies.
       *
       * @return a size_t whose meaning is defined by the implementation
       */
      size_t updateWeights(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
          const std::vector<std::vector<float> >& losses,
          const std::vector<std::vector<float> >& bleuScores,
          const std::vector<std::vector<float> >& modelScores,
          const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
          const std::vector< float> oracleBleuScores,
          const std::vector< float> oracleModelScores,
          float learning_rate,
          size_t rank,
          size_t epoch);

      /**
       * Override of Optimiser::updateWeightsHopeFear() with the same parameter
       * list and the same default (-1) for updatePosition (defined out of
       * line).
       */
      virtual size_t updateWeightsHopeFear(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
          const std::vector<std::vector<float> >& bleuScoresHope,
          const std::vector<std::vector<float> >& bleuScoresFear,
          const std::vector<std::vector<float> >& modelScoresHope,
          const std::vector<std::vector<float> >& modelScoresFear,
          float learning_rate,
          size_t rank,
          size_t epoch,
          int updatePosition = -1);

      /**
       * Update from a single hope/fear pair: one feature vector, one BLEU
       * score and one model score for each side (defined out of line).
       * The feature-value arguments are non-const references; whether the
       * implementation modifies them is not visible in this header.
       */
      size_t updateWeightsAnalytically(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          Moses::ScoreComponentCollection& featureValuesHope,
          Moses::ScoreComponentCollection& featureValuesFear,
          float bleuScoreHope,
          float bleuScoreFear,
          float modelScoreHope,
          float modelScoreFear,
          float learning_rate,
          size_t rank,
          size_t epoch);

      /**
       * Update from candidate feature values with their BLEU and model scores
       * (defined out of line); presumably a ranking-based update, going by the
       * method name -- TODO confirm.
       */
      size_t updateWeightsRankModel(
          Moses::ScoreComponentCollection& currWeights,
          Moses::ScoreComponentCollection& weightUpdate,
          const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
          const std::vector<std::vector<float> >& bleuScores,
          const std::vector<std::vector<float> >& modelScores,
          float learning_rate,
          size_t rank,
          size_t epoch);

      // Replace the stored slack value.
      void setSlack(float slack) {
        m_slack = slack;
      }

      // Replace the stored precision value.
      void setPrecision(float precision) {
        m_precision = precision;
      }

    private:

      // regularise Hildreth updates
      float m_slack;

      // scale margin with BLEU score or precision
      bool m_scale_margin, m_scale_margin_precision;

      // scale update with oracle BLEU score or precision
      bool m_scale_update, m_scale_update_precision;

      // precision value, 1 after construction and changed via setPrecision();
      // presumably the quantity used by the *_precision scaling options --
      // TODO confirm
      float m_precision;

      // boosting of updates on misranked candidates
      bool m_boost;

      // squash margin between 0 and 1 (or depending on m_sigmoidParam)
      bool m_normaliseMargin;

      // y=sigmoidParam is the axis that this sigmoid approaches
      float m_sigmoidParam ;
  };
}

#endif