Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Perplexity.h « src « mgizapp - github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 6e24cf010d10f09a4aa122e63026433223d2287f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*

EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, 
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
USA.

*/
/* Perplexity.h
 * ============
 * Mike Jahr, 7/15/99
 * Machine Translation group, WS99
 * Center for Language and Speech Processing
 * 
 * Last Modified by: Yaser Al-Onaizan, August 17, 1999
 *
 * Simple class used to calculate cross entropy and perplexity
 * of models.
 */

#ifndef _PERPLEXITY_H
#define _PERPLEXITY_H

#include <cmath>
#include <fstream>
#include "Vector.h"
#include "defs.h"
#include "Array2.h"
#include "Globals.h"
#include "syncObj.h"

/* Logarithm base used by Perplexity::cross_entropy(): the accumulated sum is
 * divided by log(CROSS_ENTROPY_BASE), so a base of 2 reports the result in
 * base-2 units. */
#define CROSS_ENTROPY_BASE 2

class Perplexity {
 private:
    double sum;
    double wc;
    Array2<double, Vector<double> > *E_M_L;
    Vector<string> modelid;
    Vector<double > perp;
    Vector<double > ce;
    Vector<string> name ;
	Mutex mutex;
 public:
	~Perplexity() { delete E_M_L;}
	Perplexity() {
		E_M_L  = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
		unsigned int l, m ;
		Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
		for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
			fact[m] = fact[m-1] * m ;
		for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
			for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
				(*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) / 
									  (fact[m])) ;
			}
		sum = 0 ;
		wc = 0;
		perp.clear();
		ce.clear();
		name.clear();
	}
    inline void clear() {
	  mutex.lock();
      sum = 0 ;
      wc = 0 ;
	  mutex.unlock();
    }
    size_t size() const {return(min(perp.size(), ce.size()));}
	inline void addFactor(const double p, const double count, const int l, 
						  const int m,bool withPoisson) {
		mutex.lock();
		wc += count * m ; // number of french words 
		sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
		mutex.unlock();
	}
    
	inline double perplexity() const {
		return exp( -1*sum / wc);
    }

    inline double cross_entropy() const {
		return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc)); 
    }

    inline double word_count() const {
		return wc;
    }
    
    inline double getSum() const {
		return sum ;
    }

    void record(string model);
    
    friend void generatePerplexityReport(const Perplexity&, const Perplexity&, 
					 const Perplexity&, const Perplexity&, 
					 ostream&, int, int, bool); 
};


#endif