/*

EGYPT Toolkit for Statistical Machine Translation
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight,
John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith,
and David Yarowsky.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.

*/
/* Perplexity.h
 * ============
 * Mike Jahr, 7/15/99
 * Machine Translation group, WS99
 * Center for Language and Speech Processing
 *
 * Last Modified by: Yaser Al-Onaizan, August 17, 1999
 *
 * Simple class used to calculate the cross entropy and perplexity
 * of models.
 */

#ifndef _PERPLEXITY_H
#define _PERPLEXITY_H

#include <math.h>
#include <fstream>
#include "Vector.h"
#include "defs.h"
#include "Array2.h"
#include "Globals.h"
#include "syncObj.h"

#define CROSS_ENTROPY_BASE 2

class Perplexity {
 private:
  double sum;                               // accumulated log-probability mass
  double wc;                                // accumulated target (French) word count
  Array2<double, Vector<double> > *E_M_L;   // precomputed Poisson log length probabilities
  Vector<string> modelid;
  Vector<double> perp;
  Vector<double> ce;
  Vector<string> name;
  Mutex mutex;
 public:
  ~Perplexity() { delete E_M_L; }
  Perplexity() {
    E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH, MAX_SENTENCE_LENGTH);
    unsigned int l, m;
    Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
    for (m = 2; m < MAX_SENTENCE_LENGTH; m++)
      fact[m] = fact[m-1] * m;
    // E_M_L(l, m) is the log of the Poisson probability of a target sentence
    // of length m given a source sentence of length l, with mean LAMBDA * l.
    for (m = 1; m < MAX_SENTENCE_LENGTH; m++)
      for (l = 1; l < MAX_SENTENCE_LENGTH; l++) {
        (*E_M_L)(l, m) = log(pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) / (fact[m]));
      }
    sum = 0;
    wc = 0;
    perp.clear();
    ce.clear();
    name.clear();
  }
  inline void clear() {
    mutex.lock();
    sum = 0;
    wc = 0;
    mutex.unlock();
  }
  size_t size() const { return (min(perp.size(), ce.size())); }
  inline void addFactor(const double p, const double count, const int l,
                        const int m, bool withPoisson) {
    mutex.lock();
    wc += count * m;  // number of French words
    sum += count * ((withPoisson ? ((*E_M_L)(l, m)) : 0.0) + p);
    mutex.unlock();
  }
  // Perplexity: exponential of the negative average log-probability per word.
  inline double perplexity() const {
    return exp(-1 * sum / wc);
  }
  // Cross entropy per word in base CROSS_ENTROPY_BASE.
  inline double cross_entropy() const {
    return (-1.0 * sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
  }
  inline double word_count() const {
    return wc;
  }
  inline double getSum() const { return sum; }
  void record(string model);
  friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
                                       const Perplexity&, const Perplexity&,
                                       ostream&, int, int, bool);
};

#endif
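
/*
 * Illustrative usage sketch (not part of the original EGYPT sources; the
 * function names below are hypothetical). During training, each sentence
 * pair of source length l and target length m contributes its log
 * translation probability through addFactor(); perplexity() and
 * cross_entropy() then summarize the whole pass.
 *
 *   #include <iostream>
 *   #include "Perplexity.h"
 *
 *   void accumulateSentencePair(Perplexity& trainPerp, double logProb,
 *                               double count, int l, int m) {
 *     // logProb is the natural log of the sentence translation probability;
 *     // withPoisson = true also adds the precomputed Poisson length term
 *     // E_M_L(l, m) built in the constructor.
 *     trainPerp.addFactor(logProb, count, l, m, true);
 *   }
 *
 *   void reportPass(const Perplexity& trainPerp) {
 *     std::cerr << "words: "          << trainPerp.word_count()
 *               << "  cross-entropy: " << trainPerp.cross_entropy()
 *               << "  perplexity: "    << trainPerp.perplexity() << std::endl;
 *   }
 */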