diff options
Diffstat (limited to 'mgizapp/src/Perplexity.h')
-rw-r--r-- | mgizapp/src/Perplexity.h | 115 |
1 files changed, 0 insertions, 115 deletions
diff --git a/mgizapp/src/Perplexity.h b/mgizapp/src/Perplexity.h deleted file mode 100644 index 45cc84f..0000000 --- a/mgizapp/src/Perplexity.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - -EGYPT Toolkit for Statistical Machine Translation -Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky. - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, -USA. - -*/ -/* Perplexity.h - * ============ - * Mike Jahr, 7/15/99 - * Machine Translation group, WS99 - * Center for Language and Speech Processing - * - * Last Modified by: Yaser Al-Onaizan, August 17, 1999 - * - * Simple class used to calculate cross entropy and perplexity - * of models. - */ - -#ifndef _PERPLEXITY_H -#define _PERPLEXITY_H - -#include <math.h> -#include <fstream> -#include "Vector.h" -#include "defs.h" -#include "Array2.h" -#include "Globals.h" -#include "syncObj.h" - -#define CROSS_ENTROPY_BASE 2 - -class Perplexity { - private: - double sum; - double wc; - Array2<double, Vector<double> > *E_M_L; - Vector<string> modelid; - Vector<double > perp; - Vector<double > ce; - Vector<string> name ; - Mutex mutex; - public: - ~Perplexity() { delete E_M_L;} - Perplexity() { - E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH); - unsigned int l, m ; - Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0); - for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++) - fact[m] = fact[m-1] * m ; - for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++) - for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) { - (*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) / - (fact[m])) ; - } - sum = 0 ; - wc = 0; - perp.clear(); - ce.clear(); - name.clear(); - } - inline void clear() { - mutex.lock(); - sum = 0 ; - wc = 0 ; - mutex.unlock(); - } - size_t size() const {return(min(perp.size(), ce.size()));} - inline void addFactor(const double p, const double count, const int l, - const int m,bool withPoisson) { - mutex.lock(); - wc += count * m ; // number of french words - sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ; - mutex.unlock(); - } - - inline double perplexity() const { - return exp( -1*sum / wc); - } - - inline double cross_entropy() const { - return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc)); - } - - inline double word_count() const { - return wc; - } - - inline double getSum() const { - return sum ; - } - - void record(string model); - - friend void generatePerplexityReport(const Perplexity&, const Perplexity&, - const Perplexity&, const Perplexity&, - ostream&, int, int, bool); -}; - - -#endif |