diff options
Diffstat (limited to 'mgizapp/src/mkcls/KategProblemKBC.cpp')
-rw-r--r-- | mgizapp/src/mkcls/KategProblemKBC.cpp | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/mgizapp/src/mkcls/KategProblemKBC.cpp b/mgizapp/src/mkcls/KategProblemKBC.cpp new file mode 100644 index 0000000..97c40fc --- /dev/null +++ b/mgizapp/src/mkcls/KategProblemKBC.cpp @@ -0,0 +1,243 @@ +/* + +Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och + +mkcls - a program for making word classes . + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +USA. + +*/ + + + + +#include <stdlib.h> +#include "KategProblem.h" + +double rhoLo=0.75; +#define MAX_VERFAELSCHUNG 5000 +double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0; +double verfaelsche(int a,double b) +{ + + if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG ) + { + massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a); + return verfTab[a]; + } + else + { + double x = b*(erf(10000.0) - erf(a/b))/2+a; + return x; + } +} +double verfaelsche(double,double b) +{ + abort(); + return b; +} + +KategProblemKBC::KategProblemKBC(int s,double sv) : + _n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0), + _nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0), + eta0(s*s),eta1(0),c1_0(s),c2_0(s), + _bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s) + +{ + verfInit0=0.0; + int i; + if( withVerfaelschung ) + { + verfInit0=verfaelsche(0,sv); + cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n"; + } + for(i=0;i<s;i++) + { + _n[i].init(s,0); + _nverf[i].init(s,verfInit0); + _n1verf[i]=_n2verf[i]=verfInit0; + _bigramVerfSum+=verfInit0*s; + _unigramVerfSum1+=verfInit0; + _unigramVerfSum2+=verfInit0; + } + if( withVerfaelschung ) + { + cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl; + } + verfTabSigma=sigmaVerfaelschung; + + + +} + +void KategProblemKBC::setN(int w1,int w2, FreqType n) + +{ + addN(w1,w2,-_n[w1][w2]); + addN(w1,w2,n); +} + + +double KategProblemKBC::fullBewertung(int auswertung) +{ + + double bewertung=0; + int c1,c2; + + + switch( auswertung ) + { + case CRITERION_ML: + for(c1=0;c1<nKats;c1++) + { + for(c2=0;c2<nKats;c2++) + bewertung-=kat_h(_n[c1][c2]); + bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]); + } + break; + case CRITERION_MY: + { + for(c1=0;c1<nKats;c1++) + { + for(c2=0;c2<nKats;c2++) + bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2)); + bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1)); + } + double u1=_unigramVerfSum1-verfInit0*c1_0; + double u2=_unigramVerfSum2-verfInit0*c2_0; + double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0); + if( verboseMode>1 ) + { + cout << "CRITERION_MY: " << bewertung << endl; + cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " " + << "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " " + << "U3:"<<_bigramVerfSum << " n:"<<b<< endl; + } + if(b>0.000001) + { + + + if(verboseMode>1 ) + cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl; + bewertung -= _nWords*log( u1 * u2 / b ); + if(verboseMode>1) + cout << "SCHLUSSBEWERTUNG: " << bewertung << endl; + } + else + cout << "B zu klein " << b << endl; + } + break; + case CRITERION_LO: + for(c1=0;c1<nKats;c1++) + { + for(c2=0;c2<nKats;c2++) + bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo); + bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1); + } + bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats); + break; + default: + cerr << "Error: wrong criterion " << auswertung << endl; + exit(1); + } + return bewertung; +} + +double KategProblemKBC::myCriterionTerm() +{ + iassert( withVerfaelschung ); + double r; + double u1=_unigramVerfSum1-verfInit0*c1_0; + double u2=_unigramVerfSum2-verfInit0*c2_0; + double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0); + + + if( verboseMode>1 ) + { + cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl; + cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl; + cout << "0: "<<c1_0 << endl; + } + r = _nWords*log( u1 * u2 / b ); + + return -r; +} + + + + +double KategProblemKBC::bigramVerfSum() +{ + double sum=0; + for(int c1=0;c1<nKats;c1++) + for(int c2=0;c2<nKats;c2++) + sum+=nverf(c1,c2); + cout << "BIGRAMVERFSUM: " << sum << endl; + return sum; +} + +double KategProblemKBC::unigramVerfSum1() +{ + double sum=0; + for(int c1=0;c1<nKats;c1++) + sum+=n1verf(c1); + cout << "UNIGRAMVERFSUM1: " << sum << endl; + return sum; +} + +double KategProblemKBC::unigramVerfSum2() +{ + double sum=0; + for(int c1=0;c1<nKats;c1++) + sum+=n2verf(c1); + cout << "UNIGRAMVERFSUM2: " << sum << endl; + return sum; +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + |