Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/giza-pp.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mkcls-v2/KategProblem.h')
-rw-r--r--mkcls-v2/KategProblem.h439
1 files changed, 439 insertions, 0 deletions
diff --git a/mkcls-v2/KategProblem.h b/mkcls-v2/KategProblem.h
new file mode 100644
index 0000000..e5a5a46
--- /dev/null
+++ b/mkcls-v2/KategProblem.h
@@ -0,0 +1,439 @@
+/*
+
+Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
+
+mkcls - a program for making word classes .
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+
+
+
+
+
+
+#ifndef KATEG_OPT_H
+#define KATEG_OPT_H
+#include <string>
+
+#include <stdlib.h>
+#include "Problem.h"
+
+extern double rhoLo;
+
+typedef int Kategory;
+typedef int Word;
+
+
+
+#ifdef FREQTYPE_DOUBLE
+typedef double FreqType;
+#else
+typedef int FreqType;
+#endif
+
+
+#include "KategProblemWBC.h"
+
+
+#include "KategProblemKBC.h"
+
+
+enum {
+ INIT_RAN=1,
+ INIT_AIO=2,
+ INIT_LWRW=3,
+ INIT_FREQ=4,
+ INIT_OTHER=5
+ };
+
+
+enum {
+ W_RAN=(8|16),
+ W_DET_DECR=(16),
+ W_DET_INCR =(32)
+};
+#define CHOOSE_WORD (8|16|32)
+
+
+enum {
+ K_DET=(64),
+ K_RAN=(128),
+ K_BEST=(64|128)
+};
+#define CHOOSE_KAT (64|128)
+
+
+enum {
+ CRITERION_ML=0,
+ CRITERION_LO=1,
+ CRITERION_MY=2
+};
+
+
+
+class NWG
+{
+ private:
+ Array<FreqType> freq;
+
+ Array<int> timeOfFreq;
+
+
+
+
+ int curTime;
+ public:
+ NWG(int n);
+ void init();
+
+ int anzNot0;
+
+
+ Array<int> not0;
+
+ int word;
+
+ inline void addFreq(int C,FreqType n);
+
+ void sort();
+
+ FreqType getFreq(int i)
+ {
+ if( timeOfFreq[i]==curTime )
+ return freq[i];
+ else
+ return 0;
+ };
+};
+
+inline void NWG::addFreq(int g,FreqType n)
+{
+ if(timeOfFreq[g]==curTime)
+ freq[g]+=n;
+ else
+ {
+ timeOfFreq[g]=curTime;
+ freq[g]=n;
+ not0[anzNot0++]=g;
+ }
+}
+
+
+
+struct KategProblemChange : public ProblemChange
+{
+ void *operator new(size_t size);
+ void operator delete(void *ptr,size_t size);
+
+ int word;
+ int toKat;
+ int fromKat;
+};
+
+class KategProblem : public Problem
+{
+ private:
+ double kat_h_full(int n);
+ double kat_h_full(double n);
+ double kat_h_part(int n);
+ double kat_h_part(double n);
+ double sigmaVerfaelschung;
+ short katWasEmpty;
+
+
+
+ int nwgWord;
+
+ NWG nwg;
+ NWG ngw;
+ FreqType nww;
+
+ int ursprung,ziel;
+
+ Array<int> _katOfWord;
+
+ int _maxComp,_maxCompVal;
+
+ double nmo_my(int i,int j);
+ double nmo(int i,int j);
+
+
+ double nmo_lo(int i,int j,int &e0,int &e1);
+
+
+ void putWord(int word,int to);
+
+
+ void fastPutWord(int word,int to);
+
+
+ void setKatOfWord(int w,int k)
+{
+ if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
+ {
+ cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
+ }
+ _katOfWord[w]=k;
+ nwgWord=-1;
+};
+
+
+ void fillNWG(int w);
+
+
+ inline FreqType nstrich(int i,int j);
+
+
+ void vnstrich(int i,int j);
+
+
+
+ protected:
+ virtual int _change(ProblemChange **p);
+
+
+ virtual void _doChange(ProblemChange &c);
+
+
+ virtual void _undoChange(ProblemChange &c);
+
+
+ virtual double _value();
+
+
+ double _valueChange(KategProblemChange &k);
+
+
+ virtual void incrementDirection();
+
+
+ virtual int maxDimensionVal(void) ;
+
+
+ virtual int maxDimension(void) ;
+
+
+public:
+ leda_array<string> *words;
+typedef leda_set<int> intSet;
+
+leda_array<intSet> *kats;
+
+ KategProblemWBC wordFreq;
+ KategProblemKBC katFreq;
+
+ Array<int> initLike;
+
+ KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
+ int _nachbarschaft,int minw=0);
+
+
+ virtual ~KategProblem();
+
+
+ virtual void _initialize(int initTyp);
+ virtual void _initialize(int initTyp,int specialFixedWord);
+
+
+ virtual double valueChange(ProblemChange&c);
+
+
+ virtual Problem *makeEqualProblem();
+
+
+ virtual double nicevalue(double value=1e100);
+
+
+ void makeKats();
+
+
+ virtual void dumpOn(ostream &strm);
+
+
+ virtual void dumpInfos(ostream &strm);
+
+
+
+
+
+ inline void katwahl(int k);
+
+
+ inline void wortwahl(int w);
+
+
+
+
+
+ inline int katOfWord(int w);
+
+
+ inline short wortwahl();
+
+
+ inline short katwahl() ;
+
+
+ virtual int maxNonBetterIterations();
+
+
+ virtual int expectedNumberOfIterations();
+
+
+ const char *getString(int i);
+ string getTheString(int i);
+
+
+ void makeTitle(char x[512]);
+
+
+ void fixInitLike();
+
+};
+
+inline int KategProblem::katOfWord(int w){return _katOfWord[w];};
+inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;};
+inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;};
+
+inline void KategProblem::katwahl(int k)
+ {
+ nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
+ if(k==K_BEST)
+ _maxCompVal=1;
+ else
+ _maxCompVal=katFreq.nKats-2;
+ };
+
+inline void KategProblem::wortwahl(int w)
+ {
+ nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
+ };
+
+
+
+inline FreqType KategProblem::nstrich(int i,int j)
+{
+ FreqType n=0;
+
+ if( i==ursprung )
+ n-=nwg.getFreq(j);
+ if( i==ziel )
+ n+=nwg.getFreq(j);
+
+ if( j==ursprung )
+ n-=ngw.getFreq(i);
+ if( j==ziel )
+ n+=ngw.getFreq(i);
+
+ if( i==ursprung && j==ursprung )
+ n+=nww;
+ if( i==ziel && j==ziel )
+ n+=nww;
+
+ if( i==ursprung && j==ziel )
+ n-=nww;
+ if( i==ziel && j==ursprung )
+ n-=nww;
+
+ return n;
+}
+
+
+
+
+
+#define MAX_H_TABLE 4000
+extern double h_table[],l_table[],hmy_table[],hmy_sigma;
+
+
+inline double kat_mlog(double x)
+{
+ if(x<=1e-9)
+ return 0;
+ else
+ return log(x);
+}
+
+
+inline double kat_mlog(int s)
+{
+ if(s<=0)
+ return 0;
+ else if( s<MAX_H_TABLE )
+ {
+ massert( s==0 || l_table[s]==log(s) );
+ return l_table[s];
+ }
+ else
+ return log((double)(s));
+}
+
+
+
+inline double kat_hlo(int n)
+{
+ return n*kat_mlog(n-1);
+}
+
+inline double kat_hlo(double n)
+{
+ return n*kat_mlog(n-1);
+}
+
+
+inline double kat_h(int n)
+{
+ massert(n>=-1);
+ if(n<=0)
+ return 0;
+ else
+ if(n<MAX_H_TABLE)
+ {
+ massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
+ return h_table[n];
+ }
+ else
+ return n*log((double)(n));
+}
+inline double kat_h(double n)
+{
+ if(n<=1e-9)
+ return 0;
+ else
+ return n*log(n);
+}
+
+
+inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
+{
+ int e0 = _e0 - immer0;
+ int ePlus = cats*cats - _e0;
+ if(cats*cats-e0>1)
+ return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
+ else
+ return 0;
+}
+
+double mkat_h_full(int n,double tf);
+double mkat_h_part(int n,double cf);
+
+int Hash(const string& s);
+
+
+#endif
+