Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/src/mkcls/KategProblemWBC.cpp')
-rw-r--r--mgizapp/src/mkcls/KategProblemWBC.cpp289
1 files changed, 133 insertions, 156 deletions
diff --git a/mgizapp/src/mkcls/KategProblemWBC.cpp b/mgizapp/src/mkcls/KategProblemWBC.cpp
index 422b4a4..a3280d1 100644
--- a/mgizapp/src/mkcls/KategProblemWBC.cpp
+++ b/mgizapp/src/mkcls/KategProblemWBC.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -55,15 +55,15 @@ static int oneFreqCompareFallend(const void *p,const void *j)
}
-KategProblemWBC::KategProblemWBC(int n,int minw)
-: _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
- mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
-
+KategProblemWBC::KategProblemWBC(int n,int minw)
+ : _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
+ mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
+
{
}
KategProblemWBC::~KategProblemWBC()
-
+
{
massert( after.size()==nWords);
if( absteigend )
@@ -72,77 +72,73 @@ KategProblemWBC::~KategProblemWBC()
void KategProblemWBC::init(int specialFixedWord)
{
-
+
nTranspWords=0;
int i;
- for(i=0;i<_n1.size();i++)
- {
- if( (_n1[i]<mindestAnzahl && _n2[i]<mindestAnzahl && minIndex[i]<=1) ||i==specialFixedWord )
- {
-
- if(!( fixedWord[i]==1 || fixedWord[i]== -1))
- cerr << "mkcls:KategProblemWBC::init::ERROR: " << i << " " << fixedWord[i] << endl;
- fixedWord[i]=1;
- }
- else if(fixedWord[i]<0)
- nTranspWords++;
- }
+ for(i=0; i<_n1.size(); i++) {
+ if( (_n1[i]<mindestAnzahl && _n2[i]<mindestAnzahl && minIndex[i]<=1) ||i==specialFixedWord ) {
+
+ if(!( fixedWord[i]==1 || fixedWord[i]== -1))
+ cerr << "mkcls:KategProblemWBC::init::ERROR: " << i << " " << fixedWord[i] << endl;
+ fixedWord[i]=1;
+ } else if(fixedWord[i]<0)
+ nTranspWords++;
+ }
if( absteigend==0 )
absteigend= &(getSortedList(0));
-
-
-
-
-
+
+
+
+
+
if(verboseMode && nTranspWords!=_n1.size()-1 )
cout << "Es sind: " <<nTranspWords<<" transportierbar.\n";
}
void KategProblemWBC::set_h_of_words(double s)
-
+
{
with_h_of_words=1;
h_of_words = -s;
}
double KategProblemWBC::get_h_of_words()
-
+
{
if( with_h_of_words )
return -h_of_words;
- else
- {
- h_of_words=0;
- for(int i=0;i<nWords;i++)
- h_of_words+=0.5*(kat_h(_n2[i])+kat_h(_n1[i]));
- with_h_of_words=1;
- return -h_of_words;
- }
+ else {
+ h_of_words=0;
+ for(int i=0; i<nWords; i++)
+ h_of_words+=0.5*(kat_h(_n2[i])+kat_h(_n1[i]));
+ with_h_of_words=1;
+ return -h_of_words;
+ }
}
-void KategProblemWBC::setAfterWords(int w,int anzahl)
-
-{
+void KategProblemWBC::setAfterWords(int w,int anzahl)
+
+{
OneFreq o;
o.w=-1;
o.n=0;
- afterFilled[w]=0;
+ afterFilled[w]=0;
after[w].init(anzahl,o,1);
}
-void KategProblemWBC::setBeforeWords(int w,int anzahl)
-
-{
+void KategProblemWBC::setBeforeWords(int w,int anzahl)
+
+{
OneFreq o;
o.w=-1;
o.n=0;
beforeFilled[w]=0;
- before[w].init(anzahl,o,1);
+ before[w].init(anzahl,o,1);
}
-void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
-
+void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
+
{
OneFreq o;
o.n=anzahl;
@@ -155,134 +151,118 @@ void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
_n2[w2]+=anzahl;
}
-void KategProblemWBC::addFreq(int w1,int w2,FreqType anzahl)
-
+void KategProblemWBC::addFreq(int w1,int w2,FreqType anzahl)
+
{
OneFreq o;
o.n=anzahl;
int pos=-1,i;
- for(i=0;i<afterFilled[w1];i++)
+ for(i=0; i<afterFilled[w1]; i++)
if(after[w1][i].w==w2)
pos=i;
- if(pos==-1)
- {
- o.w=w2;
- after[w1][afterFilled[w1]++]=o;
- }
- else
- after[w1][pos].n+=anzahl;
+ if(pos==-1) {
+ o.w=w2;
+ after[w1][afterFilled[w1]++]=o;
+ } else
+ after[w1][pos].n+=anzahl;
_n1[w1]+=anzahl;
pos=-1;
- for(i=0;i<beforeFilled[w2];i++)
+ for(i=0; i<beforeFilled[w2]; i++)
if(before[w2][i].w==w1)
pos=i;
- if(pos==-1)
- {
- o.w=w1;
- before[w2][beforeFilled[w2]++]=o;
- }
- else
+ if(pos==-1) {
+ o.w=w1;
+ before[w2][beforeFilled[w2]++]=o;
+ } else
before[w2][pos].n+=anzahl;
_n2[w2]+=anzahl;
}
short KategProblemWBC::testFull(int doIt)
-
+
{
int enaNom=0;
int afterFilledSum=0,beforeFilledSum=0;
int ret=1,i;
- for(i=0;i<nWords;i++)
- {
- if( n1(i)==1 && n2(i)==1 )
- enaNom++;
- afterFilledSum+=afterFilled[i];
- beforeFilledSum+=beforeFilled[i];
- if(afterFilled[i]!=after[i].size())
- {
- ret=0;
- if( doIt )
- after[i].resize(afterFilled[i]);
- }
- if(beforeFilled[i]!=before[i].size())
- {
- ret=0;
- if( doIt )
- before[i].resize(beforeFilled[i]);
- }
-
+ for(i=0; i<nWords; i++) {
+ if( n1(i)==1 && n2(i)==1 )
+ enaNom++;
+ afterFilledSum+=afterFilled[i];
+ beforeFilledSum+=beforeFilled[i];
+ if(afterFilled[i]!=after[i].size()) {
+ ret=0;
+ if( doIt )
+ after[i].resize(afterFilled[i]);
}
- if( ret==0 && !doIt )
- {
- cerr << "Error: Unfilled word bigram statistics.\n";
- exit(1);
+ if(beforeFilled[i]!=before[i].size()) {
+ ret=0;
+ if( doIt )
+ before[i].resize(beforeFilled[i]);
}
- else
+
+ }
+ if( ret==0 && !doIt ) {
+ cerr << "Error: Unfilled word bigram statistics.\n";
+ exit(1);
+ } else
filled=1;
- if( verboseMode>1 )
- {
- cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords)
- +(afterFilledSum/(float)nWords) << endl;
- cout << "Hapaslegomena: " << enaNom << endl;
- }
+ if( verboseMode>1 ) {
+ cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords)
+ +(afterFilledSum/(float)nWords) << endl;
+ cout << "Hapaslegomena: " << enaNom << endl;
+ }
int symmetrisch=1;
- for(i=0;i<nWords;i++)
- {
- int j;
- massert(before[i].size()==beforeFilled[i]);
- massert( after[i].size()== afterFilled[i]);
- FreqType sum=0;
- for(j=0;j<after[i].size();j++)
- sum+=after[i][j].n;
- massert( sum==_n1[i] );
- sum=0;
- for(j=0;j<before[i].size();j++)
- sum+=before[i][j].n;
- massert(sum==_n2[i]);
- if(_n1[i]!=_n2[i])
- {
- symmetrisch=0;
- if( verboseMode>1 )
- cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
- }
-
+ for(i=0; i<nWords; i++) {
+ int j;
+ massert(before[i].size()==beforeFilled[i]);
+ massert( after[i].size()== afterFilled[i]);
+ FreqType sum=0;
+ for(j=0; j<after[i].size(); j++)
+ sum+=after[i][j].n;
+ massert( sum==_n1[i] );
+ sum=0;
+ for(j=0; j<before[i].size(); j++)
+ sum+=before[i][j].n;
+ massert(sum==_n2[i]);
+ if(_n1[i]!=_n2[i]) {
+ symmetrisch=0;
+ if( verboseMode>1 )
+ cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
}
+
+ }
if(verboseMode && symmetrisch==0)
- cout << "Warning: word bigram statistic is not symmetric "
- "(this is possibly an error)\n";
+ cout << "Warning: word bigram statistic is not symmetric "
+ "(this is possibly an error)\n";
return ret;
}
Array<Word> &KategProblemWBC::getSortedList(int steigend)
-
+
{
int siz=_n2.size(),i;
massert(filled);
Array<Word> &sortedList =*new Array<Word>(siz);
Array<OneFreq> list(siz);
int pos=0;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]<0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
+ for(i=0; i<siz; i++) {
+ if( fixedWord[i]<0 ) {
+ list[pos].w=i;
+ list[pos].n=_n1[i];
+ pos++;
}
+ }
int anzFree=pos;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]>=0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
+ for(i=0; i<siz; i++) {
+ if( fixedWord[i]>=0 ) {
+ list[pos].w=i;
+ list[pos].n=_n1[i];
+ pos++;
}
+ }
massert(pos==siz);
if(steigend )
qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareSteigend);
@@ -290,32 +270,30 @@ Array<Word> &KategProblemWBC::getSortedList(int steigend)
qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareFallend);
massert( anzFree<=list.size() );
- for(i=0;i<siz;i++)
- {
- sortedList[i]=list[i].w;
- massert(steigend || i==0 || i>=anzFree || list[i-1].n>=list[i].n );
- massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
- }
+ for(i=0; i<siz; i++) {
+ sortedList[i]=list[i].w;
+ massert(steigend || i==0 || i>=anzFree || list[i-1].n>=list[i].n );
+ massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
+ }
return sortedList;
}
FreqType KategProblemWBC::numberOfWords()
-
+
{
FreqType n1=0,n2=0;
- for(int i=0;i<_n1.size();i++)
- {
- n1+=_n1[i];
- n2+=_n2[i];
- }
- #ifndef FREQTYPE_DOUBLE
+ for(int i=0; i<_n1.size(); i++) {
+ n1+=_n1[i];
+ n2+=_n2[i];
+ }
+#ifndef FREQTYPE_DOUBLE
massert(n1==n2);
- #endif
+#endif
return n1;
}
void KategProblemWBC::setDollar(int n)
-
+
{
if( fixedWord[n]<0 )
nTranspWords--;
@@ -326,18 +304,17 @@ void KategProblemWBC::initializeIndex(const leda_array<string>&words,char firstC
{
int n=0;
int i;
- massert(-1<unten);massert(unten<oben);
+ massert(-1<unten);
+ massert(unten<oben);
if( verboseMode )
cout << "InitializeIndex: " << firstChar << " u:" << unten << " o:" << oben << " " << noHapas << endl;
- over_array(words,i)
- {
- if( words[i][0]==firstChar && (noHapas || ((short)(n1(i)+0.0001))>=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) )
- {
- minIndex[i]=unten;
- maxIndex[i]=oben;
- n++;
- }
+ over_array(words,i) {
+ if( words[i][0]==firstChar && (noHapas || ((short)(n1(i)+0.0001))>=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) ) {
+ minIndex[i]=unten;
+ maxIndex[i]=oben;
+ n++;
}
+ }
if( verboseMode )
cout << "InitializeIndex gefunden fuer " << n << " Woerter.\n";
}