1 files changed, 395 insertions, 437 deletions
diff --git a/mgizapp/src/mkcls/KategProblemTest.cpp b/mgizapp/src/mkcls/KategProblemTest.cpp
index 3084a0b..ed78e6f 100644
--- a/mgizapp/src/mkcls/KategProblemTest.cpp
+++ b/mgizapp/src/mkcls/KategProblemTest.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
 
-This program is distributed in the hope that it will be useful, 
+This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 USA.
 
 */
@@ -53,26 +53,25 @@ char *strdup(char *a)
 
 
 void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
-{	    
-  for(int i=0;i<katOfWord.size();i++)
-    {
-      if( strcmp(problem.getString(i),"$") )
-	if( strcmp(problem.getString(i),"mkcls-mapped-dollar-symbol-$")==0 )
-	  to << "$" << "\t" << katOfWord[i] << endl;
-	else
-	  to << problem.getString(i) << "\t" << katOfWord[i] << endl;
-    }
+{
+  for(int i=0; i<katOfWord.size(); i++) {
+    if( strcmp(problem.getString(i),"$") )
+      if( strcmp(problem.getString(i),"mkcls-mapped-dollar-symbol-$")==0 )
+        to << "$" << "\t" << katOfWord[i] << endl;
+      else
+        to << problem.getString(i) << "\t" << katOfWord[i] << endl;
+  }
 }
 
 
 void mysplit(const string &s,string &s1,string &s2)
 {
   unsigned int i=0;
-  for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
+  for(; i<s.length(); i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
   s1=s.substr(0,i);
-  for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;  
+  for(; i<s.length(); i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
   s2=s.substr(i,s.length()-i);
-  
+
   iassert(s1.size());
   iassert(s2.size());
 }
@@ -84,164 +83,163 @@ int fromCatFile(KategProblem *p,const char *fname,bool verb)
   leda_h_array<string,int> translation(-1);
   int maxCat=2;
   ifstream in(fname);
-  if(!in)
-    {
-      cerr << "Error: File '" << fname << "' cannot be opened.\n";
-      exit(1);
-    }
-  for(int i=0;i<p->wordFreq.nWords;i++)
+  if(!in) {
+    cerr << "Error: File '" << fname << "' cannot be opened.\n";
+    exit(1);
+  }
+  for(int i=0; i<p->wordFreq.nWords; i++)
     (p->initLike)[i]= -1;
-  
-  
+
+
   translation["1"]=1;
   translation["0"]=0;
 
-  
+
   string s;
-  while( getline(in,s) ) 
-    {
-      string str,categ;
-      mysplit(s,str,categ);
-      int i=p->words->binary_locate(str);
-      if(i>=0 && (*(p->words))[i]==str )
-	{
-	  
-	  if( translation[categ]==-1 )
-	    translation[categ]=maxCat++;
-	  int cat=translation[categ];
-	  if( (p->initLike)[i]!= -1 )
-	    cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
-	  (p->initLike)[i]=cat;
-	}
-      else
-	cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
-    }
-  
+  while( getline(in,s) ) {
+    string str,categ;
+    mysplit(s,str,categ);
+    int i=p->words->binary_locate(str);
+    if(i>=0 && (*(p->words))[i]==str ) {
+
+      if( translation[categ]==-1 )
+        translation[categ]=maxCat++;
+      int cat=translation[categ];
+      if( (p->initLike)[i]!= -1 )
+        cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
+      (p->initLike)[i]=cat;
+    } else
+      cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
+  }
+
   if( verboseMode )
-    cout << "We have " << maxCat << " read non-empty categories" 
-      " (with words from the corpus).\n";
-  
-  if(maxCat>p->katFreq.nKats)
-    {
-      cerr << "Error: Not enough categories reserved (only " 
-	   << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
-      exit(1);
-    }
-  
-  
+    cout << "We have " << maxCat << " read non-empty categories"
+         " (with words from the corpus).\n";
+
+  if(maxCat>p->katFreq.nKats) {
+    cerr << "Error: Not enough categories reserved (only "
+         << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
+    exit(1);
+  }
+
+
   int i=p->words->binary_locate("$");
   if( i>=0 &&  (*(p->words))[i]=="$" )
     (p->initLike)[i]=0;
-  else
-    if( verboseMode )
-      cerr << "Warning: No '$' in vocabulary!\n";
-  
-  
+  else if( verboseMode )
+    cerr << "Warning: No '$' in vocabulary!\n";
+
+
   int errors=0;
-  for(i=0;i<p->wordFreq.nWords;i++)
-    if((p->initLike)[i]== -1 )
-      {
-	if( verb ) cerr << "Error: I don't know the category of word " << i 
-	     << " (" << (*(p->words))[i] << ") " << ".\n";
-	errors=1;
-      }
+  for(i=0; i<p->wordFreq.nWords; i++)
+    if((p->initLike)[i]== -1 ) {
+      if( verb ) cerr << "Error: I don't know the category of word " << i
+                        << " (" << (*(p->words))[i] << ") " << ".\n";
+      errors=1;
+    }
   return errors;
 }
 
 
 
 KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
-			 int auswertung,int nachbarschaft,int minWordFrequency)
+                               int auswertung,int nachbarschaft,int minWordFrequency)
 {
-  
+
   int nwrd=0;
   leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
   string s;
   unsigned int ctr=0;
-  forall_set(leda_set<string>,s,setVokabular)
-    {
-      if( verboseMode>2 )
-	cout << "mkcls:Wort " << ctr << " " << s << endl;
-      sVok[ctr++]=s;
-    }
-  for(unsigned int z=0;z<ctr-1;z++)
+  forall_set(leda_set<string>,s,setVokabular) {
+    if( verboseMode>2 )
+      cout << "mkcls:Wort " << ctr << " " << s << endl;
+    sVok[ctr++]=s;
+  }
+  for(unsigned int z=0; z<ctr-1; z++)
     iassert( sVok[z]<sVok[z+1] );
   sVok.sort();
 
   if( verboseMode>2 )
     cout << "*****Vocabulary: " << sVok;
-  
+
   unsigned int vokSize=sVok.size();
-  massert(vokSize==ctr); massert(vokSize==setVokabular.size());
-  if(verboseMode)
-    {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}
-  
+  massert(vokSize==ctr);
+  massert(vokSize==setVokabular.size());
+  if(verboseMode) {
+    cout << "Size of vocabulary: " << vokSize << "\n";
+    cout.flush();
+  }
+
   KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
-				     auswertung,nachbarschaft,minWordFrequency);
+                                     auswertung,nachbarschaft,minWordFrequency);
   KategProblemWBC &w=k->wordFreq;
   k->words=&sVok;
-  
+
   Array<int> after(vokSize,0);
   Array<int> before(vokSize,0);
-  
-  
+
+
   nwrd=0;
   {
     PSS s;
-    forall_defined_h2(PSS,FreqType,s,cTbl)
-      {
-	const string&ss1=s.first;
-	const string&ss2=s.second;
-	if( ss2.length()&&(ss1!="$" || ss2!="$") )
-	  {
-	    int i1=sVok.binary_search(ss1);
-	    int i2=sVok.binary_search(ss2);
-	    iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
-	    after[i1]++;
-	    before[i2]++;
-	  }
-	if( verboseMode&&((nwrd++)%10000==0) ) 
-	  {cout<<"Statistiken-1 " << nwrd<< ".      \r";cout.flush();}
+    forall_defined_h2(PSS,FreqType,s,cTbl) {
+      const string&ss1=s.first;
+      const string&ss2=s.second;
+      if( ss2.length()&&(ss1!="$" || ss2!="$") ) {
+        int i1=sVok.binary_search(ss1);
+        int i2=sVok.binary_search(ss2);
+        iassert( sVok[i1] == ss1 );
+        iassert( sVok[i2] == ss2 );
+        after[i1]++;
+        before[i2]++;
+      }
+      if( verboseMode&&((nwrd++)%10000==0) ) {
+        cout<<"Statistiken-1 " << nwrd<< ".      \r";
+        cout.flush();
       }
-  }
-  
-  for(unsigned int i=0;i<vokSize;i++)
-    {
-      w.setAfterWords(i,after[i]);
-      w.setBeforeWords(i,before[i]);
     }
-  
-  
+  }
+
+  for(unsigned int i=0; i<vokSize; i++) {
+    w.setAfterWords(i,after[i]);
+    w.setBeforeWords(i,before[i]);
+  }
+
+
   {
     nwrd=0;
     PSS s;
-    forall_defined_h2(PSS,FreqType,s,cTbl)
-      {
-	const string&ss1=s.first;
-	const string&ss2=s.second;
-	FreqType p=cTbl[s];
-	if( ss2.length()&&(ss1!="$" || ss2!="$") )
-	  {
-	    int i1=sVok.binary_search(ss1);
-	    int i2=sVok.binary_search(ss2);
-	    iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
-	    w.setFreq(i1,i2,p);
-	    if( verboseMode>2 )
-	      cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << "   " 
-		   << ss2 << ":" << i2 << "   " << p << endl;
-	  }
- 	if( verboseMode&&((nwrd++)%10000==0) ) 
-	  {cout<<"Statistiken-2 " <<nwrd<< ".    \r";cout.flush();}
-     }
+    forall_defined_h2(PSS,FreqType,s,cTbl) {
+      const string&ss1=s.first;
+      const string&ss2=s.second;
+      FreqType p=cTbl[s];
+      if( ss2.length()&&(ss1!="$" || ss2!="$") ) {
+        int i1=sVok.binary_search(ss1);
+        int i2=sVok.binary_search(ss2);
+        iassert( sVok[i1] == ss1 );
+        iassert( sVok[i2] == ss2 );
+        w.setFreq(i1,i2,p);
+        if( verboseMode>2 )
+          cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << "   "
+               << ss2 << ":" << i2 << "   " << p << endl;
+      }
+      if( verboseMode&&((nwrd++)%10000==0) ) {
+        cout<<"Statistiken-2 " <<nwrd<< ".    \r";
+        cout.flush();
+      }
+    }
   }
-  
+
   w.testFull();
-  if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
+  if(verboseMode) {
+    cout << "Datenintegritaet getestet.\n";
+    cout.flush();
+  }
   return k;
 }
 
 KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
-			 int auswertung,int nachbarschaft,int minWordFrequency)
+                          int auswertung,int nachbarschaft,int minWordFrequency)
 {
   ifstream file(str);
   if(!file)return 0;
@@ -250,118 +248,110 @@ KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
   double c=0;
   if( verboseMode )cout << "NGRFILE: " << str << endl;
   string s1,s2;
-  while(file >> c >> s1 >> s2)
-    {
-      if( s1.length()==0||s2.length()==0 )
-	{
-	  cerr << "ERROR: strings are zero: " << s1.length() <<" " << s1 <<" " << s2.length()<<" " << s2 << endl;
-	  return 0;
-	}
-      if( c==0 )
-	{
-	  cerr << "Count ist 0 " << s1 << " " << s2 << endl;
-	  return 0;
-	}
-      cTbl[pair<string,string>(s1,s2)]=(FreqType)c;
-      setVokabular.insert(s1);
-      setVokabular.insert(s2);
-      if( verboseMode>1 )
-	cout << "R: " << s1 << " " << s2 << " " << c << endl;
-      c=0;
+  while(file >> c >> s1 >> s2) {
+    if( s1.length()==0||s2.length()==0 ) {
+      cerr << "ERROR: strings are zero: " << s1.length() <<" " << s1 <<" " << s2.length()<<" " << s2 << endl;
+      return 0;
+    }
+    if( c==0 ) {
+      cerr << "Count ist 0 " << s1 << " " << s2 << endl;
+      return 0;
     }
-  
+    cTbl[pair<string,string>(s1,s2)]=(FreqType)c;
+    setVokabular.insert(s1);
+    setVokabular.insert(s2);
+    if( verboseMode>1 )
+      cout << "R: " << s1 << " " << s2 << " " << c << endl;
+    c=0;
+  }
+
   return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
 }
 
 
 
-	
 
-	
+
+
 
 
 KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
-			 int auswertung,int nachbarschaft,int minWordFrequency)
+                         int auswertung,int nachbarschaft,int minWordFrequency)
 {
   string oldText,text,line;
   ifstream f(str);
-  if( !f )
-    {
-      cerr << "ERROR: can not open file " << str << ".\n";
-      return 0;
-    }
-  
+  if( !f ) {
+    cerr << "ERROR: can not open file " << str << ".\n";
+    return 0;
+  }
+
   leda_set<string> setVokabular;
   leda_h_array<PSS,FreqType> cTbl(0);
   oldText="$";
-  while(1)
-    {
-      getline(f,line);
-      if(f.fail() && !f.bad() && !f.eof())
-	{
-	  cerr << "WARNING: strange characters in stream (getline) " << endl;f.clear();
-	}
-      if(!f)break;
-      
-      istrstream f2(line.c_str());
-      while( 1 )
-	{
-	  f2 >> text;
-	  if(f2.fail() && !f2.bad() && !f2.eof())
-	    {
-	      cerr << "WARNING: strange characters in stream (>>) !\n";
-	      f2.clear(ios::failbit);
-	    }
-	  if(!f2){break;}
-	  
-	    
-	    
-	    
-	    
-	    
-	  if( text == "$" )
-	    text = "mkcls-mapped-dollar-symbol-$";
-	  if( !setVokabular.member(text) )setVokabular.insert(text);
-	  cTbl[pair<string,string>(oldText,text)]++;
-	  oldText=text;
-	}
-      text="$";
+  while(1) {
+    getline(f,line);
+    if(f.fail() && !f.bad() && !f.eof()) {
+      cerr << "WARNING: strange characters in stream (getline) " << endl;
+      f.clear();
+    }
+    if(!f)break;
+
+    istrstream f2(line.c_str());
+    while( 1 ) {
+      f2 >> text;
+      if(f2.fail() && !f2.bad() && !f2.eof()) {
+        cerr << "WARNING: strange characters in stream (>>) !\n";
+        f2.clear(ios::failbit);
+      }
+      if(!f2) {
+        break;
+      }
+
+
+
+
+
+
+      if( text == "$" )
+        text = "mkcls-mapped-dollar-symbol-$";
       if( !setVokabular.member(text) )setVokabular.insert(text);
       cTbl[pair<string,string>(oldText,text)]++;
       oldText=text;
     }
+    text="$";
+    if( !setVokabular.member(text) )setVokabular.insert(text);
+    cTbl[pair<string,string>(oldText,text)]++;
+    oldText=text;
+  }
   return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
 }
- 
+
 
 
 
 
 void KategProblemSetParameters(KategProblem &p)
 {
-  if( p.katwahl()==K_BEST )
-    {
-      TAOptimization::defaultAnnRate=0.7;
-      RRTOptimization::defaultAnnRate=0.95;
-      GDAOptimization::defaultAlpha=0.05;
-      if( verboseMode )
-	cout << "Parameter-setting like W-DET-BEST\n";
-    }
-  else
-    {
-      TAOptimization::defaultAnnRate=0.4;
-      RRTOptimization::defaultAnnRate=0.6;
-      GDAOptimization::defaultAlpha=0.0125;
-      if( verboseMode )
-	cout << "Parameter-setting like W-DET-DET\n";
-    }
+  if( p.katwahl()==K_BEST ) {
+    TAOptimization::defaultAnnRate=0.7;
+    RRTOptimization::defaultAnnRate=0.95;
+    GDAOptimization::defaultAlpha=0.05;
+    if( verboseMode )
+      cout << "Parameter-setting like W-DET-BEST\n";
+  } else {
+    TAOptimization::defaultAnnRate=0.4;
+    RRTOptimization::defaultAnnRate=0.6;
+    GDAOptimization::defaultAlpha=0.0125;
+    if( verboseMode )
+      cout << "Parameter-setting like W-DET-DET\n";
+  }
 }
 
 
 
 
 KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
-			int auswertung,int nachbarschaft,float relInit)
+                        int auswertung,int nachbarschaft,float relInit)
 {
   KategProblem &k=
     *new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
@@ -369,41 +359,35 @@ KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
   Array<int> after(ANZ_WORD,0);
   Array<int> before(ANZ_WORD,0);
   Array<FreqArray> twoD(ANZ_WORD);
-	int i;
-  for(i=0;i<ANZ_WORD;i++) twoD[i].init(ANZ_WORD,0); 
-  
-  for(i=0;i<ANZ_WORD;i++)
-    {
-      massert(after[i]==0);
-      massert(before[i]==0);
-      for(int j=0;j<ANZ_WORD;j++)
-	{
-	  massert(twoD[i][j]==0);
-	}
-    }
-  for(i=0;i<ANZ_WORD*ANZ_WORD*relInit;i++)
-    {
-      int x=randomInt(ANZ_WORD);
-      int y=randomInt(ANZ_WORD);
-      if(twoD[x][y]==0)
-	{
-	  after[x]++;
-	  before[y]++;
-	}
-      twoD[x][y]+=randomInt(10)+1;
-    }
-  for(i=0;i<ANZ_WORD;i++)
-    {
-      w.setAfterWords(i,after[i]);
-      w.setBeforeWords(i,before[i]);
+  int i;
+  for(i=0; i<ANZ_WORD; i++) twoD[i].init(ANZ_WORD,0);
+
+  for(i=0; i<ANZ_WORD; i++) {
+    massert(after[i]==0);
+    massert(before[i]==0);
+    for(int j=0; j<ANZ_WORD; j++) {
+      massert(twoD[i][j]==0);
     }
-  
-  for(i=0;i<ANZ_WORD;i++)
-    {
-      for(int j=0;j<ANZ_WORD;j++)
-	if( twoD[i][j] )
-	    w.setFreq(i,j,twoD[i][j]);
+  }
+  for(i=0; i<ANZ_WORD*ANZ_WORD*relInit; i++) {
+    int x=randomInt(ANZ_WORD);
+    int y=randomInt(ANZ_WORD);
+    if(twoD[x][y]==0) {
+      after[x]++;
+      before[y]++;
     }
+    twoD[x][y]+=randomInt(10)+1;
+  }
+  for(i=0; i<ANZ_WORD; i++) {
+    w.setAfterWords(i,after[i]);
+    w.setBeforeWords(i,before[i]);
+  }
+
+  for(i=0; i<ANZ_WORD; i++) {
+    for(int j=0; j<ANZ_WORD; j++)
+      if( twoD[i][j] )
+        w.setFreq(i,j,twoD[i][j]);
+  }
   w.testFull();
   return k;
 }
@@ -414,24 +398,23 @@ KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
 char *makeTitle(KategProblem &problem,int verfahren)
 {
   char x[1024];
-  switch(verfahren)
-    {
-    case HC_OPT:
-      strcpy(x,"HC   ");
-      break;
-    case SA_OPT:
-      strcpy(x,"SA     ");
-      break;
-    case TA_OPT:
-      strcpy(x,"TA     ");
-      break;
-    case GDA_OPT:
-      strcpy(x,"GDA    ");
-      break;
-    case RRT_OPT:
-      strcpy(x,"RRT    ");
-      break;
-    }
+  switch(verfahren) {
+  case HC_OPT:
+    strcpy(x,"HC   ");
+    break;
+  case SA_OPT:
+    strcpy(x,"SA     ");
+    break;
+  case TA_OPT:
+    strcpy(x,"TA     ");
+    break;
+  case GDA_OPT:
+    strcpy(x,"GDA    ");
+    break;
+  case RRT_OPT:
+    strcpy(x,"RRT    ");
+    break;
+  }
   problem.makeTitle(x+strlen(x));
   return strdup(x);
 }
@@ -439,11 +422,11 @@ char *makeTitle(KategProblem &problem,int verfahren)
 
 
 
-#define MAX_MULTIPLE 10  
+#define MAX_MULTIPLE 10
 
 Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
-int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
-int anzIter,int verfahren)
+                                        int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
+                                        int anzIter,int verfahren)
 {
   massert(anzprob>1);
   massert(probs[0]->wordFreq.mindestAnzahl<=1);
@@ -456,184 +439,161 @@ int anzIter,int verfahren)
   int indexOfDurchschnitt;
   Array<int> newWords(nWords);
   int useAnzprob=anzprob;
-  do
-    {
-      int w,k;
-      indexOfDurchschnitt=0;
-      for(w=0;w<nWords;w++)
-	newWords[w]=-1;
-      for(k=0;k<useAnzprob;k++)
-	{
-	  massert(probs[k]->wordFreq.nWords==nWords);
-	  probs[k]->makeKats();
-	}
-      
-      for(w=0;w<nWords;w++)
-	{
-	  if( newWords[w]==-1 )
-	    {
-	      
-	      
-	      
-	      leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
-	      for(k=1;k<useAnzprob;k++)
-	      durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
-	      
-	      
-	      int _anzInDurchschnitt=0;
-	      int nr=0;
-	      forall_set(leda_set<int>,nr,durchschnitt)
-		{
-		  _anzInDurchschnitt++;
-		  newWords[nr]=indexOfDurchschnitt;
-		}
-	      if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
-		{
-		  cout << "- (";
-		    forall_set(leda_set<int>,nr,durchschnitt)
-		    {
-		      cout << p0->getString(nr);
-		      if( p0->wordFreq.n1(nr)==1 )
-			cout << "* ";
-		      else
-			cout << " ";
-		    }
-		  cout << ")\n";
-		}
-	      
-	      
-	      
-	      
-	      for(k=0;k<useAnzprob;k++)
-		{
-		  durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
-		}
-	      indexOfDurchschnitt++;
-	    }
-	}
-      
-    if(indexOfDurchschnitt>=minimumNumberOfWords)
-	{
-	  if(useAnzprob==1)
-	    {
-	      cout << "useAnzProb==1 => mysterious.\n";
-	      break;	
-	    }
-	  useAnzprob--;
-	}
+  do {
+    int w,k;
+    indexOfDurchschnitt=0;
+    for(w=0; w<nWords; w++)
+      newWords[w]=-1;
+    for(k=0; k<useAnzprob; k++) {
+      massert(probs[k]->wordFreq.nWords==nWords);
+      probs[k]->makeKats();
+    }
+
+    for(w=0; w<nWords; w++) {
+      if( newWords[w]==-1 ) {
+
+
+
+        leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
+        for(k=1; k<useAnzprob; k++)
+          durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
+
+
+        int _anzInDurchschnitt=0;
+        int nr=0;
+        forall_set(leda_set<int>,nr,durchschnitt) {
+          _anzInDurchschnitt++;
+          newWords[nr]=indexOfDurchschnitt;
+        }
+        if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 ) {
+          cout << "- (";
+          forall_set(leda_set<int>,nr,durchschnitt) {
+            cout << p0->getString(nr);
+            if( p0->wordFreq.n1(nr)==1 )
+              cout << "* ";
+            else
+              cout << " ";
+          }
+          cout << ")\n";
+        }
+
+
+
+
+        for(k=0; k<useAnzprob; k++) {
+          durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
+        }
+        indexOfDurchschnitt++;
+      }
+    }
+
+    if(indexOfDurchschnitt>=minimumNumberOfWords) {
+      if(useAnzprob==1) {
+        cout << "useAnzProb==1 => mysterious.\n";
+        break;
+      }
+      useAnzprob--;
     }
-  while(indexOfDurchschnitt>=minimumNumberOfWords);
-  
-  
+  } while(indexOfDurchschnitt>=minimumNumberOfWords);
+
+
   Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
   qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
   massert(useAnzprob<=probs.size());
   double startTime=clockSec();
   int i, numberOfNew;
-  for(numberOfNew=0; (clockSec()-startTime<timeForOneRed) 
-                         || (numberOfNew < anzprob) ; numberOfNew++)
-    {
-      int w;
-      if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
-	break;
-      KategProblem *p 
-	= neu[numberOfNew] 
-	  = new KategProblem(indexOfDurchschnitt,nKats-2,
-		          p0->initialisierung,p0->auswertung,p0->nachbarschaft);
-
-      for(w=0;w<indexOfDurchschnitt;w++)
-	{
-	  p->wordFreq.setAfterWords(w,5);
-	  p->wordFreq.setBeforeWords(w,5);
-	}
-      for(w=0;w<nWords;w++)
-	{
-	  Array<OneFreq> &after=p0->wordFreq.after[w];
-	  int size=after.size();
-	  for(i=0;i<size;i++)
-	    p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
-	}
-      p->wordFreq.testFull(1);
-      
-      
-      
-      
-      
-      
-      p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
-      double w1=0.0,w2=0.0;
-      if(numberOfNew<useAnzprob)
-	{
-	  
-	  for(i=0;i<nWords;i++)
-	    (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
-	  p->_initialize(5);
-	  HCOptimization hc(*p,-1);
-	  if(verboseMode)
-	    {
-	      w1=p->nicevalue();
-	      cout << "from old category system:" << w1 << endl;
-	    }
-	  hc.minimize(-1);
-	  if(verboseMode)
-	    {
-	      w2=p->nicevalue();
-	      if(w2<w1)
-		cout << "improvement: " << w1-w2 << endl;
-	    }
-	}
-      else
-	{
-	  p->_initialize(1);
-	  double mean;
-	  StatVar end,laufzeit,start;
-	  solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
-	  w2=p->value();
-	  if(verboseMode)
-	      cout << "new category system: " << w2 << " (" << p->nicevalue() 
-		<< ") Zeit: " << clockSec() << "\n";
-	}
+  for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
+      || (numberOfNew < anzprob) ; numberOfNew++) {
+    int w;
+    if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
+      break;
+    KategProblem *p
+    = neu[numberOfNew]
+      = new KategProblem(indexOfDurchschnitt,nKats-2,
+                         p0->initialisierung,p0->auswertung,p0->nachbarschaft);
+
+    for(w=0; w<indexOfDurchschnitt; w++) {
+      p->wordFreq.setAfterWords(w,5);
+      p->wordFreq.setBeforeWords(w,5);
+    }
+    for(w=0; w<nWords; w++) {
+      Array<OneFreq> &after=p0->wordFreq.after[w];
+      int size=after.size();
+      for(i=0; i<size; i++)
+        p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
     }
-	int p;
-  for(p=0;p<probs.size();p++)
-    {
-      if( probs[p] )
-	delete probs[p];
+    p->wordFreq.testFull(1);
+
+
+
+
+
+
+    p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
+    double w1=0.0,w2=0.0;
+    if(numberOfNew<useAnzprob) {
+
+      for(i=0; i<nWords; i++)
+        (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
+      p->_initialize(5);
+      HCOptimization hc(*p,-1);
+      if(verboseMode) {
+        w1=p->nicevalue();
+        cout << "from old category system:" << w1 << endl;
+      }
+      hc.minimize(-1);
+      if(verboseMode) {
+        w2=p->nicevalue();
+        if(w2<w1)
+          cout << "improvement: " << w1-w2 << endl;
+      }
+    } else {
+      p->_initialize(1);
+      double mean;
+      StatVar end,laufzeit,start;
+      solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
+      w2=p->value();
+      if(verboseMode)
+        cout << "new category system: " << w2 << " (" << p->nicevalue()
+             << ") Zeit: " << clockSec() << "\n";
     }
+  }
+  int p;
+  for(p=0; p<probs.size(); p++) {
+    if( probs[p] )
+      delete probs[p];
+  }
   qsort(neu.getPointerToData(),numberOfNew,sizeof(Problem *),compareProblem);
   massert(numberOfNew<=neu.size());
   if( verboseMode )
-    cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt 
-      << " words. costs: " << neu[0]->value() << " " 
-      << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: " 
-      << clockSec() << endl;
-  if( indexOfDurchschnitt<=nKats 
-      || (clockSec()>maxClock&&maxClock) )
-    {
-      if( clockSec()>maxClock&&maxClock )
-	cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
-      for(i=0;i<nWords;i++)
-	katOfWord[i]=neu[0]->katOfWord(newWords[i]);
-      return neu;
-    }
-  else
-    {
-      Array<Kategory> &newKatOfWord=
-       *(new Array<Kategory>(neu[0]->wordFreq.nWords,-1));
-      Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
-						  maxClock,newKatOfWord,
-						  anzIter+1,verfahren);
-      for(i=0;i<nWords;i++)
-	katOfWord[i]=newKatOfWord[newWords[i]];
-      return erg;
-    }
+    cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
+         << " words. costs: " << neu[0]->value() << " "
+         << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
+         << clockSec() << endl;
+  if( indexOfDurchschnitt<=nKats
+      || (clockSec()>maxClock&&maxClock) ) {
+    if( clockSec()>maxClock&&maxClock )
+      cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
+    for(i=0; i<nWords; i++)
+      katOfWord[i]=neu[0]->katOfWord(newWords[i]);
+    return neu;
+  } else {
+    Array<Kategory> &newKatOfWord=
+      *(new Array<Kategory>(neu[0]->wordFreq.nWords,-1));
+    Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
+                               maxClock,newKatOfWord,
+                               anzIter+1,verfahren);
+    for(i=0; i<nWords; i++)
+      katOfWord[i]=newKatOfWord[newWords[i]];
+    return erg;
+  }
 }
 
 
 
 
 KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
-			      double clockForOneRed,double maxClock,int verfahren)
+                              double clockForOneRed,double maxClock,int verfahren)
 {
   Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
   int startN;
@@ -647,31 +607,29 @@ KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
 
   double startTime=clockSec();
   int i;
-  for(i=0;i<startN;i++)
-    {
-      StatVar end,laufzeit,start;
-      double mean;
-      probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
-      solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
-      if( i==minN-1 )
-	endTime = clockSec();
-      if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
-	break;
-    }
+  for(i=0; i<startN; i++) {
+    StatVar end,laufzeit,start;
+    double mean;
+    probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
+    solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
+    if( i==minN-1 )
+      endTime = clockSec();
+    if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
+      break;
+  }
   if( endTime<0 )
     endTime=clockSec();
   massert(i>=firstN);
 
   qsort(probs.getPointerToData(),i,sizeof(KategProblem *),compareProblem);
   massert(i<=probs.size());
-  if( clockForOneRed<=0 )
-    {
-      clockForOneRed=endTime-startTime;
-      if( verboseMode )
-	cout << "time for one reduction: " << clockForOneRed << endl;
-    }
+  if( clockForOneRed<=0 ) {
+    clockForOneRed=endTime-startTime;
+    if( verboseMode )
+      cout << "time for one reduction: " << clockForOneRed << endl;
+  }
   _izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
-  
+
   KategProblem *n=(KategProblem *)(p.makeEqualProblem());
   n->initLike= katOfWord;
   n->_initialize(5);