Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-05-16 05:47:46 +0300
committer: Jeroen Vermeulen <jtv@precisiontranslationtools.com> 2015-05-16 05:47:46 +0300
commit: 34c7066032aba28ea033357cd1911dce3d397c56 (patch)
tree: aa5fdb127f034fb47ddaf4453eea943544460397
parent: 8104c1c5158d8318faac77a2c74a36ce988e74d4 (diff)
Reformat source code.
Done using moses-smt's scripts/other/beautify.py (which uses astyle 2.01 with the k&r style).
-rw-r--r-- mgizapp/src/ATables.cpp 283
-rw-r--r-- mgizapp/src/ATables.h 232
-rw-r--r-- mgizapp/src/AlignTables.cpp 6
-rw-r--r-- mgizapp/src/AlignTables.h 318
-rw-r--r-- mgizapp/src/Array2.h 173
-rw-r--r-- mgizapp/src/Array4.h 77
-rw-r--r-- mgizapp/src/D4Tables.h 1457
-rw-r--r-- mgizapp/src/D5Tables.h 231
-rw-r--r-- mgizapp/src/Dictionary.cpp 29
-rw-r--r-- mgizapp/src/Dictionary.h 11
-rw-r--r-- mgizapp/src/FlexArray.h 40
-rw-r--r-- mgizapp/src/ForwardBackward.cpp 271
-rw-r--r-- mgizapp/src/ForwardBackward.h 41
-rw-r--r-- mgizapp/src/Globals.h 12
-rw-r--r-- mgizapp/src/HMMTables.cpp 1000
-rw-r--r-- mgizapp/src/HMMTables.h 240
-rw-r--r-- mgizapp/src/MoveSwapMatrix.cpp 265
-rw-r--r-- mgizapp/src/MoveSwapMatrix.h 114
-rw-r--r-- mgizapp/src/NTables.cpp 229
-rw-r--r-- mgizapp/src/NTables.h 209
-rw-r--r-- mgizapp/src/Parameter.cpp 123
-rw-r--r-- mgizapp/src/Parameter.h 209
-rw-r--r-- mgizapp/src/Perplexity.cpp 9
-rw-r--r-- mgizapp/src/Perplexity.h 141
-rw-r--r-- mgizapp/src/Pointer.h 200
-rw-r--r-- mgizapp/src/SetArray.h 255
-rw-r--r-- mgizapp/src/TTables.cpp 237
-rw-r--r-- mgizapp/src/TTables.h 396
-rw-r--r-- mgizapp/src/Vector.h 489
-rw-r--r-- mgizapp/src/WordClasses.h 120
-rw-r--r-- mgizapp/src/alignment.cpp 8
-rw-r--r-- mgizapp/src/alignment.h 325
-rw-r--r-- mgizapp/src/cmd.c 935
-rw-r--r-- mgizapp/src/cmd.h 24
-rw-r--r-- mgizapp/src/collCounts.cpp 480
-rw-r--r-- mgizapp/src/collCounts.h 22
-rw-r--r-- mgizapp/src/d4norm.cxx 120
-rw-r--r-- mgizapp/src/defs.h 59
-rw-r--r-- mgizapp/src/file_spec.h 23
-rw-r--r-- mgizapp/src/getSentence.cpp 606
-rw-r--r-- mgizapp/src/getSentence.h 130
-rw-r--r-- mgizapp/src/hmm.cpp 1903
-rw-r--r-- mgizapp/src/hmm.h 87
-rw-r--r-- mgizapp/src/hmmnorm.cxx 140
-rw-r--r-- mgizapp/src/logprob.cpp 61
-rw-r--r-- mgizapp/src/logprob.h 231
-rw-r--r-- mgizapp/src/main.cpp 2017
-rw-r--r-- mgizapp/src/mkcls/Array.h 430
-rw-r--r-- mgizapp/src/mkcls/FixedArray.h 307
-rw-r--r-- mgizapp/src/mkcls/FlexArray.h 22
-rw-r--r-- mgizapp/src/mkcls/GDAOptimization.cpp 131
-rw-r--r-- mgizapp/src/mkcls/GDAOptimization.h 86
-rw-r--r-- mgizapp/src/mkcls/HCOptimization.cpp 16
-rw-r--r-- mgizapp/src/mkcls/HCOptimization.h 30
-rw-r--r-- mgizapp/src/mkcls/IterOptimization.cpp 196
-rw-r--r-- mgizapp/src/mkcls/IterOptimization.h 114
-rw-r--r-- mgizapp/src/mkcls/KategProblem.cpp 1135
-rw-r--r-- mgizapp/src/mkcls/KategProblem.h 381
-rw-r--r-- mgizapp/src/mkcls/KategProblemKBC.cpp 215
-rw-r--r-- mgizapp/src/mkcls/KategProblemKBC.h 172
-rw-r--r-- mgizapp/src/mkcls/KategProblemTest.cpp 832
-rw-r--r-- mgizapp/src/mkcls/KategProblemTest.h 12
-rw-r--r-- mgizapp/src/mkcls/KategProblemWBC.cpp 289
-rw-r--r-- mgizapp/src/mkcls/KategProblemWBC.h 110
-rw-r--r-- mgizapp/src/mkcls/MSBOptimization.cpp 249
-rw-r--r-- mgizapp/src/mkcls/MSBOptimization.h 48
-rw-r--r-- mgizapp/src/mkcls/MYOptimization.cpp 84
-rw-r--r-- mgizapp/src/mkcls/MYOptimization.h 41
-rw-r--r-- mgizapp/src/mkcls/Optimization.cpp 4
-rw-r--r-- mgizapp/src/mkcls/Optimization.h 14
-rw-r--r-- mgizapp/src/mkcls/PopOptimization.cpp 53
-rw-r--r-- mgizapp/src/mkcls/PopOptimization.h 66
-rw-r--r-- mgizapp/src/mkcls/Problem.cpp 81
-rw-r--r-- mgizapp/src/mkcls/Problem.h 169
-rw-r--r-- mgizapp/src/mkcls/ProblemTest.cpp 277
-rw-r--r-- mgizapp/src/mkcls/ProblemTest.h 8
-rw-r--r-- mgizapp/src/mkcls/RRTOptimization.cpp 254
-rw-r--r-- mgizapp/src/mkcls/RRTOptimization.h 71
-rw-r--r-- mgizapp/src/mkcls/SAOptimization.cpp 343
-rw-r--r-- mgizapp/src/mkcls/SAOptimization.h 108
-rw-r--r-- mgizapp/src/mkcls/StatVar.cpp 81
-rw-r--r-- mgizapp/src/mkcls/StatVar.h 150
-rw-r--r-- mgizapp/src/mkcls/TAOptimization.cpp 226
-rw-r--r-- mgizapp/src/mkcls/TAOptimization.h 67
-rw-r--r-- mgizapp/src/mkcls/general.cpp 26
-rw-r--r-- mgizapp/src/mkcls/general.h 24
-rw-r--r-- mgizapp/src/mkcls/mkcls.cpp 727
-rw-r--r-- mgizapp/src/mkcls/my.h 4
-rw-r--r-- mgizapp/src/mkcls/myassert.h 4
-rw-r--r-- mgizapp/src/mkcls/myleda.h 188
-rw-r--r-- mgizapp/src/mkcls/mystl.h 65
-rw-r--r-- mgizapp/src/model1.cpp 1055
-rw-r--r-- mgizapp/src/model1.h 284
-rw-r--r-- mgizapp/src/model2.cpp 372
-rw-r--r-- mgizapp/src/model2.h 37
-rw-r--r-- mgizapp/src/model2to3.cpp 344
-rw-r--r-- mgizapp/src/model3.cpp 2366
-rw-r--r-- mgizapp/src/model3.h 165
-rw-r--r-- mgizapp/src/model345-peg.cpp 302
-rw-r--r-- mgizapp/src/model3_viterbi.cpp 959
-rw-r--r-- mgizapp/src/model3_viterbi_with_tricks.cpp 1392
-rw-r--r-- mgizapp/src/myassert.cpp 12
-rw-r--r-- mgizapp/src/mymath.h 5
-rw-r--r-- mgizapp/src/mystl.h 172
-rw-r--r-- mgizapp/src/parse.cpp 160
-rw-r--r-- mgizapp/src/plain2snt.cpp 128
-rw-r--r-- mgizapp/src/reports.cpp 160
-rw-r--r-- mgizapp/src/snt2cooc-reduce-mem-preprocess.cpp 97
-rw-r--r-- mgizapp/src/snt2cooc.cpp 143
-rw-r--r-- mgizapp/src/snt2plain.cpp 107
-rw-r--r-- mgizapp/src/symal.cpp 650
-rw-r--r-- mgizapp/src/syncObj.h 152
-rw-r--r-- mgizapp/src/transpair_model1.h 115
-rw-r--r-- mgizapp/src/transpair_model2.h 28
-rw-r--r-- mgizapp/src/transpair_model3.cpp 141
-rw-r--r-- mgizapp/src/transpair_model3.h 52
-rw-r--r-- mgizapp/src/transpair_model4.cpp 92
-rw-r--r-- mgizapp/src/transpair_model4.h 56
-rw-r--r-- mgizapp/src/transpair_model5.cpp 208
-rw-r--r-- mgizapp/src/transpair_model5.h 31
-rw-r--r-- mgizapp/src/transpair_modelhmm.h 338
-rw-r--r-- mgizapp/src/ttableDiff.hpp 149
-rw-r--r-- mgizapp/src/types.h 2
-rw-r--r-- mgizapp/src/utility.cpp 9
-rw-r--r-- mgizapp/src/utility.h 30
-rw-r--r-- mgizapp/src/vocab.cpp 74
-rw-r--r-- mgizapp/src/vocab.h 101
-rw-r--r-- mgizapp/w32/benchtest.h 10
-rw-r--r-- mgizapp/w32/implement.h 114
-rw-r--r-- mgizapp/w32/need_errno.h 24
-rw-r--r-- mgizapp/w32/pthread.h 1289
-rw-r--r-- mgizapp/w32/sched.h 42
-rw-r--r-- mgizapp/w32/semaphore.h 52
133 files changed, 17277 insertions, 17240 deletions
diff --git a/mgizapp/src/ATables.cpp b/mgizapp/src/ATables.cpp
index 8cc4ad2..7552e95 100644
--- a/mgizapp/src/ATables.cpp
+++ b/mgizapp/src/ATables.cpp
@@ -8,18 +8,18 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
-#include "ATables.h"
+#include "ATables.h"
#include "Globals.h"
#include "myassert.h"
#include "Parameter.h"
@@ -28,185 +28,190 @@ GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional ali
GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
template <class VALTYPE>
-void amodel<VALTYPE>::printTable(const char *filename) const{
- // print amodel to file with the name filename (it'll be created or overwritten
- // format : for a table :
- // aj j l m val
- // where aj is source word pos, j target word pos, l source sentence length,
+void amodel<VALTYPE>::printTable(const char *filename) const
+{
+ // print amodel to file with the name filename (it'll be created or overwritten
+ // format : for a table :
+ // aj j l m val
+ // where aj is source word pos, j target word pos, l source sentence length,
// m is target sentence length.
- //
+ //
//return;
- if (is_distortion)
- cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
- else
- cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
-
- ofstream of(filename);
- double ssum=0.0;
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- double sum=0.0;
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=getValue(i, j, L, M);
- if( x>PROB_SMOOTH ){
- of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
- sum+=x;
- }
- }
- ssum+=sum;
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- double sum=0.0;
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=getValue(j, i, L, M);
- if( x>PROB_SMOOTH ){
- of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
- sum+=x;
- }
- }
- ssum+=sum;
- }
+ if (is_distortion)
+ cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
+ else
+ cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
+
+ ofstream of(filename);
+ double ssum=0.0;
+ for(WordIndex l=0; l < MaxSentLength; l++) {
+ for(WordIndex m=0; m<MaxSentLength; m++) {
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ) {
+ for(WordIndex j=1; j<=M; j++) {
+ double sum=0.0;
+ for(WordIndex i=0; i<=L; i++) {
+ VALTYPE x=getValue(i, j, L, M);
+ if( x>PROB_SMOOTH ) {
+ of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
+ sum+=x;
+ }
+ }
+ ssum+=sum;
+ }
+ } else {
+ for(WordIndex i=0; i<=L; i++) {
+ double sum=0.0;
+ for(WordIndex j=1; j<=M; j++) {
+ VALTYPE x=getValue(j, i, L, M);
+ if( x>PROB_SMOOTH ) {
+ of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
+ sum+=x;
}
+ }
+ ssum+=sum;
}
+ }
}
+ }
}
template <class VALTYPE>
-void amodel<VALTYPE>::printRealTable(const char *filename) const{
- // print amodel to file with the name filename (it'll be created or overwritten
- // format : for a table :
- // aj j l m val
- // where aj is source word pos, j target word pos, l source sentence length,
+void amodel<VALTYPE>::printRealTable(const char *filename) const
+{
+ // print amodel to file with the name filename (it'll be created or overwritten
+ // format : for a table :
+ // aj j l m val
+ // where aj is source word pos, j target word pos, l source sentence length,
// m is target sentence length.
- //
+ //
//return;
- if (is_distortion)
- cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
- else
- cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
-
- ofstream of(filename);
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=getValue(i, j, L, M);
- if( x>MINCOUNTINCREASE )
- of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
- }
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=getValue(j, i, L, M);
- if( x>MINCOUNTINCREASE )
- of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
- }
- }
- }
+ if (is_distortion)
+ cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
+ else
+ cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
+
+ ofstream of(filename);
+ for(WordIndex l=0; l < MaxSentLength; l++) {
+ for(WordIndex m=0; m<MaxSentLength; m++) {
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ) {
+ for(WordIndex j=1; j<=M; j++) {
+ for(WordIndex i=0; i<=L; i++) {
+ VALTYPE x=getValue(i, j, L, M);
+ if( x>MINCOUNTINCREASE )
+ of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
+ }
+ }
+ } else {
+ for(WordIndex i=0; i<=L; i++) {
+ for(WordIndex j=1; j<=M; j++) {
+ VALTYPE x=getValue(j, i, L, M);
+ if( x>MINCOUNTINCREASE )
+ of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
+ }
}
+ }
}
+ }
}
extern short NoEmptyWord;
template <class VALTYPE>
-bool amodel<VALTYPE>::readTable(const char *filename){
+bool amodel<VALTYPE>::readTable(const char *filename)
+{
/* This function reads the a table from a file.
Each line is of the format: aj j l m val
- where aj is the source word position, j the target word position,
+ where aj is the source word position, j the target word position,
l the source sentence length, and m the target sentence length
-
+
This function also works for a d table, where the positions
of aj and i are swapped. Both the a and d tables are 4 dimensional
hashes; this function will simply read in the four values and keep
them in that order when hashing the fifth value.
NAS, 7/11/99
*/
- ifstream inf(filename);
- cout << "Reading a/d table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename<<"\n";
- return false;
- }
- WordIndex w, x, l, m;
- VALTYPE prob;
- while(inf >> w >> x >> l >> m >> prob )
- // the NULL word is added to the length
- // of the sentence in the tables, but discount it when you write the tables.
- setValue(w, x, l, m, prob);
- return true;
+ ifstream inf(filename);
+ cout << "Reading a/d table from " << filename << "\n";
+ if(!inf) {
+ cerr << "\nERROR: Cannot open " << filename<<"\n";
+ return false;
+ }
+ WordIndex w, x, l, m;
+ VALTYPE prob;
+ while(inf >> w >> x >> l >> m >> prob )
+ // the NULL word is added to the length
+ // of the sentence in the tables, but discount it when you write the tables.
+ setValue(w, x, l, m, prob);
+ return true;
}
template <class VALTYPE>
-bool amodel<VALTYPE>::readAugTable(const char *filename){
+bool amodel<VALTYPE>::readAugTable(const char *filename)
+{
/* This function reads the a table from a file.
Each line is of the format: aj j l m val
- where aj is the source word position, j the target word position,
+ where aj is the source word position, j the target word position,
l the source sentence length, and m the target sentence length
-
+
This function also works for a d table, where the positions
of aj and i are swapped. Both the a and d tables are 4 dimensional
hashes; this function will simply read in the four values and keep
them in that order when hashing the fifth value.
NAS, 7/11/99
*/
- ifstream inf(filename);
- cout << "Reading a/d table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename<<"\n";
- return false;
- }
- WordIndex w, x, l, m;
- VALTYPE prob;
- while(inf >> w >> x >> l >> m >> prob )
- // the NULL word is added to the length
- // of the sentence in the tables, but discount it when you write the tables.
- addValue(w, x, l, m, prob);
- return true;
+ ifstream inf(filename);
+ cout << "Reading a/d table from " << filename << "\n";
+ if(!inf) {
+ cerr << "\nERROR: Cannot open " << filename<<"\n";
+ return false;
+ }
+ WordIndex w, x, l, m;
+ VALTYPE prob;
+ while(inf >> w >> x >> l >> m >> prob )
+ // the NULL word is added to the length
+ // of the sentence in the tables, but discount it when you write the tables.
+ addValue(w, x, l, m, prob);
+ return true;
}
template <class VALTYPE>
-bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
- cout << "start merging " <<"\n";
- for(WordIndex l=0; l < MaxSentLength; l++){
- for(WordIndex m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(WordIndex j=1;j<=M; j++){
- for(WordIndex i=0;i<=L; i++){
- VALTYPE x=am.getValue(i, j, L, M);
- addValue(i,j,L,M,x);
- }
- }
- }else{
- for(WordIndex i=0;i<=L;i++){
- for(WordIndex j=1;j<=M;j++){
- VALTYPE x=am.getValue(j, i, L, M);
- addValue(j,i,L,M,x);
- }
- }
- }
+bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am)
+{
+ cout << "start merging " <<"\n";
+ for(WordIndex l=0; l < MaxSentLength; l++) {
+ for(WordIndex m=0; m<MaxSentLength; m++) {
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ) {
+ for(WordIndex j=1; j<=M; j++) {
+ for(WordIndex i=0; i<=L; i++) {
+ VALTYPE x=am.getValue(i, j, L, M);
+ addValue(i,j,L,M,x);
+ }
+ }
+ } else {
+ for(WordIndex i=0; i<=L; i++) {
+ for(WordIndex j=1; j<=M; j++) {
+ VALTYPE x=am.getValue(j, i, L, M);
+ addValue(j,i,L,M,x);
+ }
}
+ }
}
- return true;
+ }
+ return true;
}
-template class amodel<COUNT> ;
-//template class amodel<PROB> ;
+template class amodel<COUNT> ;
+//template class amodel<PROB> ;
diff --git a/mgizapp/src/ATables.h b/mgizapp/src/ATables.h
index 9db77b1..bc1390d 100644
--- a/mgizapp/src/ATables.h
+++ b/mgizapp/src/ATables.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -64,128 +64,142 @@ extern short NoEmptyWord;
table is the probability (d(j/l,m,i), where j is word target position, i is
source word position connected to it, m is target sentence length, and l is
source sentence length) or count collected for it. The probability and the
- count are represented as log integer probability as
- defined by the class LogProb .
+ count are represented as log integer probability as
+ defined by the class LogProb .
This class is used to represents a Tables (probabiliity) and d (distortion)
tables and also their corresponding count tables .
-
+
*--------------------------------------------------------------------------*/
-inline int Mabs(int a){
- if(a<0)
- return -a;
- else
- return a;
+inline int Mabs(int a)
+{
+ if(a<0)
+ return -a;
+ else
+ return a;
}
template <class VALTYPE>
-class amodel{
+class amodel
+{
public:
- Array4<VALTYPE> a;
- bool is_distortion ;
- WordIndex MaxSentLength;
- bool ignoreL, ignoreM;
- VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const{
- massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
- massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
- massert( l<MaxSentLength );massert( m<MaxSentLength );
- return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
- }
-
- static float smooth_factor;
- amodel(bool flag = false)
- : a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
- {lock = new Mutex();};
-
- ~amodel(){delete lock;};
-
+ Array4<VALTYPE> a;
+ bool is_distortion ;
+ WordIndex MaxSentLength;
+ bool ignoreL, ignoreM;
+ VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const {
+ massert( (!is_distortion) || aj<=m );
+ massert( (!is_distortion) || j<=l );
+ massert( (!is_distortion) || aj!=0 );
+ massert( is_distortion || aj<=l );
+ massert( is_distortion || j<=m );
+ massert( (is_distortion) || j!=0 );
+ massert( l<MaxSentLength );
+ massert( m<MaxSentLength );
+ return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
+ }
+
+ static float smooth_factor;
+ amodel(bool flag = false)
+ : a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH) {
+ lock = new Mutex();
+ };
+
+ ~amodel() {
+ delete lock;
+ };
+
protected:
- VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m){
- massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
- massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
- massert( l<MaxSentLength );massert( m<MaxSentLength );
- return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
- }
+ VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) {
+ massert( (!is_distortion) || aj<=m );
+ massert( (!is_distortion) || j<=l );
+ massert( is_distortion || aj<=l );
+ massert( is_distortion || j<=m );
+ massert( (is_distortion) || j!=0 );
+ massert( l<MaxSentLength );
+ massert( m<MaxSentLength );
+ return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
+ }
public:
- void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
- lock->lock();
- getRef(aj, j, l, m)=val;
- lock->unlock();
- }
-
- Mutex* lock;
+ void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
+ lock->lock();
+ getRef(aj, j, l, m)=val;
+ lock->unlock();
+ }
+
+ Mutex* lock;
public:
- /**
- By Qin
- */
- void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
- lock->lock();
- getRef(aj, j, l, m)+=val;
- lock->unlock();
- }
- bool merge(amodel<VALTYPE>& am);
- VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const{
- if( is_distortion==0 )
- return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
- else
- return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
- }
-
- void printTable(const char* filename)const ;
- void printRealTable(const char* filename)const ;
- template<class COUNT>
- void normalize(amodel<COUNT>& aTable)const
- {
- WordIndex i, j, l, m ;
- COUNT total;
- int nParam=0;
- for(l=0;l<MaxSentLength;l++){
- for(m=0;m<MaxSentLength;m++){
- if( CompactADTable && l!=m )
- continue;
- unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
- unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
- if( is_distortion==0 ){
- for(j=1;j<=M; j++){
- total=0.0;
- for(i=0;i<=L;i++){
- total+=get(i, j, L, M);
- }
- if( total ){
- for(i=0;i<=L;i++){
- nParam++;
- aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
- massert(aTable.getRef(i,j,L,M)<=1.0);
- if( NoEmptyWord&&i==0 )
- aTable.getRef(i,j,L,M)=0;
- }
- }
- }
- }else{
- for(i=0;i<=L;i++){
- total=0.0;
- for(j=1;j<=M;j++)
- total+=get(j, i, L, M);
- if( total )
- for(j=1;j<=M;j++){
- aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
- nParam++;
- massert(aTable.getRef(j,i,L,M)<=1.0);
- if( NoEmptyWord&&i==0 )
- aTable.getRef(j,i,L,M)=0;
- }
- }
- }
+ /**
+ By Qin
+ */
+ void addValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val) {
+ lock->lock();
+ getRef(aj, j, l, m)+=val;
+ lock->unlock();
+ }
+ bool merge(amodel<VALTYPE>& am);
+ VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const {
+ if( is_distortion==0 )
+ return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
+ else
+ return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
+ }
+
+ void printTable(const char* filename)const ;
+ void printRealTable(const char* filename)const ;
+ template<class COUNT>
+ void normalize(amodel<COUNT>& aTable)const {
+ WordIndex i, j, l, m ;
+ COUNT total;
+ int nParam=0;
+ for(l=0; l<MaxSentLength; l++) {
+ for(m=0; m<MaxSentLength; m++) {
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ) {
+ for(j=1; j<=M; j++) {
+ total=0.0;
+ for(i=0; i<=L; i++) {
+ total+=get(i, j, L, M);
+ }
+ if( total ) {
+ for(i=0; i<=L; i++) {
+ nParam++;
+ aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
+ massert(aTable.getRef(i,j,L,M)<=1.0);
+ if( NoEmptyWord&&i==0 )
+ aTable.getRef(i,j,L,M)=0;
+ }
}
+ }
+ } else {
+ for(i=0; i<=L; i++) {
+ total=0.0;
+ for(j=1; j<=M; j++)
+ total+=get(j, i, L, M);
+ if( total )
+ for(j=1; j<=M; j++) {
+ aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
+ nParam++;
+ massert(aTable.getRef(j,i,L,M)<=1.0);
+ if( NoEmptyWord&&i==0 )
+ aTable.getRef(j,i,L,M)=0;
+ }
+ }
}
- cout << "A/D table contains " << nParam << " parameters.\n";
+ }
}
-
- bool readTable(const char *filename);
- bool readAugTable(const char *filename);
- void clear()
- {a.clear();}
+ cout << "A/D table contains " << nParam << " parameters.\n";
+ }
+
+ bool readTable(const char *filename);
+ bool readAugTable(const char *filename);
+ void clear() {
+ a.clear();
+ }
};
/* ------------------- End of amodel Class Definitions ----------------------*/
diff --git a/mgizapp/src/AlignTables.cpp b/mgizapp/src/AlignTables.cpp
index 8c35b77..3326c82 100644
--- a/mgizapp/src/AlignTables.cpp
+++ b/mgizapp/src/AlignTables.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,7 +30,7 @@ bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
return true ;
}
-
+
LogProb alignmodel::getValue(Vector<WordIndex>& align) const
{
diff --git a/mgizapp/src/AlignTables.h b/mgizapp/src/AlignTables.h
index d4bcf09..52debc9 100644
--- a/mgizapp/src/AlignTables.h
+++ b/mgizapp/src/AlignTables.h
@@ -1,154 +1,164 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _aligntables_h
-#define _aligntables_h 1
-
-#include "defs.h"
-
-
-#include <cassert>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-//#include <vector>
-#include "Vector.h"
-#include <utility>
-#if __GNUC__>2
-#include <ext/hash_map>
-using __gnu_cxx::hash_map;
-#else
-#include <hash_map>
-#endif
-#include <cmath>
-#include <fstream>
-#include "transpair_model1.h"
-
-
-/* ----------------- Class Defintions for hashmyalignment --------------------
- Objective: This class is used to define a hash mapping function to map
- an alignment (defined as a vector of integers) into a hash key
- ----------------------------------------------------------------------------*/
-
-class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
-{
-public:
- size_t operator() (const Vector<WordIndex>& key) const
- // to define the mapping function. it takes an alignment (a vector of
- // integers) and it returns an integer value (hash key).
- {
- WordIndex j ;
- size_t s ;
- size_t key_sum = 0 ;
- // logmsg << "For alignment:" ;
- for (j = 1 ; j < key.size() ; j++){
- // logmsg << " " << key[j] ;
- key_sum += (size_t) (int) pow(double(key[j]), double((j % 6)+1));
- }
- // logmsg << " , Key value was : " << key_sum;
- s = key_sum % 1000000 ;
- // logmsg << " h(k) = " << s << endl ;
- return(s);
- }
-#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
-
- bool operator()(const Vector<WordIndex> t1,
- const Vector<WordIndex> t2) const
- {WordIndex j ;
- if (t1.size() != t2.size())
- return(false);
- for (j = 1 ; j < t1.size() ; j++)
- if (t1[j] != t2[j])
- return(false);
- return(true);
- }
-#endif
-};
-
-#ifndef WIN32
-class equal_to_myalignment{
- // returns true if two alignments are the same (two vectors have same enties)
-public:
- bool operator()(const Vector<WordIndex> t1,
- const Vector<WordIndex> t2) const
- {WordIndex j ;
- if (t1.size() != t2.size())
- return(false);
- for (j = 1 ; j < t1.size() ; j++)
- if (t1[j] != t2[j])
- return(false);
- return(true);
- }
-
-};
-#endif
-
-/* ---------------- End of Class Defnition for hashmyalignment --------------*/
-
-
-/* ------------------ Class Defintions for alignmodel -----------------------
- Class Name: alignmodel
- Objective: Alignments neighborhhoods (collection of alignments) are stored in
- a hash table (for easy lookup). Each alignment vector is mapped into a hash
- key using the operator defined above.
- *--------------------------------------------------------------------------*/
-
-class alignmodel{
-private:
-#ifdef WIN32
- typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment > alignment_hash;
-
-#else
- typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > alignment_hash;
-
-#endif
- alignment_hash a;
-private:
- // void erase(Vector<WordIndex>&);
-public:
-
- // methods;
-
- inline alignment_hash::iterator begin(void){return a.begin();} // begining of hash
- inline alignment_hash::iterator end(void){return a.end();} // end of hash
- inline const alignment_hash& getHash() const {return a;}; // reference to hash table
- bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
- // void setValue(Vector<WordIndex>&, LogProb val); // not needed
- LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
- inline void clear(void){ a.clear();}; // clear hash table
- // void printTable(const char* filename);
- inline void resize(WordIndex n) {
-#ifndef WIN32
- a.resize(n);
-#endif
- }; // resize table
-
-};
-
-/* -------------- End of alignmode Class Definitions ------------------------*/
-#endif
+/*
+
+EGYPT Toolkit for Statistical Machine Translation
+Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+#ifndef _aligntables_h
+#define _aligntables_h 1
+
+#include "defs.h"
+
+
+#include <cassert>
+
+#include <iostream>
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+//#include <vector>
+#include "Vector.h"
+#include <utility>
+#if __GNUC__>2
+#include <ext/hash_map>
+using __gnu_cxx::hash_map;
+#else
+#include <hash_map>
+#endif
+#include <cmath>
+#include <fstream>
+#include "transpair_model1.h"
+
+
+/* ----------------- Class Defintions for hashmyalignment --------------------
+ Objective: This class is used to define a hash mapping function to map
+ an alignment (defined as a vector of integers) into a hash key
+ ----------------------------------------------------------------------------*/
+
+class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
+{
+public:
+ size_t operator() (const Vector<WordIndex>& key) const
+ // to define the mapping function. it takes an alignment (a vector of
+ // integers) and it returns an integer value (hash key).
+ {
+ WordIndex j ;
+ size_t s ;
+ size_t key_sum = 0 ;
+ // logmsg << "For alignment:" ;
+ for (j = 1 ; j < key.size() ; j++) {
+ // logmsg << " " << key[j] ;
+ key_sum += (size_t) (int) pow(double(key[j]), double((j % 6)+1));
+ }
+ // logmsg << " , Key value was : " << key_sum;
+ s = key_sum % 1000000 ;
+ // logmsg << " h(k) = " << s << endl ;
+ return(s);
+ }
+#ifdef WIN32
+ enum {
+ // parameters for hash table
+ bucket_size = 1 // 0 < bucket_size
+ };
+
+ bool operator()(const Vector<WordIndex> t1,
+ const Vector<WordIndex> t2) const {
+ WordIndex j ;
+ if (t1.size() != t2.size())
+ return(false);
+ for (j = 1 ; j < t1.size() ; j++)
+ if (t1[j] != t2[j])
+ return(false);
+ return(true);
+ }
+#endif
+};
+
+#ifndef WIN32
+class equal_to_myalignment
+{
+ // returns true if two alignments are the same (two vectors have same enties)
+public:
+ bool operator()(const Vector<WordIndex> t1,
+ const Vector<WordIndex> t2) const {
+ WordIndex j ;
+ if (t1.size() != t2.size())
+ return(false);
+ for (j = 1 ; j < t1.size() ; j++)
+ if (t1[j] != t2[j])
+ return(false);
+ return(true);
+ }
+
+};
+#endif
+
+/* ---------------- End of Class Defnition for hashmyalignment --------------*/
+
+
+/* ------------------ Class Defintions for alignmodel -----------------------
+ Class Name: alignmodel
+ Objective: Alignments neighborhhoods (collection of alignments) are stored in
+ a hash table (for easy lookup). Each alignment vector is mapped into a hash
+ key using the operator defined above.
+ *--------------------------------------------------------------------------*/
+
+class alignmodel
+{
+private:
+#ifdef WIN32
+ typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment > alignment_hash;
+
+#else
+ typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > alignment_hash;
+
+#endif
+ alignment_hash a;
+private:
+ // void erase(Vector<WordIndex>&);
+public:
+
+ // methods;
+
+ inline alignment_hash::iterator begin(void) {
+ return a.begin(); // begining of hash
+ }
+ inline alignment_hash::iterator end(void) {
+ return a.end(); // end of hash
+ }
+ inline const alignment_hash& getHash() const {
+ return a;
+ }; // reference to hash table
+ bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
+// void setValue(Vector<WordIndex>&, LogProb val); // not needed
+ LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
+ inline void clear(void) {
+ a.clear();
+ }; // clear hash table
+ // void printTable(const char* filename);
+ inline void resize(WordIndex n) {
+#ifndef WIN32
+ a.resize(n);
+#endif
+ }; // resize table
+
+};
+
+/* -------------- End of alignmode Class Definitions ------------------------*/
+#endif
diff --git a/mgizapp/src/Array2.h b/mgizapp/src/Array2.h
index 8ea2d9e..6f347cd 100644
--- a/mgizapp/src/Array2.h
+++ b/mgizapp/src/Array2.h
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,95 +32,96 @@
#include <string>
#include <vector>
-template<class T, class Y=vector<T> > class Array2 {
+template<class T, class Y=vector<T> > class Array2
+{
public:
- Y p;
- // short h1, h2;
- unsigned int h1, h2;
+ Y p;
+ // short h1, h2;
+ unsigned int h1, h2;
public:
- Array2(unsigned int _h1, unsigned int _h2) :
- p(_h1*_h2), h1(_h1), h2(_h2) {
- }
- Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
- p(_h1*_h2, _init), h1(_h1), h2(_h2) {
- }
- Array2() :
- h1(0), h2(0) {
- }
- inline T &operator()(unsigned int i, unsigned int j) {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline const T&operator()(unsigned int i, unsigned int j) const {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline T get(unsigned int i, unsigned int j) {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline void set(unsigned int i, unsigned int j, T x) {
- assert(i<h1);
- assert(j<h2);
- p[i*h2+j]=x;
- }
- inline const T get(unsigned int i, unsigned int j) const {
- assert(i<h1);
- assert(j<h2);
- return p[i*h2+j];
- }
- inline unsigned int getLen1() const {
- return h1;
- }
- inline unsigned int getLen2() const {
- return h2;
- }
+ Array2(unsigned int _h1, unsigned int _h2) :
+ p(_h1*_h2), h1(_h1), h2(_h2) {
+ }
+ Array2(unsigned int _h1, unsigned int _h2, const T&_init) :
+ p(_h1*_h2, _init), h1(_h1), h2(_h2) {
+ }
+ Array2() :
+ h1(0), h2(0) {
+ }
+ inline T &operator()(unsigned int i, unsigned int j) {
+ assert(i<h1);
+ assert(j<h2);
+ return p[i*h2+j];
+ }
+ inline const T&operator()(unsigned int i, unsigned int j) const {
+ assert(i<h1);
+ assert(j<h2);
+ return p[i*h2+j];
+ }
+ inline T get(unsigned int i, unsigned int j) {
+ assert(i<h1);
+ assert(j<h2);
+ return p[i*h2+j];
+ }
+ inline void set(unsigned int i, unsigned int j, T x) {
+ assert(i<h1);
+ assert(j<h2);
+ p[i*h2+j]=x;
+ }
+ inline const T get(unsigned int i, unsigned int j) const {
+ assert(i<h1);
+ assert(j<h2);
+ return p[i*h2+j];
+ }
+ inline unsigned int getLen1() const {
+ return h1;
+ }
+ inline unsigned int getLen2() const {
+ return h2;
+ }
- inline T*begin() {
- if (h1==0||h2==0)
- return 0;
- return &(p[0]);
- }
- inline T*end() {
- if (h1==0||h2==0)
- return 0;
- return &(p[0])+p.size();
- }
+ inline T*begin() {
+ if (h1==0||h2==0)
+ return 0;
+ return &(p[0]);
+ }
+ inline T*end() {
+ if (h1==0||h2==0)
+ return 0;
+ return &(p[0])+p.size();
+ }
- inline const T*begin() const {
- return p.begin();
- }
- inline const T*end() const {
- return p.end();
- }
+ inline const T*begin() const {
+ return p.begin();
+ }
+ inline const T*end() const {
+ return p.end();
+ }
- friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) {
- for (unsigned int i=0; i<ar.getLen1(); i++) {
- //out << i << ": ";
- for (unsigned int j=0; j<ar.getLen2(); j++)
- out << ar(i, j) << ' ';
- out << '\n';
- }
- return out << endl;
- }
- inline void resize(unsigned int a, unsigned int b) {
- if ( !(a==h1&&b==h2)) {
- h1=a;
- h2=b;
- p.resize(h1*h2);
- }
- }
- inline void resize(unsigned int a, unsigned int b, const T&t) {
- if ( !(a==h1&&b==h2)) {
- h1=a;
- h2=b;
- p.resize(h1*h2);
- fill(p.begin(), p.end(), t);
- }
- }
+ friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) {
+ for (unsigned int i=0; i<ar.getLen1(); i++) {
+ //out << i << ": ";
+ for (unsigned int j=0; j<ar.getLen2(); j++)
+ out << ar(i, j) << ' ';
+ out << '\n';
+ }
+ return out << endl;
+ }
+ inline void resize(unsigned int a, unsigned int b) {
+ if ( !(a==h1&&b==h2)) {
+ h1=a;
+ h2=b;
+ p.resize(h1*h2);
+ }
+ }
+ inline void resize(unsigned int a, unsigned int b, const T&t) {
+ if ( !(a==h1&&b==h2)) {
+ h1=a;
+ h2=b;
+ p.resize(h1*h2);
+ fill(p.begin(), p.end(), t);
+ }
+ }
};
#endif
diff --git a/mgizapp/src/Array4.h b/mgizapp/src/Array4.h
index 4e57a2e..c182a11 100644
--- a/mgizapp/src/Array4.h
+++ b/mgizapp/src/Array4.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -26,53 +26,46 @@ USA.
#include "Array2.h"
template<class T> class Array4
{
- private:
+private:
Array2< Array2<T>* > A;
int M;
T init;
- public:
+public:
Array4(int m,const T&_init)
: A(m,m,0),M(m),init(_init) {}
- ~Array4()
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- delete A(l,m);
- }
- const T&operator()(int i, int j, int l, int m)const
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- const T&get(int i, int j, int l, int m)const
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- T&operator()(int i, int j, int l, int m)
- {
- if( A(l,m)==0 )
- {
- A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
- }
+ ~Array4() {
+ for(int l=0; l<M; ++l)
+ for(int m=0; m<M; ++m)
+ delete A(l,m);
+ }
+ const T&operator()(int i, int j, int l, int m)const {
+ if( A(l,m)==0 )
+ return init;
+ else
return (*A(l,m))(i,j);
+ }
+ const T&get(int i, int j, int l, int m)const {
+ if( A(l,m)==0 )
+ return init;
+ else
+ return (*A(l,m))(i,j);
+ }
+ T&operator()(int i, int j, int l, int m) {
+ if( A(l,m)==0 ) {
+ A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
}
- void clear()
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- if( A(l,m) )
- {
- Array2<T>&a=*A(l,m);
- for(int i=0;i<=l;++i)
- for(int j=0;j<=m;++j)
- a(i,j)=0.0;
- }
- }
+ return (*A(l,m))(i,j);
+ }
+ void clear() {
+ for(int l=0; l<M; ++l)
+ for(int m=0; m<M; ++m)
+ if( A(l,m) ) {
+ Array2<T>&a=*A(l,m);
+ for(int i=0; i<=l; ++i)
+ for(int j=0; j<=m; ++j)
+ a(i,j)=0.0;
+ }
+ }
};
#endif
diff --git a/mgizapp/src/D4Tables.h b/mgizapp/src/D4Tables.h
index ce9e7c3..ef45bb1 100644
--- a/mgizapp/src/D4Tables.h
+++ b/mgizapp/src/D4Tables.h
@@ -9,14 +9,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,757 +29,762 @@
#include "syncObj.h"
extern float d4modelsmooth_factor;
-class m4_key {
+class m4_key
+{
public:
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1, vacancies2;
- m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
- int _v2) :
- deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
- vacancies1(_v1), vacancies2(_v2) {
- }
- friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
- const WordClasses&wcf) {
- if (x.deps&DEP_MODEL_l)
- out << "l: " << x.l<<' ';
- if (x.deps&DEP_MODEL_m)
- out << "m: " << x.m<<' ';
- if (x.deps&DEP_MODEL_F)
- out << "F: " << wcf.classString(x.F)<< ' ';
- if (x.deps&DEP_MODEL_E)
- out << "E: " << wce.classString(x.E)<< ' ';
- // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
- if (x.vacancies1!=-1)
- out << "v1: " << x.vacancies1 << ' ';
- if (x.vacancies2!=-1)
- out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
-
- friend ostream&print1_m5(ostream&out, const m4_key&x,
- const WordClasses&wce, const WordClasses&wcf) {
- out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"))
- << ' ';
- out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"))
- << ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
-
- friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
- const WordClasses&wcf) {
- if (x.deps&DEP_MODELb_l)
- out << "l: " << x.l<<' ';
- if (x.deps&DEP_MODELb_m)
- out << "m: " << x.m<<' ';
- if (x.deps&DEP_MODELb_F)
- out << "F: " << wcf.classString(x.F) << ' ';
- if (x.deps&DEP_MODELb_E)
- out << "E: " << wce.classString(x.E) << ' ';
- if (x.vacancies1!=-1)
- out << "v1: " << x.vacancies1 << ' ';
- if (x.vacancies2!=-1)
- out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
- friend ostream&printb1_m5(ostream&out, const m4_key&x,
- const WordClasses&wcf) {
- out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
- : string("0"))<< ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
+ int deps;
+ int l;
+ int m;
+ int F;
+ int E;
+ int prevj;
+ int vacancies1, vacancies2;
+ m4_key(int _deps, int _l, int _m, int _F, int _E, int _prevj, int _v1,
+ int _v2) :
+ deps(_deps), l(_l), m(_m), F(_F), E(_E), prevj(_prevj),
+ vacancies1(_v1), vacancies2(_v2) {
+ }
+ friend ostream&print1(ostream&out, const m4_key&x, const WordClasses&wce,
+ const WordClasses&wcf) {
+ if (x.deps&DEP_MODEL_l)
+ out << "l: " << x.l<<' ';
+ if (x.deps&DEP_MODEL_m)
+ out << "m: " << x.m<<' ';
+ if (x.deps&DEP_MODEL_F)
+ out << "F: " << wcf.classString(x.F)<< ' ';
+ if (x.deps&DEP_MODEL_E)
+ out << "E: " << wce.classString(x.E)<< ' ';
+ // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
+ if (x.vacancies1!=-1)
+ out << "v1: " << x.vacancies1 << ' ';
+ if (x.vacancies2!=-1)
+ out << "v2: " << x.vacancies2 << ' ';
+ return out << '\n';
+ }
+
+ friend ostream&print1_m5(ostream&out, const m4_key&x,
+ const WordClasses&wce, const WordClasses&wcf) {
+ out << ((x.deps&DEP_MODEL_E) ? wce.classString(x.E) : string("0"))
+ << ' ';
+ out << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F) : string("0"))
+ << ' ';
+ out << x.vacancies1 << ' ';
+ out << x.vacancies2 << ' ';
+ return out;
+ }
+
+ friend ostream&printb1(ostream&out, const m4_key&x, const WordClasses&wce,
+ const WordClasses&wcf) {
+ if (x.deps&DEP_MODELb_l)
+ out << "l: " << x.l<<' ';
+ if (x.deps&DEP_MODELb_m)
+ out << "m: " << x.m<<' ';
+ if (x.deps&DEP_MODELb_F)
+ out << "F: " << wcf.classString(x.F) << ' ';
+ if (x.deps&DEP_MODELb_E)
+ out << "E: " << wce.classString(x.E) << ' ';
+ if (x.vacancies1!=-1)
+ out << "v1: " << x.vacancies1 << ' ';
+ if (x.vacancies2!=-1)
+ out << "v2: " << x.vacancies2 << ' ';
+ return out << '\n';
+ }
+ friend ostream&printb1_m5(ostream&out, const m4_key&x,
+ const WordClasses&wcf) {
+ out << "-1 " << ((x.deps&DEP_MODEL_F) ? wcf.classString(x.F)
+ : string("0"))<< ' ';
+ out << x.vacancies1 << ' ';
+ out << x.vacancies2 << ' ';
+ return out;
+ }
};
-class compare1 {
+class compare1
+{
private:
- int deps;
+ int deps;
public:
- compare1(int _deps) :
- deps(_deps) {
- }
- bool operator()(const m4_key&a, const m4_key&b) const {
- if (deps&DEP_MODEL_l) {
- if (a.l<b.l)
- return 1;
- if (b.l<a.l)
- return 0;
- }
- if (deps&DEP_MODEL_m) {
- if (a.m<b.m)
- return 1;
- if (b.m<a.m)
- return 0;
- }
- if (deps&DEP_MODEL_F) {
- if (a.F<b.F)
- return 1;
- if (b.F<a.F)
- return 0;
- }
- if (deps&DEP_MODEL_E) {
- if (a.E<b.E)
- return 1;
- if (b.E<a.E)
- return 0;
- }
- //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
- if (a.vacancies1<b.vacancies1)
- return 1;
- if (b.vacancies1<a.vacancies1)
- return 0;
- if (a.vacancies2<b.vacancies2)
- return 1;
- if (b.vacancies2<a.vacancies2)
- return 0;
- return 0;
- }
+ compare1(int _deps) :
+ deps(_deps) {
+ }
+ bool operator()(const m4_key&a, const m4_key&b) const {
+ if (deps&DEP_MODEL_l) {
+ if (a.l<b.l)
+ return 1;
+ if (b.l<a.l)
+ return 0;
+ }
+ if (deps&DEP_MODEL_m) {
+ if (a.m<b.m)
+ return 1;
+ if (b.m<a.m)
+ return 0;
+ }
+ if (deps&DEP_MODEL_F) {
+ if (a.F<b.F)
+ return 1;
+ if (b.F<a.F)
+ return 0;
+ }
+ if (deps&DEP_MODEL_E) {
+ if (a.E<b.E)
+ return 1;
+ if (b.E<a.E)
+ return 0;
+ }
+ //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
+ if (a.vacancies1<b.vacancies1)
+ return 1;
+ if (b.vacancies1<a.vacancies1)
+ return 0;
+ if (a.vacancies2<b.vacancies2)
+ return 1;
+ if (b.vacancies2<a.vacancies2)
+ return 0;
+ return 0;
+ }
};
-class compareb1 {
+class compareb1
+{
private:
- int deps;
+ int deps;
public:
- compareb1(int _deps) :
- deps(_deps) {
- }
- bool operator()(const m4_key&a, const m4_key&b) const {
- if (deps&DEP_MODELb_l) {
- if (a.l<b.l)
- return 1;
- if (b.l<a.l)
- return 0;
- }
- if (deps&DEP_MODELb_m) {
- if (a.m<b.m)
- return 1;
- if (b.m<a.m)
- return 0;
- }
- if (deps&DEP_MODELb_F) {
- if (a.F<b.F)
- return 1;
- if (b.F<a.F)
- return 0;
- }
- if (deps&DEP_MODELb_E) {
- if (a.E<b.E)
- return 1;
- if (b.E<a.E)
- return 0;
- }
- //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
- if (a.vacancies1<b.vacancies1)
- return 1;
- if (b.vacancies1<a.vacancies1)
- return 0;
- if (a.vacancies2<b.vacancies2)
- return 1;
- if (b.vacancies2<a.vacancies2)
- return 0;
- return 0;
- }
+ compareb1(int _deps) :
+ deps(_deps) {
+ }
+ bool operator()(const m4_key&a, const m4_key&b) const {
+ if (deps&DEP_MODELb_l) {
+ if (a.l<b.l)
+ return 1;
+ if (b.l<a.l)
+ return 0;
+ }
+ if (deps&DEP_MODELb_m) {
+ if (a.m<b.m)
+ return 1;
+ if (b.m<a.m)
+ return 0;
+ }
+ if (deps&DEP_MODELb_F) {
+ if (a.F<b.F)
+ return 1;
+ if (b.F<a.F)
+ return 0;
+ }
+ if (deps&DEP_MODELb_E) {
+ if (a.E<b.E)
+ return 1;
+ if (b.E<a.E)
+ return 0;
+ }
+ //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
+ if (a.vacancies1<b.vacancies1)
+ return 1;
+ if (b.vacancies1<a.vacancies1)
+ return 0;
+ if (a.vacancies2<b.vacancies2)
+ return 1;
+ if (b.vacancies2<a.vacancies2)
+ return 0;
+ return 0;
+ }
};
-inline void tokenize(const string&in, Vector<string>&out) {
- string s;
- istrstream l(in.c_str());
- while (l>>s)
- out.push_back(s);
+inline void tokenize(const string&in, Vector<string>&out)
+{
+ string s;
+ istrstream l(in.c_str());
+ while (l>>s)
+ out.push_back(s);
}
-class d4model {
+class d4model
+{
public:
- typedef Vector<pair<COUNT,PROB> > Vpff;
- map<m4_key,Vpff,compare1 > D1;
- map<m4_key,Vpff,compareb1> Db1;
- PositionIndex msl;
- WordClasses* ewordclasses;
- WordClasses* fwordclasses;
- template<class MAPPER> void makeWordClasses(const MAPPER&m1,
- const MAPPER&m2, string efile, string ffile, const vcbList& elist,
- const vcbList& flist) {
- ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
- if ( !estrm) {
- cerr << "ERROR: can not read " << efile << endl;
- } else
- ewordclasses->read(estrm, m1,elist);
- if ( !fstrm)
- cerr << "ERROR: can not read " << ffile << endl;
- else
- fwordclasses->read(fstrm, m2,flist);
- }
- d4model(PositionIndex _msl, WordClasses& e, WordClasses& f) :
- D1(compare1(M4_Dependencies)), Db1(compareb1(M4_Dependencies)),
- msl(_msl),ewordclasses(&e),fwordclasses(&f) {
- }
-
+ typedef Vector<pair<COUNT,PROB> > Vpff;
+ map<m4_key,Vpff,compare1 > D1;
+ map<m4_key,Vpff,compareb1> Db1;
+ PositionIndex msl;
+ WordClasses* ewordclasses;
+ WordClasses* fwordclasses;
+ template<class MAPPER> void makeWordClasses(const MAPPER&m1,
+ const MAPPER&m2, string efile, string ffile, const vcbList& elist,
+ const vcbList& flist) {
+ ifstream estrm(efile.c_str()), fstrm(ffile.c_str());
+ if ( !estrm) {
+ cerr << "ERROR: can not read " << efile << endl;
+ } else
+ ewordclasses->read(estrm, m1,elist);
+ if ( !fstrm)
+ cerr << "ERROR: can not read " << ffile << endl;
+ else
+ fwordclasses->read(fstrm, m2,flist);
+ }
+ d4model(PositionIndex _msl, WordClasses& e, WordClasses& f) :
+ D1(compare1(M4_Dependencies)), Db1(compareb1(M4_Dependencies)),
+ msl(_msl),ewordclasses(&e),fwordclasses(&f) {
+ }
+
protected:
- inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
- int m) {
- assert(j>=1);
- m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if (p==D1.end())
- p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- return (p->second)[j-j_cp+msl].first;
- };
-
- inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
- int l, int m) {
- assert(j>=1);
- assert(j_prev>=1);
- m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if (p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=Db1.end());
- return (p->second)[j-j_prev+msl].first;
- };
- Mutex lock_f,lock_b;
+ inline COUNT&getCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
+ int m) {
+ assert(j>=1);
+ m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
+ map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
+ if (p==D1.end())
+ p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=D1.end());
+ return (p->second)[j-j_cp+msl].first;
+ };
+
+ inline COUNT&getCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
+ int l, int m) {
+ assert(j>=1);
+ assert(j_prev>=1);
+ m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
+ map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
+ if (p==Db1.end())
+ p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=Db1.end());
+ return (p->second)[j-j_prev+msl].first;
+ };
+ Mutex lock_f,lock_b;
public:
- inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
- int m, const COUNT& v){
- lock_f.lock();
- getCountRef_first(j,j_cp,E,F,l,m)+=v;
- lock_f.unlock();
- }
-
- inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
- int l, int m, const COUNT& v){
- lock_b.lock();
- getCountRef_bigger(j,j_prev,E,F,l,m)+=v;
- lock_b.unlock();
- }
-
-
-
- void merge(d4model &d) {
- map<m4_key,Vpff,compare1 >::iterator it;
- for (it = d.D1.begin(); it!=d.D1.end(); it++) {
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(it->first);
- if (p==D1.end())
- p=D1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it->second[i].second;
- }
- }
+ inline void augCountRef_first(WordIndex j, WordIndex j_cp, int E, int F, int l,
+ int m, const COUNT& v) {
+ lock_f.lock();
+ getCountRef_first(j,j_cp,E,F,l,m)+=v;
+ lock_f.unlock();
+ }
+
+ inline void augCountRef_bigger(WordIndex j, WordIndex j_prev, int E, int F,
+ int l, int m, const COUNT& v) {
+ lock_b.lock();
+ getCountRef_bigger(j,j_prev,E,F,l,m)+=v;
+ lock_b.unlock();
+ }
+
+
+
+ void merge(d4model &d) {
+ map<m4_key,Vpff,compare1 >::iterator it;
+ for (it = d.D1.begin(); it!=d.D1.end(); it++) {
+ map<m4_key,Vpff,compare1 >::iterator p=D1.find(it->first);
+ if (p==D1.end())
+ p=D1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ int i;
+ for (i=0; i<it->second.size(); i++) {
+ p->second[i].second+=it->second[i].second;
+ }
+ }
#ifdef WIN32
- map<m4_key,Vpff,compareb1 >::iterator it1;
- for (it1 = d.Db1.begin(); it1!=d.Db1.end(); it1++) {
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(it->first);
- if (p==Db1.end())
- p=Db1.insert(make_pair(it1->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it1->second[i].second;
- }
- }
+ map<m4_key,Vpff,compareb1 >::iterator it1;
+ for (it1 = d.Db1.begin(); it1!=d.Db1.end(); it1++) {
+ map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(it->first);
+ if (p==Db1.end())
+ p=Db1.insert(make_pair(it1->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ int i;
+ for (i=0; i<it->second.size(); i++) {
+ p->second[i].second+=it1->second[i].second;
+ }
+ }
#else
- for (it = d.Db1.begin(); it!=d.Db1.end(); it++) {
- map<m4_key,Vpff,compare1 >::iterator p=Db1.find(it->first);
- if (p==Db1.end())
- p=Db1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- int i;
- for (i=0; i<it->second.size(); i++) {
- p->second[i].second+=it->second[i].second;
- }
- }
+ for (it = d.Db1.begin(); it!=d.Db1.end(); it++) {
+ map<m4_key,Vpff,compare1 >::iterator p=Db1.find(it->first);
+ if (p==Db1.end())
+ p=Db1.insert(make_pair(it->first,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ int i;
+ for (i=0; i<it->second.size(); i++) {
+ p->second[i].second+=it->second[i].second;
+ }
+ }
#endif
- }
-
- bool augCount(const char* fD1, const char* fDb) {
- ifstream ifsd(fD1);
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1, vacancies2;
- int len;
- double count;
- if (!ifsd) {
- cerr << "Failed in " << fD1 << endl;
- return false;
- }
- {
- while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
- >>vacancies2>>len) {
- m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
- vacancies2);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if (p==D1.end())
- p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for (i=0; i<len; i++) {
- ifsd >> count;
- p->second[i].first+=count;
- }
-
- }
- }
- ifstream ifsd1(fDb);
- if (!ifsd1) {
- cerr << "Failed in " << fDb << endl;
- return false;
- }
- {
- while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
- >>vacancies2>>len) {
- m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
- vacancies2);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if (p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for (i=0; i<len; i++) {
- ifsd1 >> count;
- p->second[i].first+=count;
- }
-
- }
- }
- return true;
- }
-
- bool readProbTable(const char* fD1, const char* fDb){
- ifstream ifsd(fD1);
- int deps;
- int l;
- int m;
- int F;
- int E;
- int prevj;
- int vacancies1,vacancies2;
- int len;
- double count;
- if(!ifsd){
- cerr << "Failed in " << fD1 << endl;
- return false;
+ }
+
+ bool augCount(const char* fD1, const char* fDb) {
+ ifstream ifsd(fD1);
+ int deps;
+ int l;
+ int m;
+ int F;
+ int E;
+ int prevj;
+ int vacancies1, vacancies2;
+ int len;
+ double count;
+ if (!ifsd) {
+ cerr << "Failed in " << fD1 << endl;
+ return false;
+ }
+ {
+ while (ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1
+ >>vacancies2>>len) {
+ m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
+ vacancies2);
+ map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
+ if (p==D1.end())
+ p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=D1.end());
+ int i;
+ for (i=0; i<len; i++) {
+ ifsd >> count;
+ p->second[i].first+=count;
}
- {
- while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
- m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for(i=0;i<len;i++){
- ifsd >> count;
- p->second[i].second=count;
- }
-
- }
+
+ }
+ }
+ ifstream ifsd1(fDb);
+ if (!ifsd1) {
+ cerr << "Failed in " << fDb << endl;
+ return false;
+ }
+ {
+ while (ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1
+ >>vacancies2>>len) {
+ m4_key key(M4_Dependencies, l, m, F, E, prevj, vacancies1,
+ vacancies2);
+ map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
+ if (p==Db1.end())
+ p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=D1.end());
+ int i;
+ for (i=0; i<len; i++) {
+ ifsd1 >> count;
+ p->second[i].first+=count;
}
- ifstream ifsd1(fDb);
- if(!ifsd1){
- cerr << "Failed in " << fDb << endl;
- return false;
+
+ }
+ }
+ return true;
+ }
+
+ bool readProbTable(const char* fD1, const char* fDb) {
+ ifstream ifsd(fD1);
+ int deps;
+ int l;
+ int m;
+ int F;
+ int E;
+ int prevj;
+ int vacancies1,vacancies2;
+ int len;
+ double count;
+ if(!ifsd) {
+ cerr << "Failed in " << fD1 << endl;
+ return false;
+ }
+ {
+ while(ifsd >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len) {
+ m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
+ map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
+ if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=D1.end());
+ int i;
+ for(i=0; i<len; i++) {
+ ifsd >> count;
+ p->second[i].second=count;
}
- {
- while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len){
- m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- int i;
- for(i=0;i<len;i++){
- ifsd1 >> count;
- p->second[i].second=count;
- }
-
- }
+
+ }
+ }
+ ifstream ifsd1(fDb);
+ if(!ifsd1) {
+ cerr << "Failed in " << fDb << endl;
+ return false;
+ }
+ {
+ while(ifsd1 >> deps >> l >> m >>F >> E >> prevj >> vacancies1>>vacancies2>>len) {
+ m4_key key(M4_Dependencies,l,m,F,E,prevj,vacancies1,vacancies2);
+ map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
+ if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
+ assert(p!=D1.end());
+ int i;
+ for(i=0; i<len; i++) {
+ ifsd1 >> count;
+ p->second[i].second=count;
}
- return true;
- }
-
-
- bool printProbTable(const char* fD1, const char* fDb) {
- ofstream ofsd(fD1);
- if (!ofsd.is_open()) {
- return false;
- }
- {
- map<m4_key,Vpff,compare1 >::iterator it;
- for (it = D1.begin(); it!=D1.end(); it++) {
- ofsd << it->first.deps << " " << it->first.l << " "
- << it->first.m << " " << it->first.F << " "
- << it->first.E << " " << it->first.prevj << " "
- << it->first.vacancies1 << " " << it->first.vacancies2
- << " " << it->second.size() << " ";
- int i;
- for (i=0; i<it->second.size(); i++) {
- ofsd << it->second[i].second << " ";
- }
- ofsd << endl;
- }
-
- }
-
- ofstream ofsdb(fDb);
- if (!ofsdb.is_open()) {
- return false;
- }
-
- map<m4_key,Vpff,compareb1 >::iterator it;
- for (it = Db1.begin(); it!=Db1.end(); it++) {
- ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
- << " " << it->first.F << " " << it->first.E << " "
- << it->first.prevj << " " << it->first.vacancies1 << " "
- << it->first.vacancies2 << " " << it->second.size()<< endl;
- int i;
- for (i=0; i<it->second.size(); i++) {
- ofsdb << it->second[i].second << " ";
- }
- ofsdb << endl;
- }
- return true;
- }
-
- bool dumpCount(const char* fD1, const char* fDb){
- ofstream ofsd(fD1);
- if(!ofsd.is_open()){
- return false;
+
+ }
+ }
+ return true;
+ }
+
+
+ bool printProbTable(const char* fD1, const char* fDb) {
+ ofstream ofsd(fD1);
+ if (!ofsd.is_open()) {
+ return false;
+ }
+ {
+ map<m4_key,Vpff,compare1 >::iterator it;
+ for (it = D1.begin(); it!=D1.end(); it++) {
+ ofsd << it->first.deps << " " << it->first.l << " "
+ << it->first.m << " " << it->first.F << " "
+ << it->first.E << " " << it->first.prevj << " "
+ << it->first.vacancies1 << " " << it->first.vacancies2
+ << " " << it->second.size() << " ";
+ int i;
+ for (i=0; i<it->second.size(); i++) {
+ ofsd << it->second[i].second << " ";
}
- {
- map<m4_key,Vpff,compare1 >::iterator it;
- for(it = D1.begin(); it!=D1.end();it++){
- ofsd << it->first.deps << " "
- << it->first.l << " "
- << it->first.m << " "
- << it->first.F << " "
- << it->first.E << " "
- << it->first.prevj << " "
- << it->first.vacancies1 << " "
- << it->first.vacancies2 << " "
- << it->second.size() << " ";
- int i;
- for(i=0;i<it->second.size();i++){
- ofsd << it->second[i].first << " ";
- }
- ofsd << endl;
- }
-
+ ofsd << endl;
+ }
+
+ }
+
+ ofstream ofsdb(fDb);
+ if (!ofsdb.is_open()) {
+ return false;
+ }
+
+ map<m4_key,Vpff,compareb1 >::iterator it;
+ for (it = Db1.begin(); it!=Db1.end(); it++) {
+ ofsdb << it->first.deps << " " << it->first.l << " " << it->first.m
+ << " " << it->first.F << " " << it->first.E << " "
+ << it->first.prevj << " " << it->first.vacancies1 << " "
+ << it->first.vacancies2 << " " << it->second.size()<< endl;
+ int i;
+ for (i=0; i<it->second.size(); i++) {
+ ofsdb << it->second[i].second << " ";
+ }
+ ofsdb << endl;
+ }
+ return true;
+ }
+
+ bool dumpCount(const char* fD1, const char* fDb) {
+ ofstream ofsd(fD1);
+ if(!ofsd.is_open()) {
+ return false;
+ }
+ {
+ map<m4_key,Vpff,compare1 >::iterator it;
+ for(it = D1.begin(); it!=D1.end(); it++) {
+ ofsd << it->first.deps << " "
+ << it->first.l << " "
+ << it->first.m << " "
+ << it->first.F << " "
+ << it->first.E << " "
+ << it->first.prevj << " "
+ << it->first.vacancies1 << " "
+ << it->first.vacancies2 << " "
+ << it->second.size() << " ";
+ int i;
+ for(i=0; i<it->second.size(); i++) {
+ ofsd << it->second[i].first << " ";
+ }
+ ofsd << endl;
+ }
+
+ }
+
+ ofstream ofsdb(fDb);
+ if(!ofsdb.is_open()) {
+ return false;
+ }
+
+ map<m4_key,Vpff,compareb1 >::iterator it;
+ for(it = Db1.begin(); it!=Db1.end(); it++) {
+ ofsdb << it->first.deps << " "
+ << it->first.l << " "
+ << it->first.m << " "
+ << it->first.F << " "
+ << it->first.E << " "
+ << it->first.prevj << " "
+ << it->first.vacancies1 << " "
+ << it->first.vacancies2 << " "
+ << it->second.size()<< endl;
+ int i;
+ for(i=0; i<it->second.size(); i++) {
+ ofsdb << it->second[i].first << " ";
+ }
+ ofsdb << endl;
+ }
+ return true;
+ }
+ map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
+ int F, int l, int m) const {
+ return D1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
+ }
+ PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
+ const map<m4_key,Vpff,compare1 >::const_iterator& p) const {
+ assert(j>=1);
+ //assert(j_cp>=0);
+ assert(j<=msl);
+ assert(j_cp<=msl);
+ if (p==D1.end()) {
+ return PROB_SMOOTH;
+ } else {
+ massert((p->second)[j-j_cp+msl].second<=1.0);
+ return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
+ -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
+ }
+ }
+
+ PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const {
+ assert(j>=1);
+ //assert(j_cp>=0);
+ assert(j<=msl);
+ assert(j_cp<=msl);
+ m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
+ map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
+ if (p==D1.end()) {
+ return PROB_SMOOTH;
+ } else {
+ massert((p->second)[j-j_cp+msl].second<=1.0);
+ return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
+ -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
+ }
+ }
+ map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
+ int F, int l, int m) const {
+ return Db1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
+ }
+ PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
+ const map<m4_key,Vpff,compareb1 >::const_iterator&p) const {
+ massert(j>=1);
+ massert(j_prev>=1);
+ massert(j>j_prev);
+ massert(j<=msl);
+ massert(j_prev<=msl);
+ if (p==Db1.end()) {
+ return PROB_SMOOTH;
+ } else {
+ massert((p->second)[j-j_prev+msl].second<=1.0 );
+ return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
+ -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
+ }
+ }
+
+ PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
+ int m) const {
+ massert(j>=1);
+ massert(j_prev>=1);
+ massert(j>j_prev);
+ massert(j<=msl);
+ massert(j_prev<=msl);
+ m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
+ map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
+ if (p==Db1.end()) {
+ return PROB_SMOOTH;
+ } else {
+ massert((p->second)[j-j_prev+msl].second<=1.0 );
+ return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
+ -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
+ }
+ }
+
+ void normalizeTable() {
+ int nParams=0;
+ for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
+ Vpff&d1=i->second;
+ double sum=0.0;
+ for (PositionIndex i=0; i<d1.size(); i++)
+ sum+=d1[i].first;
+ for (PositionIndex i=0; i<d1.size(); i++) {
+ d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
+ nParams++;
+ }
+ }
+ for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
+ Vpff&db1=i->second;
+ double sum=0.0;
+ for (PositionIndex i=0; i<db1.size(); i++)
+ sum+=db1[i].first;
+ for (PositionIndex i=0; i<db1.size(); i++) {
+ db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
+ nParams++;
+ }
+ }
+ cout << "D4 table contains " << nParams << " parameters.\n";
+ }
+
+ void clear() {
+ for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
+ Vpff&d1=i->second;
+ for (PositionIndex i=0; i<d1.size(); i++)
+ d1[i].first=0.0;
+ }
+ for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
+ Vpff&db1=i->second;
+ for (PositionIndex i=0; i<db1.size(); i++)
+ db1[i].first=0.0;
+ }
+ }
+
+ /*void printProbTable(const char*fname1,const char*fname2)
+ {
+ ofstream out(fname1);
+ double ssum=0.0;
+ out << "# Translation tables for Model 4 .\n";
+ out << "# Table for head of cept.\n";
+ for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
+ const Vpff&d1=i->second;
+ double sum=0.0;
+ for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
+ if ( sum ){
+ print1(out,i->first,ewordclasses,fwordclasses);
+ out << "SUM: " << sum << ' '<< '\n';
+ for(unsigned ii=0;ii<d1.size();ii++)
+ if( d1[ii].first )
+ out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
+ out << endl;
+ }
+ ssum+=sum;
+ }
+ out << "# Table for non-head of cept.\n";
+ for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
+ {
+ const Vpff&db1=i->second;
+ double sum=0.0;
+ for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
+ if( sum ){
+ printb1(out,i->first,ewordclasses,fwordclasses);
+ out << "SUM: " << sum << ' '<<'\n';
+ for(unsigned ii=0;ii<db1.size();ii++)
+ if( db1[ii].first )
+ {
+ out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
+ }
+ out << endl;
+ }
+ ssum+=sum;
+ }
+ out << endl << "FULL-SUM: " << ssum << endl;
+ if( M4_Dependencies==76 ){
+ ofstream out2(fname2);
+ for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
+ {
+ const Vpff&d1=i->second;
+ for(unsigned ii=0;ii<d1.size();ii++)
+ if( d1[ii].first )
+ out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
+ }
+ for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
+ const Vpff&db1=i->second;
+ for(unsigned ii=0;ii<db1.size();ii++)
+ if( db1[ii].first )
+ out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
+ }
+ }
+ }*/
+
+ bool readProbTable(const char *fname) {
+ cerr << "Reading D4Tables from " << fname << endl;
+ ifstream file(fname);
+ string line;
+ do {
+ getline(file, line);
+ } while (line.length()&&line[0]=='#');
+
+ do {
+ while (line.length()==0)
+ getline(file, line);
+ if (line[0]=='#')
+ break;
+ Vector<string> linestr;
+ tokenize(line, linestr);
+ m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
+ for (unsigned int i=0; i<linestr.size(); i+=2) {
+ if (linestr[i]=="l:") {
+ k.l=atoi(linestr[i+1].c_str());
+ iassert(M4_Dependencies&DEP_MODEL_l);
+ }
+ if (linestr[i]=="m:") {
+ k.m=atoi(linestr[i+1].c_str());
+ iassert(M4_Dependencies&DEP_MODEL_m);
}
-
- ofstream ofsdb(fDb);
- if(!ofsdb.is_open()){
- return false;
+ if (linestr[i]=="F:") {
+ k.F=(*fwordclasses)(linestr[i+1]);
+ iassert(M4_Dependencies&DEP_MODEL_F);
}
-
- map<m4_key,Vpff,compareb1 >::iterator it;
- for(it = Db1.begin(); it!=Db1.end();it++){
- ofsdb << it->first.deps << " "
- << it->first.l << " "
- << it->first.m << " "
- << it->first.F << " "
- << it->first.E << " "
- << it->first.prevj << " "
- << it->first.vacancies1 << " "
- << it->first.vacancies2 << " "
- << it->second.size()<< endl;
- int i;
- for(i=0;i<it->second.size();i++){
- ofsdb << it->second[i].first << " ";
- }
- ofsdb << endl;
+ if (linestr[i]=="E:") {
+ k.E=(*ewordclasses)(linestr[i+1]);
+ iassert(M4_Dependencies&DEP_MODEL_E);
+ }
+ //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
+ }
+ string str;
+ double sum;
+ file >> str >> sum;
+ iassert(str=="SUM:");
+ if (str!="SUM:")
+ cerr << "ERROR: string is " << str << " and not sum " << endl;
+
+ do {
+ int value;
+ double count;
+ getline(file, line);
+ istrstream twonumbers(line.c_str());
+ if (twonumbers >> value >> count) {
+ if (D1.count(k)==0)
+ D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
+ 0.0, 0.0))));
+ D1[k][value+msl]=make_pair(count, count/sum);
+ }
+ } while (line.length());
+ } while (file);
+ do {
+ getline(file, line);
+ } while (line.length()&&line[0]=='#');
+ do {
+ while (line.length()==0)
+ getline(file, line);
+ if (line[0]=='#')
+ break;
+ Vector<string> linestr;
+ tokenize(line, linestr);
+ m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
+ bool sumRead=0;
+ for (unsigned int i=0; i<linestr.size(); i+=2) {
+ if (linestr[i]=="l:") {
+ k.l=atoi(linestr[i+1].c_str());
+ iassert(M4_Dependencies&DEP_MODELb_l);
+ } else if (linestr[i]=="m:") {
+ k.m=atoi(linestr[i+1].c_str());
+ iassert(M4_Dependencies&DEP_MODELb_m);
+ } else if (linestr[i]=="F:") {
+ k.F=(*fwordclasses)(linestr[i+1]);
+ iassert(M4_Dependencies&DEP_MODELb_F);
+ } else if (linestr[i]=="E:") {
+ k.E=(*ewordclasses)(linestr[i+1]);
+ iassert(M4_Dependencies&DEP_MODELb_E);
+ } else if (linestr[i]=="SUM:") {
+ cerr << "Warning: obviously no dependency.\n";
+ sumRead=1;
+ } else if (linestr[i]=="FULL-SUM:") {
+ break;
+ } else {
+ cerr << "ERROR: error in reading d4 tables: " << linestr[i]
+ << ' ' << linestr[i+1] << endl;
+ }
+ }
+ string str;
+ double sum;
+ if (sumRead==0)
+ file >> str >> sum;
+ else {
+ str=linestr[0];
+ sum=atof(linestr[1].c_str());
+ }
+ if (str!="SUM:")
+ cerr << "ERROR: should read SUM but read " << str << endl;
+ do {
+ int value;
+ double count;
+ getline(file, line);
+ istrstream twonumbers(line.c_str());
+ if (twonumbers >> value >> count) {
+ if (Db1.count(k)==0)
+ Db1.insert(make_pair(k, Vpff(msl*2+1,
+ pair<COUNT, PROB>(0.0, 0.0))));
+ Db1[k][value+msl]=make_pair(count, count/sum);
}
- return true;
- }
- map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,
- int F, int l, int m) const {
- return D1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
- }
- PROB getProb_first_withiterator(WordIndex j, WordIndex j_cp, int m,
- const map<m4_key,Vpff,compare1 >::const_iterator& p) const {
- assert(j>=1);
- //assert(j_cp>=0);
- assert(j<=msl);
- assert(j_cp<=msl);
- if (p==D1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
- }
- }
-
- PROB getProb_first(WordIndex j, WordIndex j_cp, int E, int F, int l, int m) const {
- assert(j>=1);
- //assert(j_cp>=0);
- assert(j<=msl);
- assert(j_cp<=msl);
- m4_key key(M4_Dependencies, l, m, F, E, j_cp, -1, -1);
- map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
- if (p==D1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH, d4modelsmooth_factor/(2*m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
- }
- }
- map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,
- int F, int l, int m) const {
- return Db1.find(m4_key(M4_Dependencies, l, m, F, E, 0, -1, -1));
- }
- PROB getProb_bigger_withiterator(WordIndex j, WordIndex j_prev, int m,
- const map<m4_key,Vpff,compareb1 >::const_iterator&p) const {
- massert(j>=1);
- massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);
- massert(j_prev<=msl);
- if (p==Db1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
- }
- }
-
- PROB getProb_bigger(WordIndex j, WordIndex j_prev, int E, int F, int l,
- int m) const {
- massert(j>=1);
- massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);
- massert(j_prev<=msl);
- m4_key key(M4_Dependencies, l, m, F, E, j_prev, -1, -1);
- map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
- if (p==Db1.end()) {
- return PROB_SMOOTH;
- } else {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH, d4modelsmooth_factor/(m-1)+(1
- -d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
- }
- }
-
- void normalizeTable() {
- int nParams=0;
- for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
- Vpff&d1=i->second;
- double sum=0.0;
- for (PositionIndex i=0; i<d1.size(); i++)
- sum+=d1[i].first;
- for (PositionIndex i=0; i<d1.size(); i++) {
- d1[i].second=sum ? (d1[i].first/sum) : (1.0/d1.size());
- nParams++;
- }
- }
- for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
- Vpff&db1=i->second;
- double sum=0.0;
- for (PositionIndex i=0; i<db1.size(); i++)
- sum+=db1[i].first;
- for (PositionIndex i=0; i<db1.size(); i++) {
- db1[i].second=sum ? (db1[i].first/sum) : (1.0/db1.size());
- nParams++;
- }
- }
- cout << "D4 table contains " << nParams << " parameters.\n";
- }
-
- void clear() {
- for (map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
- Vpff&d1=i->second;
- for (PositionIndex i=0; i<d1.size(); i++)
- d1[i].first=0.0;
- }
- for (map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
- Vpff&db1=i->second;
- for (PositionIndex i=0; i<db1.size(); i++)
- db1[i].first=0.0;
- }
- }
-
- /*void printProbTable(const char*fname1,const char*fname2)
- {
- ofstream out(fname1);
- double ssum=0.0;
- out << "# Translation tables for Model 4 .\n";
- out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i){
- const Vpff&d1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ){
- print1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<< '\n';
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
- out << endl;
- }
- ssum+=sum;
- }
- out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
- {
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ){
- printb1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<<'\n';
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- {
- out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
- }
- out << endl;
- }
- ssum+=sum;
- }
- out << endl << "FULL-SUM: " << ssum << endl;
- if( M4_Dependencies==76 ){
- ofstream out2(fname2);
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
- {
- const Vpff&d1=i->second;
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
- }
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i) {
- const Vpff&db1=i->second;
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
- }
- }
- }*/
-
- bool readProbTable(const char *fname) {
- cerr << "Reading D4Tables from " << fname << endl;
- ifstream file(fname);
- string line;
- do {
- getline(file, line);
- } while (line.length()&&line[0]=='#');
-
- do {
- while (line.length()==0)
- getline(file, line);
- if (line[0]=='#')
- break;
- Vector<string> linestr;
- tokenize(line, linestr);
- m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
- for (unsigned int i=0; i<linestr.size(); i+=2) {
- if (linestr[i]=="l:") {
- k.l=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODEL_l);
- }
- if (linestr[i]=="m:") {
- k.m=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODEL_m);
- }
- if (linestr[i]=="F:") {
- k.F=(*fwordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODEL_F);
- }
- if (linestr[i]=="E:") {
- k.E=(*ewordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODEL_E);
- }
- //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
- }
- string str;
- double sum;
- file >> str >> sum;
- iassert(str=="SUM:");
- if (str!="SUM:")
- cerr << "ERROR: string is " << str << " and not sum " << endl;
-
- do {
- int value;
- double count;
- getline(file, line);
- istrstream twonumbers(line.c_str());
- if (twonumbers >> value >> count) {
- if (D1.count(k)==0)
- D1.insert(make_pair(k, Vpff(msl*2+1, pair<COUNT, PROB>(
- 0.0, 0.0))));
- D1[k][value+msl]=make_pair(count, count/sum);
- }
- } while (line.length());
- } while (file);
- do {
- getline(file, line);
- } while (line.length()&&line[0]=='#');
- do {
- while (line.length()==0)
- getline(file, line);
- if (line[0]=='#')
- break;
- Vector<string> linestr;
- tokenize(line, linestr);
- m4_key k(M4_Dependencies, 0, 0, 0, 0, 0, -1, -1);
- bool sumRead=0;
- for (unsigned int i=0; i<linestr.size(); i+=2) {
- if (linestr[i]=="l:") {
- k.l=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODELb_l);
- } else if (linestr[i]=="m:") {
- k.m=atoi(linestr[i+1].c_str());
- iassert(M4_Dependencies&DEP_MODELb_m);
- } else if (linestr[i]=="F:") {
- k.F=(*fwordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODELb_F);
- } else if (linestr[i]=="E:") {
- k.E=(*ewordclasses)(linestr[i+1]);
- iassert(M4_Dependencies&DEP_MODELb_E);
- } else if (linestr[i]=="SUM:") {
- cerr << "Warning: obviously no dependency.\n";
- sumRead=1;
- } else if (linestr[i]=="FULL-SUM:") {
- break;
- } else {
- cerr << "ERROR: error in reading d4 tables: " << linestr[i]
- << ' ' << linestr[i+1] << endl;
- }
- }
- string str;
- double sum;
- if (sumRead==0)
- file >> str >> sum;
- else {
- str=linestr[0];
- sum=atof(linestr[1].c_str());
- }
- if (str!="SUM:")
- cerr << "ERROR: should read SUM but read " << str << endl;
- do {
- int value;
- double count;
- getline(file, line);
- istrstream twonumbers(line.c_str());
- if (twonumbers >> value >> count) {
- if (Db1.count(k)==0)
- Db1.insert(make_pair(k, Vpff(msl*2+1,
- pair<COUNT, PROB>(0.0, 0.0))));
- Db1[k][value+msl]=make_pair(count, count/sum);
- }
- } while (file&&line.length());
- } while (file);
- return 1;
- }
+ } while (file&&line.length());
+ } while (file);
+ return 1;
+ }
};
#endif
diff --git a/mgizapp/src/D5Tables.h b/mgizapp/src/D5Tables.h
index 0cfc0c6..7e0ebf3 100644
--- a/mgizapp/src/D5Tables.h
+++ b/mgizapp/src/D5Tables.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,38 +32,36 @@ extern float d5modelsmooth_factor;
class d5model
{
- private:
+private:
typedef Vector < pair < COUNT,PROB > >Vpff;
map< m4_key,Vpff,compare1 > D1;
map< m4_key,Vpff,compareb1 > Db1;
- public:
+public:
d4model&d4m;
WordClasses* ewordclasses;
WordClasses* fwordclasses;
template<class MAPPER>
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile
- , const vcbList& elist,
- const vcbList& flist)
- {
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm )
- cerr << "ERROR: can not read classes from " << efile << endl;
- else
- ewordclasses->read(estrm,m1,elist);
- if( !fstrm )
- cerr << "ERROR: can not read classes from " << ffile << endl;
- else
- fwordclasses->read(fstrm,m2,flist);
- }
+ , const vcbList& elist,
+ const vcbList& flist) {
+ ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
+ if( !estrm )
+ cerr << "ERROR: can not read classes from " << efile << endl;
+ else
+ ewordclasses->read(estrm,m1,elist);
+ if( !fstrm )
+ cerr << "ERROR: can not read classes from " << ffile << endl;
+ else
+ fwordclasses->read(fstrm,m2,flist);
+ }
d5model (d4model&_d4m)
:D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m),
- ewordclasses(_d4m.ewordclasses),fwordclasses(_d4m.fwordclasses)
- {}
+ ewordclasses(_d4m.ewordclasses),fwordclasses(_d4m.fwordclasses)
+ {}
COUNT &getCountRef_first (PositionIndex vacancies_j,
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
+ PositionIndex vacancies_jp, int F,
+ PositionIndex l, PositionIndex m,
+ PositionIndex vacancies_total) {
massert(vacancies_j>0);
massert(vacancies_total>0);
//massert(vacancies_jp<=vacancies_total);
@@ -77,10 +75,9 @@ class d5model
return (p->second)[vacancies_j].first;
}
COUNT &getCountRef_bigger (PositionIndex vacancies_j,
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
+ PositionIndex vacancies_jp, int F,
+ PositionIndex l, PositionIndex m,
+ PositionIndex vacancies_total) {
massert(vacancies_j>0);
massert(vacancies_total>0);
massert (vacancies_jp <= vacancies_j);
@@ -93,9 +90,8 @@ class d5model
return (p->second)[vacancies_j - vacancies_jp].first;
}
PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
+ int F, PositionIndex l, PositionIndex m,
+ PositionIndex vacancies_total) const {
massert(vacancies_j>0);
massert(vacancies_total>0);
//massert(vacancies_jp<=vacancies_total);
@@ -109,9 +105,8 @@ class d5model
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
}
PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
+ int F, PositionIndex l, PositionIndex m,
+ PositionIndex vacancies_total) const {
massert(vacancies_j>0);
massert(vacancies_total>0);
massert (vacancies_jp <= vacancies_j);
@@ -123,108 +118,96 @@ class d5model
else
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second);
}
- void normalizeTable ()
- {
- int nParams=0;
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex i=0;i<d1.size();i++)
- sum+=d1[i].first+d5modelsmooth_countoffset;
- for(PositionIndex i=0;i<d1.size();i++)
- {
- d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
- nParams++;
- }
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex i=0;i<db1.size();i++)
- sum+=db1[i].first+d5modelsmooth_countoffset;
- for(PositionIndex i=0;i<db1.size();i++)
- {
- db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
- nParams++;
- }
- }
- cout << "D5 table contains " << nParams << " parameters.\n";
+ void normalizeTable () {
+ int nParams=0;
+ for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
+ Vpff&d1=i->second;
+ COUNT sum=0.0;
+ for(PositionIndex i=0; i<d1.size(); i++)
+ sum+=d1[i].first+d5modelsmooth_countoffset;
+ for(PositionIndex i=0; i<d1.size(); i++) {
+ d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
+ nParams++;
+ }
+ }
+ for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
+ Vpff&db1=i->second;
+ double sum=0.0;
+ for(PositionIndex i=0; i<db1.size(); i++)
+ sum+=db1[i].first+d5modelsmooth_countoffset;
+ for(PositionIndex i=0; i<db1.size(); i++) {
+ db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
+ nParams++;
+ }
}
-
-friend ostream&operator<<(ostream&out,d5model&d5m) {
+ cout << "D5 table contains " << nParams << " parameters.\n";
+ }
+
+ friend ostream&operator<<(ostream&out,d5model&d5m) {
out << "# Translation tables for Model 5 .\n";
out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i){
- const Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ) {
- for(unsigned ii=0;ii<d1.size();ii++)
- {
- print1_m5(out,i->first,*d5m.ewordclasses,*d5m.fwordclasses);
- out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
- }
- out << endl;
+ for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin(); i!=d5m.D1.end(); ++i) {
+ const Vpff&d1=i->second;
+ COUNT sum=0.0;
+ for(PositionIndex ii=0; ii<d1.size(); ii++)sum+=d1[ii].first;
+ if ( sum ) {
+ for(unsigned ii=0; ii<d1.size(); ii++) {
+ print1_m5(out,i->first,*d5m.ewordclasses,*d5m.fwordclasses);
+ out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
}
+ out << endl;
+ }
}
out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i){
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ){
- for(unsigned ii=0;ii<db1.size();ii++){
- printb1_m5(out,i->first,*d5m.fwordclasses);
- out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
- }
- out << endl;
+ for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin(); i!=d5m.Db1.end(); ++i) {
+ const Vpff&db1=i->second;
+ double sum=0.0;
+ for(PositionIndex ii=0; ii<db1.size(); ++ii)sum+=db1[ii].first;
+ if( sum ) {
+ for(unsigned ii=0; ii<db1.size(); ii++) {
+ printb1_m5(out,i->first,*d5m.fwordclasses);
+ out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
}
+ out << endl;
+ }
}
return out;
-}
- void readProbTable(const char*x)
- {
- ifstream f(x);
- string l;
- while(getline(f,l))
- {
- if(l.length()&&l[0]=='#')
- continue;
- istrstream is(l.c_str());
- string E,F;
- int v1,v2,ii;
- double prob,count;
- if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
- {
- //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
- if( count>0 )
- if( E=="-1")
- getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
- else
- getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
- }
- }
- normalizeTable();
- //ofstream of("M5FILE");
- //of << (*this);
+ }
+ void readProbTable(const char*x) {
+ ifstream f(x);
+ string l;
+ while(getline(f,l)) {
+ if(l.length()&&l[0]=='#')
+ continue;
+ istrstream is(l.c_str());
+ string E,F;
+ int v1,v2,ii;
+ double prob,count;
+ if(is>>E>>F>>v1>>v2>>ii>>prob>>count) {
+ //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
+ if( count>0 )
+ if( E=="-1")
+ getCountRef_bigger(ii,0,(*fwordclasses)(F),1000,1000,v2)+=count;
+ else
+ getCountRef_first(ii,v1,(*fwordclasses)(F),1000,1000,v2)+=count;
+ }
}
- void clear()
- {
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- for(PositionIndex i=0;i<d1.size();i++)
- d1[i].first=0.0;
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- for(PositionIndex i=0;i<db1.size();i++)
- db1[i].first=0.0;
- }
+ normalizeTable();
+ //ofstream of("M5FILE");
+ //of << (*this);
+ }
+ void clear() {
+ for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin(); i!=D1.end(); ++i) {
+ Vpff&d1=i->second;
+ for(PositionIndex i=0; i<d1.size(); i++)
+ d1[i].first=0.0;
}
+ for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin(); i!=Db1.end(); ++i) {
+ Vpff&db1=i->second;
+ for(PositionIndex i=0; i<db1.size(); i++)
+ db1[i].first=0.0;
+ }
+ }
};
#endif
diff --git a/mgizapp/src/Dictionary.cpp b/mgizapp/src/Dictionary.cpp
index b12f9b2..1be17f8 100644
--- a/mgizapp/src/Dictionary.cpp
+++ b/mgizapp/src/Dictionary.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,15 +31,16 @@ USA.
#include "Dictionary.h"
#include <cstring>
-Dictionary::Dictionary(const char *filename){
- if(!strcmp(filename, "")){
+Dictionary::Dictionary(const char *filename)
+{
+ if(!strcmp(filename, "")) {
dead = true;
return;
}
dead = false;
cout << "Reading dictionary from: " << filename << '\n';
ifstream dFile(filename);
- if(!dFile){
+ if(!dFile) {
cerr << "ERROR: Can't open dictionary: " << filename << '\n';
exit(1);
}
@@ -48,7 +49,7 @@ Dictionary::Dictionary(const char *filename){
currindexmax = 0;
currval = 0;
int p, q;
- while((dFile >> p >> q)){
+ while((dFile >> p >> q)) {
pairs[0].push_back(p);
pairs[1].push_back(q);
}
@@ -57,24 +58,24 @@ Dictionary::Dictionary(const char *filename){
}
-bool Dictionary::indict(int p, int q){
+bool Dictionary::indict(int p, int q)
+{
if(dead) return false;
if(p == 0 && q == 0) return false;
- if(currval == p){
+ if(currval == p) {
for(int i = currindexmin; i <= currindexmax; i++)
if(pairs[1][i] == q) return true;
return false;
- }
- else{
+ } else {
int begin = 0, end = pairs[0].size() - 1, middle = 0;
unsigned int t;
bool ret = false;
- while(begin <= end){
+ while(begin <= end) {
middle = begin + ((end - begin) >> 1);
if(p < pairs[0][middle]) end = middle - 1;
else if(p > pairs[0][middle]) begin = middle + 1;
- else{
- break;
+ else {
+ break;
}
}
t = middle;
@@ -89,5 +90,5 @@ bool Dictionary::indict(int p, int q){
return ret;
}
}
-
+
diff --git a/mgizapp/src/Dictionary.h b/mgizapp/src/Dictionary.h
index 3a5c71e..35ebe8d 100644
--- a/mgizapp/src/Dictionary.h
+++ b/mgizapp/src/Dictionary.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,14 +33,15 @@ USA.
#ifndef DICTIONARY_H
#define DICTIONARY_H
-class Dictionary{
- private:
+class Dictionary
+{
+private:
Vector<int> pairs[2];
int currval;
int currindexmin;
int currindexmax;
bool dead;
- public:
+public:
Dictionary(const char *);
bool indict(int, int);
};
diff --git a/mgizapp/src/FlexArray.h b/mgizapp/src/FlexArray.h
index 1dd73ed..5803731 100644
--- a/mgizapp/src/FlexArray.h
+++ b/mgizapp/src/FlexArray.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,25 +36,39 @@ public:
: p(_end-_start+1),start(_start),End(_end) {}
FlexArray(int _start,int _end,const T&init)
: p(_end-_start+1,init),start(_start),End(_end) {}
- T&operator[](int i)
- {return p[i-start];}
- const T&operator[](int i)const
- {return p[i-start];}
- int low()const{return start;}
- int high()const{return End;}
+ T&operator[](int i) {
+ return p[i-start];
+ }
+ const T&operator[](int i)const {
+ return p[i-start];
+ }
+ int low()const {
+ return start;
+ }
+ int high()const {
+ return End;
+ }
#ifdef WIN32
- T*begin(){return const_cast<double*>(&p[0]);}
- T*end(){return const_cast<double*>(&(p[0])+p.size());}
+ T*begin() {
+ return const_cast<double*>(&p[0]);
+ }
+ T*end() {
+ return const_cast<double*>(&(p[0])+p.size());
+ }
#else
- T*begin(){return conv<double>(p.begin());}
- T*end(){return conv<double>(p.end());}
+ T*begin() {
+ return conv<double>(p.begin());
+ }
+ T*end() {
+ return conv<double>(p.end());
+ }
#endif
};
template<class T>
inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
{
- for(int i=x.low();i<=x.high();++i)
+ for(int i=x.low(); i<=x.high(); ++i)
out << i << ':' << x[i] << ';' << ' ';
return out;
}
diff --git a/mgizapp/src/ForwardBackward.cpp b/mgizapp/src/ForwardBackward.cpp
index e477dd0..828fb49 100644
--- a/mgizapp/src/ForwardBackward.cpp
+++ b/mgizapp/src/ForwardBackward.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -28,30 +28,31 @@ USA.
#include "mymath.h"
-double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
+double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E)
+{
const int I=net.size1(),J=net.size2(),N=I*J;
Array<double> alpha(N,0),beta(N,0),sum(J);
- for(int i=0;i<I;i++)
+ for(int i=0; i<I; i++)
beta[N-I+i]=net.getBetainit(i);
#ifdef WIN32
double * cur_beta=const_cast<double*>(&(beta[0]))+N-I-1;
#else
double * cur_beta=conv<double>(beta.begin())+N-I-1;
#endif
- for(int j=J-2;j>=0;--j)
- for(int ti=I-1;ti>=0;--ti,--cur_beta) {
+ for(int j=J-2; j>=0; --j)
+ for(int ti=I-1; ti>=0; --ti,--cur_beta) {
const double *next_beta=conv<double>(beta.begin())+(j+1)*I;
const double *alprob=&net.outProb(j,ti,0),*next_node=&net.nodeProb(0,j+1);
- for(int ni=0;ni<I;++ni,(next_node+=J)){
- massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
- massert(next_node == &net.nodeProb(ni,j+1));
- /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
- cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
- << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
- (*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node);
+ for(int ni=0; ni<I; ++ni,(next_node+=J)) {
+ massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
+ massert(next_node == &net.nodeProb(ni,j+1));
+ /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
+ cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
+ << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
+ (*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node);
}
}
- for(int i=0;i<I;i++)
+ for(int i=0; i<I; i++)
alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
#ifdef WIN32
double* cur_alpha=const_cast<double*>(&(alpha[0]))+I;
@@ -61,41 +62,39 @@ double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2
cur_beta=conv<double>(beta.begin())+I;
#endif
- for(int j=1;j<J;j++){
+ for(int j=1; j<J; j++) {
Array2<double>&e=E[ (E.size()==1)?0:(j-1) ];
- if( (E.size()!=1) || j==1 )
- {
- e.resize(I,I);
- fill(e.begin(),e.end(),0.0);
- }
-
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_beta) {
+ if( (E.size()!=1) || j==1 ) {
+ e.resize(I,I);
+ fill(e.begin(),e.end(),0.0);
+ }
+
+ for(int ti=0; ti<I; ++ti,++cur_alpha,++cur_beta) {
const double * prev_alpha=conv<double>(alpha.begin())+I*(j-1);
double *cur_e= &e(ti,0);
double this_node=net.nodeProb(ti,j);
const double* alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- massert(&e(ti,pi)==cur_e);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- (*cur_alpha)+=alpha_increment;
- (*cur_e++)+=alpha_increment*(*cur_beta);
+ for(int pi=0; pi<I; ++pi,++prev_alpha,(alprob+=I)) {
+ massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
+ massert(&e(ti,pi)==cur_e);
+ const double alpha_increment= *prev_alpha*(*alprob)*this_node;
+ (*cur_alpha)+=alpha_increment;
+ (*cur_e++)+=alpha_increment*(*cur_beta);
}
}
}
g.resize(N);
transform(alpha.begin(),alpha.end(),beta.begin(),g.begin(),multiplies<double>());
double bsum=0,esum=0,esum2;
- for(int i=0;i<I;i++)
+ for(int i=0; i<I; i++)
bsum+=beta[i]*net.nodeProb(i,0)*net.getAlphainit(i);
- for(unsigned int j=0;j<(unsigned int)E.size();j++)
- {
- Array2<double>&e=E[j];
- const double *epe=e.end();
- for(const double*ep=e.begin();ep!=epe;++ep)
- esum+=*ep;
- }
- if( J>1 )
+ for(unsigned int j=0; j<(unsigned int)E.size(); j++) {
+ Array2<double>&e=E[j];
+ const double *epe=e.end();
+ for(const double*ep=e.begin(); ep!=epe; ++ep)
+ esum+=*ep;
+ }
+ if( J>1 )
esum2=esum/(J-1);
else
esum2=0.0;
@@ -104,59 +103,58 @@ double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2
#ifdef WIN32
double * sumptr=const_cast<double*>(&(sum[0]));
double* ge=const_cast<double*>(&(g[0])+g.size());
- for(double* gp=const_cast<double*>(&(g[0]));gp!=ge;gp+=I)
- {
- *sumptr++=normalize_if_possible(gp,gp+I);
- if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
- cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
+ for(double* gp=const_cast<double*>(&(g[0])); gp!=ge; gp+=I) {
+ *sumptr++=normalize_if_possible(gp,gp+I);
+ if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
+ cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
}
#else
double * sumptr=conv<double>(sum.begin());
double* ge=conv<double>(g.end());
- for(double* gp=conv<double>(g.begin());gp!=ge;gp+=I)
- {
- *sumptr++=normalize_if_possible(gp,gp+I);
- if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
- cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
- }
+ for(double* gp=conv<double>(g.begin()); gp!=ge; gp+=I) {
+ *sumptr++=normalize_if_possible(gp,gp+I);
+ if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
+ cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
+ }
#endif
- for(unsigned int j=0;j<(unsigned int)E.size();j++)
- {
- Array2<double>&e=E[j];
- double* epe=e.end();
- if( esum )
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=esum;
- else
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=1.0/(max(I*I,I*I*(J-1)));
- }
+ for(unsigned int j=0; j<(unsigned int)E.size(); j++) {
+ Array2<double>&e=E[j];
+ double* epe=e.end();
+ if( esum )
+ for(double*ep=e.begin(); ep!=epe; ++ep)
+ *ep/=esum;
+ else
+ for(double*ep=e.begin(); ep!=epe; ++ep)
+ *ep/=1.0/(max(I*I,I*I*(J-1)));
+ }
if( sum.size() )
return sum[0];
else
return 1.0;
}
-void HMMViterbi(const HMMNetwork&net,Array<int>&vit) {
+void HMMViterbi(const HMMNetwork&net,Array<int>&vit)
+{
const int I=net.size1(),J=net.size2();
vit.resize(J);
Array<double>g;
Array<Array2<double> >e(1);
ForwardBackwardTraining(net,g,e);
- for(int j=0;j<J;j++) {
+ for(int j=0; j<J; j++) {
#ifdef WIN32
- double * begin=const_cast<double*>(&(g[0]))+I*j;
+ double * begin=const_cast<double*>(&(g[0]))+I*j;
#else
double * begin=conv<double>(g.begin())+I*j;
#endif
vit[j]=max_element(begin,begin+I)-begin;
}
}
-void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
+void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit)
+{
const int I=net.size1(),J=net.size2();
vit.resize(J);
- for(int j=0;j<J;j++) {
+ for(int j=0; j<J; j++) {
#ifdef WIN32
- double* begin=const_cast<double*>(&(g[0]))+I*j;
+ double* begin=const_cast<double*>(&(g[0]))+I*j;
#else
double* begin=conv<double>(g.begin())+I*j;
#endif
@@ -164,20 +162,20 @@ void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
}
}
-double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose){
+double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose)
+{
const int I=net.size1(),J=net.size2(),N=I*J;
Array<double> alpha(N,-1);
- Array<double*> bp(N,(double*)0);
+ Array<double*> bp(N,(double*)0);
vitar.resize(J);
if( J==0 )
return 1.0;
- for(int i=0;i<I;i++)
- {
- alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
- if( i>I/2 )
- alpha[i]=0; // only first empty word can be chosen
- bp[i]=0;
- }
+ for(int i=0; i<I; i++) {
+ alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
+ if( i>I/2 )
+ alpha[i]=0; // only first empty word can be chosen
+ bp[i]=0;
+ }
#ifdef WIN32
double *cur_alpha=const_cast<double*>(&alpha[0])+I;
double **cur_bp=const_cast<double**>(&bp[0])+I;
@@ -185,37 +183,35 @@ double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bo
double *cur_alpha=conv<double>(alpha.begin())+I;
double **cur_bp=conv<double*>(bp.begin())+I;
#endif
- for(int j=1;j<J;j++)
- {
- if( pegj+1==j)
- for(int ti=0;ti<I;ti++)
- if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (cur_alpha-I)[ti]=0.0;
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_bp) {
+ for(int j=1; j<J; j++) {
+ if( pegj+1==j)
+ for(int ti=0; ti<I; ti++)
+ if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
+ (cur_alpha-I)[ti]=0.0;
+ for(int ti=0; ti<I; ++ti,++cur_alpha,++cur_bp) {
#ifdef WIN32
- double* prev_alpha=const_cast<double*>(&(alpha[0]))+I*(j-1);
+ double* prev_alpha=const_cast<double*>(&(alpha[0]))+I*(j-1);
#else
- double* prev_alpha=conv<double>(alpha.begin())+I*(j-1);
+ double* prev_alpha=conv<double>(alpha.begin())+I*(j-1);
#endif
- double this_node=net.nodeProb(ti,j);
- const double *alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- if( alpha_increment> *cur_alpha )
- {
- (*cur_alpha)=alpha_increment;
- (*cur_bp)=prev_alpha;
- }
- }
+ double this_node=net.nodeProb(ti,j);
+ const double *alprob= &net.outProb(j-1,0,ti);
+ for(int pi=0; pi<I; ++pi,++prev_alpha,(alprob+=I)) {
+ massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
+ const double alpha_increment= *prev_alpha*(*alprob)*this_node;
+ if( alpha_increment> *cur_alpha ) {
+ (*cur_alpha)=alpha_increment;
+ (*cur_bp)=prev_alpha;
+ }
}
}
- for(int i=0;i<I;i++)
+ }
+ for(int i=0; i<I; i++)
alpha[N-I+i]*=net.getBetainit(i);
if( pegj==J-1)
- for(int ti=0;ti<I;ti++)
+ for(int ti=0; ti<I; ti++)
if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (alpha)[N-I+ti]=0.0;
+ (alpha)[N-I+ti]=0.0;
int j=J-1;
#ifdef WIN32
@@ -225,58 +221,51 @@ double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bo
#endif
vitar[J-1]=max_element(cur_alpha,cur_alpha+I)-cur_alpha;
double ret= *max_element(cur_alpha,cur_alpha+I);
- while(bp[vitar[j]+j*I])
- {
- cur_alpha-=I;
- vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha;
- massert(vitar[j-1]<I&&vitar[j-1]>=0);
- j--;
- }
+ while(bp[vitar[j]+j*I]) {
+ cur_alpha-=I;
+ vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha;
+ massert(vitar[j-1]<I&&vitar[j-1]>=0);
+ j--;
+ }
massert(j==0);
- if( verbose )
- {
- cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
- for(int j=0;j<J;j++)
- cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; ";
- cout << endl;
- }
+ if( verbose ) {
+ cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
+ for(int j=0; j<J; j++)
+ cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; ";
+ cout << endl;
+ }
return ret;
}
-double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
+double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E)
+{
Array<int> vitar;
double ret=HMMRealViterbi(net,vitar);
const int I=net.size1(),J=net.size2();
- if( E.size()==1 )
- {
- Array2<double>&e=E[0];
- e.resize(I,I);
- g.resize(I*J);
- fill(g.begin(),g.end(),0.0);
- fill(e.begin(),e.end(),0.0);
- for(int i=0;i<J;++i)
- {
- g[i*I+vitar[i]]=1.0;
- if( i>0 )
- e(vitar[i],vitar[i-1])++;
- }
+ if( E.size()==1 ) {
+ Array2<double>&e=E[0];
+ e.resize(I,I);
+ g.resize(I*J);
+ fill(g.begin(),g.end(),0.0);
+ fill(e.begin(),e.end(),0.0);
+ for(int i=0; i<J; ++i) {
+ g[i*I+vitar[i]]=1.0;
+ if( i>0 )
+ e(vitar[i],vitar[i-1])++;
}
- else
- {
- g.resize(I*J);
- fill(g.begin(),g.end(),0.0);
- for(int i=0;i<J;++i)
- {
- g[i*I+vitar[i]]=1.0;
- if( i>0 )
- {
- Array2<double>&e=E[i-1];
- e.resize(I,I);
- fill(e.begin(),e.end(),0.0);
- e(vitar[i],vitar[i-1])++;
- }
- }
- }
+ } else {
+ g.resize(I*J);
+ fill(g.begin(),g.end(),0.0);
+ for(int i=0; i<J; ++i) {
+ g[i*I+vitar[i]]=1.0;
+ if( i>0 ) {
+ Array2<double>&e=E[i-1];
+ e.resize(I,I);
+ fill(e.begin(),e.end(),0.0);
+ e(vitar[i],vitar[i-1])++;
+ }
+ }
+ }
return ret;
}
diff --git a/mgizapp/src/ForwardBackward.h b/mgizapp/src/ForwardBackward.h
index 42449d3..e5abc04 100644
--- a/mgizapp/src/ForwardBackward.h
+++ b/mgizapp/src/ForwardBackward.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,7 +29,7 @@ USA.
class HMMNetwork
{
- public:
+public:
int as,bs;
Array2<double> n;
Array<Array2<double> > e;
@@ -39,19 +39,28 @@ class HMMNetwork
double finalMultiply;
HMMNetwork(int I,int J)
: as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
- {}
- double getAlphainit(int i)const{return alphainit[i];}
- double getBetainit(int i)const{return betainit[i];}
- inline int size1()const{return as;}
- inline int size2()const{return bs;}
- inline const double&nodeProb(int i,int j)const
- {return n(i,j);}
- inline const double&outProb(int j,int i1,int i2)const
- {/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
- friend ostream&operator<<(ostream&out,const HMMNetwork&x)
- {
- return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
- }
+ {}
+ double getAlphainit(int i)const {
+ return alphainit[i];
+ }
+ double getBetainit(int i)const {
+ return betainit[i];
+ }
+ inline int size1()const {
+ return as;
+ }
+ inline int size2()const {
+ return bs;
+ }
+ inline const double&nodeProb(int i,int j)const {
+ return n(i,j);
+ }
+ inline const double&outProb(int j,int i1,int i2)const {
+ /*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);
+ }
+ friend ostream&operator<<(ostream&out,const HMMNetwork&x) {
+ return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
+ }
};
double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
diff --git a/mgizapp/src/Globals.h b/mgizapp/src/Globals.h
index 693a117..d6feb4b 100644
--- a/mgizapp/src/Globals.h
+++ b/mgizapp/src/Globals.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,10 +30,10 @@ USA.
extern float PROB_SMOOTH,MINCOUNTINCREASE;
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
-extern string Prefix, LogFilename, OPath,
- SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
+extern string Prefix, LogFilename, OPath,
+ SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
+ t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
extern ofstream logmsg ;
extern Mutex logmsg_lock;
extern double M5P0,P0 ;
diff --git a/mgizapp/src/HMMTables.cpp b/mgizapp/src/HMMTables.cpp
index 6460b31..b3cc421 100644
--- a/mgizapp/src/HMMTables.cpp
+++ b/mgizapp/src/HMMTables.cpp
@@ -9,14 +9,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -27,190 +27,194 @@
#include "Parameter.h"
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const {
- double ssum=0.0;
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProb.begin(); i!=alProb.end(); ++i) {
- double sum=0.0;
- out << "\n\nDistribution for: ";
- printAlDeps(out, i->first, *mapper1, *mapper2);
- out << ' ';
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- out << a << ':' << i->second[a] << ';' << ' ';
- sum+=i->second[a];
- }
- out << '\n' << '\n';
- out << "SUM: " << sum << '\n';
- ssum+=sum;
- }
- out << "FULL-SUM: " << ssum << '\n';
+ MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const
+{
+ double ssum=0.0;
+ for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
+ alProb.begin(); i!=alProb.end(); ++i) {
+ double sum=0.0;
+ out << "\n\nDistribution for: ";
+ printAlDeps(out, i->first, *mapper1, *mapper2);
+ out << ' ';
+ for (int a=i->second.low(); a<=i->second.high(); ++a)
+ if (i->second[a]) {
+ out << a << ':' << i->second[a] << ';' << ' ';
+ sum+=i->second[a];
+ }
+ out << '\n' << '\n';
+ out << "SUM: " << sum << '\n';
+ ssum+=sum;
+ }
+ out << "FULL-SUM: " << ssum << '\n';
}
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::readJumps(istream&) {
+ MAPPERCLASSTOSTRING>::readJumps(istream&)
+{
}
template<class CLS, class MAPPERCLASSTOSTRING> double HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
- int J, CLS w1, CLS w2, int j, int iter) const {
- massert(k<sentLength&&k>=0);
- massert(istrich<sentLength&&istrich>=-1);
- int pos=istrich-k;
- switch (PredictionInAlignments) {
- case 0:
- pos=istrich-k;
- break;
- case 1:
- pos=k;
- break;
- case 2:
- pos=(k*J-j*sentLength);
- if (pos>0)
- pos+=J/2;
- else
- pos-=J/2;
- pos/=J;
- break;
- default:
- abort();
- }
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
- alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
- if (p!=alProb.end() ) {
- lock->unlock();
- return (p->second)[pos];
- } else {
- if (iter>0&&iter<5000)
- cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
- << '\n';;
- lock->unlock();
- return 1.0/(2*sentLength-1);
- }
- lock->unlock();
+ MAPPERCLASSTOSTRING>::getAlProb(int istrich, int k, int sentLength,
+ int J, CLS w1, CLS w2, int j, int iter) const
+{
+ massert(k<sentLength&&k>=0);
+ massert(istrich<sentLength&&istrich>=-1);
+ int pos=istrich-k;
+ switch (PredictionInAlignments) {
+ case 0:
+ pos=istrich-k;
+ break;
+ case 1:
+ pos=k;
+ break;
+ case 2:
+ pos=(k*J-j*sentLength);
+ if (pos>0)
+ pos+=J/2;
+ else
+ pos-=J/2;
+ pos/=J;
+ break;
+ default:
+ abort();
+ }
+ lock->lock();
+ typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=
+ alProb.find(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
+ if (p!=alProb.end() ) {
+ lock->unlock();
+ return (p->second)[pos];
+ } else {
+ if (iter>0&&iter<5000)
+ cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength
+ << '\n';;
+ lock->unlock();
+ return 1.0/(2*sentLength-1);
+ }
+ lock->unlock();
}
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
- int J, CLS w1, CLS w2, int j, double value, double valuePredicted) {
-
-
- int pos=istrich-k;
- switch (PredictionInAlignments) {
- case 0:
- pos=istrich-k;
- break;
- case 1:
- pos=k;
- break;
- case 2:
- pos=(k*J-j*sentLength);
- if (pos>0)
- pos+=J/2;
- else
- pos-=J/2;
- pos/=J;
- break;
- default:
- abort();
- }
-
-
- AlDeps<CLS> deps(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
-
- {
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(deps);
- if (p==alProb.end() ) {
- if ( (CompareAlDeps&1)==0)
- p=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
- else
- p=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
- }
- p->second[pos]+=value;
- lock->unlock();
- }
-
- if (valuePredicted) {
- lock->lock();
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(deps);
- if (p==alProbPredicted.end() ) {
- if ( (CompareAlDeps&1)==0)
- p
- =alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
- else
- p=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
- }
- p->second[pos]+=valuePredicted;
- lock->unlock();
- }
-
+ MAPPERCLASSTOSTRING>::addAlCount(int istrich, int k, int sentLength,
+ int J, CLS w1, CLS w2, int j, double value, double valuePredicted)
+{
+
+
+ int pos=istrich-k;
+ switch (PredictionInAlignments) {
+ case 0:
+ pos=istrich-k;
+ break;
+ case 1:
+ pos=k;
+ break;
+ case 2:
+ pos=(k*J-j*sentLength);
+ if (pos>0)
+ pos+=J/2;
+ else
+ pos-=J/2;
+ pos/=J;
+ break;
+ default:
+ abort();
+ }
+
+
+ AlDeps<CLS> deps(AlDeps<CLS>(sentLength, istrich, j, w1, w2));
+
+ {
+ lock->lock();
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProb.find(deps);
+ if (p==alProb.end() ) {
+ if ( (CompareAlDeps&1)==0)
+ p=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
+ else
+ p=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
+ }
+ p->second[pos]+=value;
+ lock->unlock();
+ }
+
+ if (valuePredicted) {
+ lock->lock();
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProbPredicted.find(deps);
+ if (p==alProbPredicted.end() ) {
+ if ( (CompareAlDeps&1)==0)
+ p
+ =alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
+ else
+ p=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
+ }
+ p->second[pos]+=valuePredicted;
+ lock->unlock();
+ }
+
}
-template<class CLS, class MAPPERCLASSTOSTRING>
+template<class CLS, class MAPPERCLASSTOSTRING>
hmmentry_type& HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
{
- alphalock->lock();
- if( !init_alpha.count(I) ){
+ alphalock->lock();
+ if( !init_alpha.count(I) ) {
#ifdef WIN32
- init_alpha[I]=hmmentry_type(Array<double>(I,0),new Mutex());
+ init_alpha[I]=hmmentry_type(Array<double>(I,0),new Mutex());
#else
- init_alpha[I]=hmmentry_type(Array<double>(I,0),Mutex());
+ init_alpha[I]=hmmentry_type(Array<double>(I,0),Mutex());
#endif
- }
- hmmentry_type& ret = init_alpha[I];
- alphalock->unlock();
- return ret;
+ }
+ hmmentry_type& ret = init_alpha[I];
+ alphalock->unlock();
+ return ret;
}
-template<class CLS, class MAPPERCLASSTOSTRING>
+template<class CLS, class MAPPERCLASSTOSTRING>
hmmentry_type& HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
{
- betalock->lock();
- if( !init_beta.count(I) ){
+ betalock->lock();
+ if( !init_beta.count(I) ) {
#ifdef WIN32
- init_beta[I]=hmmentry_type(Array<double>(I,0),new Mutex());
+ init_beta[I]=hmmentry_type(Array<double>(I,0),new Mutex());
#else
- init_beta[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
+ init_beta[I]=pair<Array<double>,Mutex>(Array<double>(I,0),Mutex());
#endif
- }
- hmmentry_type& ret = init_beta[I];
- betalock->unlock();
- return ret;
+ }
+ hmmentry_type& ret = init_beta[I];
+ betalock->unlock();
+ return ret;
}
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const {
- alphalock->lock();
- hash_map<int,hmmentry_type >::const_iterator i=init_alpha.find(I);
- if (i==init_alpha.end() ){
- alphalock->unlock();
- return 0;
- }
- else {
- x=i->second.first;
- alphalock->unlock();
- for (unsigned int j=x.size()/2+1; j<x.size(); ++j)
- // only first empty word can be chosen
- x[j]=0;
- return 1;
- }
- alphalock->unlock();
+ MAPPERCLASSTOSTRING>::getAlphaInit(int I, Array<double>&x) const
+{
+ alphalock->lock();
+ hash_map<int,hmmentry_type >::const_iterator i=init_alpha.find(I);
+ if (i==init_alpha.end() ) {
+ alphalock->unlock();
+ return 0;
+ } else {
+ x=i->second.first;
+ alphalock->unlock();
+ for (unsigned int j=x.size()/2+1; j<x.size(); ++j)
+ // only first empty word can be chosen
+ x[j]=0;
+ return 1;
+ }
+ alphalock->unlock();
}
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const {
- betalock->lock();
- hash_map<int,hmmentry_type >::const_iterator i=init_beta.find(I);
- if (i==init_beta.end() ){
- betalock->unlock();
- return 0;
- }
- else {
- x=i->second.first;
- betalock->unlock();
- return 1;
- }
- betalock->unlock();
+ MAPPERCLASSTOSTRING>::getBetaInit(int I, Array<double>&x) const
+{
+ betalock->lock();
+ hash_map<int,hmmentry_type >::const_iterator i=init_beta.find(I);
+ if (i==init_beta.end() ) {
+ betalock->unlock();
+ return 0;
+ } else {
+ x=i->second.first;
+ betalock->unlock();
+ return 1;
+ }
+ betalock->unlock();
}
/***********************************
@@ -218,375 +222,381 @@ template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
************************************/
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
- const char* alpredict, const char* alpha, const char* beta) const {
- if (alprob) {
- ofstream ofs(alprob);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alprob << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alprob << endl;
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProb.begin(); i!=alProb.end(); ++i) {
- double sum=0.0;
- ofs <<i->first.englishSentenceLength << " "
- << i->first.classPrevious << " " << i->first.previous
- << " " << i->first.j << " " << i->first.Cj <<" "
- << i->second.low() <<" " << i->second.high()<< " ";
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- ofs << a << ' ' << i->second[a] << ' ';
- sum+=i->second[a];
- }
- ofs << endl;
- }
- ofs.close();
- }
- if (alpredict) {
- ofstream ofs(alpredict);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alpredict << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alpredict << endl;
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- alProbPredicted.begin(); i!=alProbPredicted.end(); ++i) {
- double sum=0.0;
- ofs << i->first.englishSentenceLength << " "
- << i->first.classPrevious << " " << i->first.previous
- << " " << i->first.j << " " << i->first.Cj <<" "
- << i->second.low() <<" " << i->second.high()<< " ";
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- ofs << a << ' ' << i->second[a] << ' ';
- sum+=i->second[a];
- }
- ofs << endl;
- }
- ofs.close();
- }
- if (alpha) {
- ofstream ofs(alpha);
-
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << alpha << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << alpha << endl;
- for (typename hash_map<int,hmmentry_type>::const_iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- ofs << i->first << " " << i->second.first.size() <<" ";
- int j;
- for (j=0; j<i->second.first.size(); j++) {
- ofs << i->second.first[j] << " ";
- }
- ofs<<endl;
- }
- ofs.close();
- }
- if (beta) {
- ofstream ofs(beta);
- if (!ofs.is_open()) {
- cerr << "Cannot open file for HMM output " << beta << endl;
- return false;
- }
- cerr << "Dumping HMM table to " << beta << endl;
- for (typename hash_map<int,hmmentry_type>::const_iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- ofs << i->first << " " << i->second.first.size() << " ";
- int j;
- for (j=0; j<i->second.first.size(); j++) {
- ofs << i->second.first[j] << " ";
- }
- ofs << endl;
- }
- ofs.close();
- }
- return true;
+ MAPPERCLASSTOSTRING>::writeJumps(const char* alprob,
+ const char* alpredict, const char* alpha, const char* beta) const
+{
+ if (alprob) {
+ ofstream ofs(alprob);
+ if (!ofs.is_open()) {
+ cerr << "Cannot open file for HMM output " << alprob << endl;
+ return false;
+ }
+ cerr << "Dumping HMM table to " << alprob << endl;
+
+ for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
+ alProb.begin(); i!=alProb.end(); ++i) {
+ double sum=0.0;
+ ofs <<i->first.englishSentenceLength << " "
+ << i->first.classPrevious << " " << i->first.previous
+ << " " << i->first.j << " " << i->first.Cj <<" "
+ << i->second.low() <<" " << i->second.high()<< " ";
+ for (int a=i->second.low(); a<=i->second.high(); ++a)
+ if (i->second[a]) {
+ ofs << a << ' ' << i->second[a] << ' ';
+ sum+=i->second[a];
+ }
+ ofs << endl;
+ }
+ ofs.close();
+ }
+ if (alpredict) {
+ ofstream ofs(alpredict);
+ if (!ofs.is_open()) {
+ cerr << "Cannot open file for HMM output " << alpredict << endl;
+ return false;
+ }
+ cerr << "Dumping HMM table to " << alpredict << endl;
+ for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
+ alProbPredicted.begin(); i!=alProbPredicted.end(); ++i) {
+ double sum=0.0;
+ ofs << i->first.englishSentenceLength << " "
+ << i->first.classPrevious << " " << i->first.previous
+ << " " << i->first.j << " " << i->first.Cj <<" "
+ << i->second.low() <<" " << i->second.high()<< " ";
+ for (int a=i->second.low(); a<=i->second.high(); ++a)
+ if (i->second[a]) {
+ ofs << a << ' ' << i->second[a] << ' ';
+ sum+=i->second[a];
+ }
+ ofs << endl;
+ }
+ ofs.close();
+ }
+ if (alpha) {
+ ofstream ofs(alpha);
+
+ if (!ofs.is_open()) {
+ cerr << "Cannot open file for HMM output " << alpha << endl;
+ return false;
+ }
+ cerr << "Dumping HMM table to " << alpha << endl;
+ for (typename hash_map<int,hmmentry_type>::const_iterator i=
+ init_alpha.begin(); i!=init_alpha.end(); i++) {
+ ofs << i->first << " " << i->second.first.size() <<" ";
+ int j;
+ for (j=0; j<i->second.first.size(); j++) {
+ ofs << i->second.first[j] << " ";
+ }
+ ofs<<endl;
+ }
+ ofs.close();
+ }
+ if (beta) {
+ ofstream ofs(beta);
+ if (!ofs.is_open()) {
+ cerr << "Cannot open file for HMM output " << beta << endl;
+ return false;
+ }
+ cerr << "Dumping HMM table to " << beta << endl;
+ for (typename hash_map<int,hmmentry_type>::const_iterator i=
+ init_beta.begin(); i!=init_beta.end(); i++) {
+ ofs << i->first << " " << i->second.first.size() << " ";
+ int j;
+ for (j=0; j<i->second.first.size(); j++) {
+ ofs << i->second.first[j] << " ";
+ }
+ ofs << endl;
+ }
+ ofs.close();
+ }
+ return true;
}
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
- const char* alpredict, const char* alpha, const char* beta) {
- if (alprob) {
- ifstream ifs(alprob);
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alprob << endl;
- return false;
- }
- cerr << "Reading HMM table from " << alprob << endl;
- string strLine="";
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- AlDeps<CLS> dep;
- int low, high;
- ss >> dep.englishSentenceLength >> dep.classPrevious
- >> dep.previous >> dep.j >> dep.Cj >> low >> high;
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(dep);
- if (p==alProb.end() ) {
- p=alProb.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
- }
- int pos;
- double val;
- while (!ss.eof()) {
- pos = low-1;
- val = 0;
- ss >> pos >> val;
- if (pos>low-1) {
- p->second[pos]+=val;
- }
- }
- }
- }
- }
- if (alpredict) {
- ifstream ifs(alpredict);
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alpredict << endl;
- return false;
- }
- cerr << "Reading HMM table from " << alpredict << endl;
- string strLine="";
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- AlDeps<CLS> dep;
- int low, high;
- ss >> dep.englishSentenceLength >> dep.classPrevious
- >> dep.previous >> dep.j >> dep.Cj >> low >> high;
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(dep);
- if (p==alProbPredicted.end() ) {
- p=alProbPredicted.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
- }
- int pos;
- double val;
-
- while (!ss.eof()) {
- pos = low-1;
- val = 0;
- ss >> pos >> val;
- if (pos>low-1) {
- p->second[pos]+=val;
- }
- }
- }
- }
- }
-
- if (alpha) {
- ifstream ifs(alpha);
-
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << alpha << endl;
- return false;
- }
- string strLine="";
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- int id = -1, size = -1;
- ss >> id >> size;
- if (id<0||size<0||id!=size) {
- cerr << "Mismatch in alpha init table!" << endl;
- return false;
- }
- hmmentry_type&alp = doGetAlphaInit(id);
- Array<double>& gk = alp.first;
- int j;
- double v;
+ MAPPERCLASSTOSTRING>::readJumps(const char* alprob,
+ const char* alpredict, const char* alpha, const char* beta)
+{
+ if (alprob) {
+ ifstream ifs(alprob);
+ if (!ifs.is_open()) {
+ cerr << "Cannot open file for HMM input " << alprob << endl;
+ return false;
+ }
+ cerr << "Reading HMM table from " << alprob << endl;
+ string strLine="";
+ while (!ifs.eof()) {
+ strLine = "";
+ getline(ifs, strLine);
+ if (strLine.length()) {
+ stringstream ss(strLine.c_str());
+ AlDeps<CLS> dep;
+ int low, high;
+ ss >> dep.englishSentenceLength >> dep.classPrevious
+ >> dep.previous >> dep.j >> dep.Cj >> low >> high;
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProb.find(dep);
+ if (p==alProb.end() ) {
+ p=alProb.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
+ }
+ int pos;
+ double val;
+ while (!ss.eof()) {
+ pos = low-1;
+ val = 0;
+ ss >> pos >> val;
+ if (pos>low-1) {
+ p->second[pos]+=val;
+ }
+ }
+ }
+ }
+ }
+ if (alpredict) {
+ ifstream ifs(alpredict);
+ if (!ifs.is_open()) {
+ cerr << "Cannot open file for HMM input " << alpredict << endl;
+ return false;
+ }
+ cerr << "Reading HMM table from " << alpredict << endl;
+ string strLine="";
+ while (!ifs.eof()) {
+ strLine = "";
+ getline(ifs, strLine);
+ if (strLine.length()) {
+ stringstream ss(strLine.c_str());
+ AlDeps<CLS> dep;
+ int low, high;
+ ss >> dep.englishSentenceLength >> dep.classPrevious
+ >> dep.previous >> dep.j >> dep.Cj >> low >> high;
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProbPredicted.find(dep);
+ if (p==alProbPredicted.end() ) {
+ p=alProbPredicted.insert(make_pair(dep,FlexArray<double> (low,high,0.0))).first;
+ }
+ int pos;
+ double val;
+
+ while (!ss.eof()) {
+ pos = low-1;
+ val = 0;
+ ss >> pos >> val;
+ if (pos>low-1) {
+ p->second[pos]+=val;
+ }
+ }
+ }
+ }
+ }
+
+ if (alpha) {
+ ifstream ifs(alpha);
+
+ if (!ifs.is_open()) {
+ cerr << "Cannot open file for HMM input " << alpha << endl;
+ return false;
+ }
+ string strLine="";
+ while (!ifs.eof()) {
+ strLine = "";
+ getline(ifs, strLine);
+ if (strLine.length()) {
+ stringstream ss(strLine.c_str());
+ int id = -1, size = -1;
+ ss >> id >> size;
+ if (id<0||size<0||id!=size) {
+ cerr << "Mismatch in alpha init table!" << endl;
+ return false;
+ }
+ hmmentry_type&alp = doGetAlphaInit(id);
+ Array<double>& gk = alp.first;
+ int j;
+ double v;
#ifdef WIN32
- alp.second->lock();
+ alp.second->lock();
#else
- alp.second.lock();
+ alp.second.lock();
#endif
- for (j=0; j<gk.size(); j++) {
- ss >> v;
- gk[j]+=v;
- }
+ for (j=0; j<gk.size(); j++) {
+ ss >> v;
+ gk[j]+=v;
+ }
#ifdef WIN32
- alp.second->unlock();
+ alp.second->unlock();
#else
- alp.second.unlock();
+ alp.second.unlock();
#endif
- }
- }
- }
-
- if (beta) {
- ifstream ifs(beta);
-
- if (!ifs.is_open()) {
- cerr << "Cannot open file for HMM input " << beta << endl;
- return false;
- }
- string strLine="";
- while (!ifs.eof()) {
- strLine = "";
- getline(ifs, strLine);
- if (strLine.length()) {
- stringstream ss(strLine.c_str());
- int id = -1, size = -1;
- ss >> id >> size;
- if (id<0||size<0||id!=size) {
- cerr << "Mismatch in alpha init table!" << endl;
- return false;
- }
- hmmentry_type&bet1 = doGetBetaInit(id);
- Array<double>&bet = bet1.first;
-
- int j;
- double v;
+ }
+ }
+ }
+
+ if (beta) {
+ ifstream ifs(beta);
+
+ if (!ifs.is_open()) {
+ cerr << "Cannot open file for HMM input " << beta << endl;
+ return false;
+ }
+ string strLine="";
+ while (!ifs.eof()) {
+ strLine = "";
+ getline(ifs, strLine);
+ if (strLine.length()) {
+ stringstream ss(strLine.c_str());
+ int id = -1, size = -1;
+ ss >> id >> size;
+ if (id<0||size<0||id!=size) {
+ cerr << "Mismatch in alpha init table!" << endl;
+ return false;
+ }
+ hmmentry_type&bet1 = doGetBetaInit(id);
+ Array<double>&bet = bet1.first;
+
+ int j;
+ double v;
#ifdef WIN32
- bet1.second->lock();
+ bet1.second->lock();
#else
- bet1.second.lock();
+ bet1.second.lock();
#endif
- for (j=0; j<bet.size(); j++) {
- ss >> v;
- bet[j]+=v;
- }
+ for (j=0; j<bet.size(); j++) {
+ ss >> v;
+ bet[j]+=v;
+ }
#ifdef WIN32
- bet1.second->unlock();
+ bet1.second->unlock();
#else
- bet1.second.unlock();
+ bet1.second.unlock();
#endif
- }
- }
- }
+ }
+ }
+ }
- return true;
+ return true;
}
template<class CLS, class MAPPERCLASSTOSTRING> bool HMMTables<CLS,
- MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht) {
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- ht.alProb.begin(); i!=ht.alProb.end(); ++i) {
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProb.find(i->first);
- if (p==alProb.end() ) {
- p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
- }
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- p->second[a] += i->second[a];
- }
-
- }
-
- for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
- ht.alProbPredicted.begin(); i!=ht.alProbPredicted.end(); ++i) {
- typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
- alProbPredicted.find(i->first);
- if (p==alProbPredicted.end() ) {
- p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
- }
- for (int a=i->second.low(); a<=i->second.high(); ++a)
- if (i->second[a]) {
- p->second[a] += i->second[a];
- }
-
- }
-
- for (typename hash_map<int,hmmentry_type>::iterator i=
- ht.init_alpha.begin(); i!=ht.init_alpha.end(); i++) {
- hmmentry_type& alp = doGetAlphaInit(i->first);
- int j;
- for (j=0; j<alp.first.size(); j++) {
- alp.first[j]+=i->second.first[j];
- }
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- ht.init_beta.begin(); i!=ht.init_beta.end(); i++) {
- hmmentry_type&alp = doGetBetaInit(i->first);
- int j;
- for (j=0; j<alp.first.size(); j++) {
- alp.first[j]+=i->second.first[j];
- }
- }
-
- return true;
+ MAPPERCLASSTOSTRING>::merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht)
+{
+
+ for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
+ ht.alProb.begin(); i!=ht.alProb.end(); ++i) {
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProb.find(i->first);
+ if (p==alProb.end() ) {
+ p=alProb.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
+ }
+ for (int a=i->second.low(); a<=i->second.high(); ++a)
+ if (i->second[a]) {
+ p->second[a] += i->second[a];
+ }
+
+ }
+
+ for (typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=
+ ht.alProbPredicted.begin(); i!=ht.alProbPredicted.end(); ++i) {
+ typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=
+ alProbPredicted.find(i->first);
+ if (p==alProbPredicted.end() ) {
+ p=alProbPredicted.insert(make_pair(i->first,FlexArray<double> (i->second.low(),i->second.high(),0.0))).first;
+ }
+ for (int a=i->second.low(); a<=i->second.high(); ++a)
+ if (i->second[a]) {
+ p->second[a] += i->second[a];
+ }
+
+ }
+
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ ht.init_alpha.begin(); i!=ht.init_alpha.end(); i++) {
+ hmmentry_type& alp = doGetAlphaInit(i->first);
+ int j;
+ for (j=0; j<alp.first.size(); j++) {
+ alp.first[j]+=i->second.first[j];
+ }
+ }
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ ht.init_beta.begin(); i!=ht.init_beta.end(); i++) {
+ hmmentry_type&alp = doGetBetaInit(i->first);
+ int j;
+ for (j=0; j<alp.first.size(); j++) {
+ alp.first[j]+=i->second.first[j];
+ }
+ }
+
+ return true;
}
//////////////////////////////////////
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
- const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
- probabilityForEmpty(mfabs(_probForEmpty)),
- updateProbabilityForEmpty(_probForEmpty<0.0), mapper1(&m1),
- mapper2(&m2) {
- lock = new Mutex();
- alphalock = new Mutex();
- betalock = new Mutex();
+ MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,
+ const MAPPERCLASSTOSTRING&m1, const MAPPERCLASSTOSTRING&m2) :
+ probabilityForEmpty(mfabs(_probForEmpty)),
+ updateProbabilityForEmpty(_probForEmpty<0.0), mapper1(&m1),
+ mapper2(&m2)
+{
+ lock = new Mutex();
+ alphalock = new Mutex();
+ betalock = new Mutex();
}
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::HMMTables(const HMMTables& ref):
-mapper1(ref.mapper1), mapper2(ref.mapper2)
+ MAPPERCLASSTOSTRING>::HMMTables(const HMMTables& ref):
+ mapper1(ref.mapper1), mapper2(ref.mapper2)
{
- probabilityForEmpty=ref.probabilityForEmpty;
- updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
- init_alpha=ref.init_alpha;
- init_beta=ref.init_beta;
- alProb=ref.alProb;
- alProbPredicted=ref.alProbPredicted;
- globalCounter=ref.globalCounter;
- divSum=ref.divSum;
- p0_count=ref.p0_count;
- np0_count=ref.np0_count;
+ probabilityForEmpty=ref.probabilityForEmpty;
+ updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
+ init_alpha=ref.init_alpha;
+ init_beta=ref.init_beta;
+ alProb=ref.alProb;
+ alProbPredicted=ref.alProbPredicted;
+ globalCounter=ref.globalCounter;
+ divSum=ref.divSum;
+ p0_count=ref.p0_count;
+ np0_count=ref.np0_count;
}
template<class CLS, class MAPPERCLASSTOSTRING> void HMMTables<CLS,
- MAPPERCLASSTOSTRING>::operator=(const HMMTables& ref){
- probabilityForEmpty=ref.probabilityForEmpty;
- updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
- init_alpha=ref.init_alpha;
- init_beta=ref.init_beta;
- alProb=ref.alProb;
- alProbPredicted=ref.alProbPredicted;
- globalCounter=ref.globalCounter;
- divSum=ref.divSum;
- p0_count=ref.p0_count;
- np0_count=ref.np0_count;
+ MAPPERCLASSTOSTRING>::operator=(const HMMTables& ref)
+{
+ probabilityForEmpty=ref.probabilityForEmpty;
+ updateProbabilityForEmpty=ref.updateProbabilityForEmpty;
+ init_alpha=ref.init_alpha;
+ init_beta=ref.init_beta;
+ alProb=ref.alProb;
+ alProbPredicted=ref.alProbPredicted;
+ globalCounter=ref.globalCounter;
+ divSum=ref.divSum;
+ p0_count=ref.p0_count;
+ np0_count=ref.np0_count;
}
template<class CLS, class MAPPERCLASSTOSTRING> HMMTables<CLS,
- MAPPERCLASSTOSTRING>::~HMMTables() {
+ MAPPERCLASSTOSTRING>::~HMMTables()
+{
#if 0
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- i->second.second->unlock();
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- i->second.second->unlock();
- }
-
-
- delete lock;
- delete alphalock;
- delete betalock;
-
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_alpha.begin(); i!=init_alpha.end(); i++) {
- delete i->second.second;
- }
- for (typename hash_map<int,hmmentry_type>::iterator i=
- init_beta.begin(); i!=init_beta.end(); i++) {
- delete i->second.second;
- }
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ init_alpha.begin(); i!=init_alpha.end(); i++) {
+ i->second.second->unlock();
+ }
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ init_beta.begin(); i!=init_beta.end(); i++) {
+ i->second.second->unlock();
+ }
+
+
+ delete lock;
+ delete alphalock;
+ delete betalock;
+
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ init_alpha.begin(); i!=init_alpha.end(); i++) {
+ delete i->second.second;
+ }
+ for (typename hash_map<int,hmmentry_type>::iterator i=
+ init_beta.begin(); i!=init_beta.end(); i++) {
+ delete i->second.second;
+ }
#endif
}
diff --git a/mgizapp/src/HMMTables.h b/mgizapp/src/HMMTables.h
index 944b173..777b0a5 100644
--- a/mgizapp/src/HMMTables.h
+++ b/mgizapp/src/HMMTables.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,66 +36,84 @@ using __gnu_cxx::hash_map;
#include "syncObj.h"
template<class T>
-T normalize_if_possible(T*a,T*b){
- T sum=0;
- for(T*i=a;i!=b;++i)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;++i)
- *i/=sum;
- else
- fill(a,b,1.0/(b-a));
- return sum;
+T normalize_if_possible(T*a,T*b)
+{
+ T sum=0;
+ for(T*i=a; i!=b; ++i)
+ sum+=*i;
+ if( sum )
+ for(T*i=a; i!=b; ++i)
+ *i/=sum;
+ else
+ fill(a,b,1.0/(b-a));
+ return sum;
}
extern short CompareAlDeps;
template<class CLS>
-class AlDeps{
+class AlDeps
+{
public:
- int englishSentenceLength;
- CLS classPrevious;
- int previous;
- int j;
- CLS Cj;
- AlDeps(){};
- AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
- : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
- {}
- friend bool operator<(const AlDeps&x,const AlDeps&y){
- if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
- if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
- if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
- if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
- if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
- if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
- if( (CompareAlDeps&8) && x.j<y.j ) return 1;
- if( (CompareAlDeps&8) && y.j<x.j ) return 0;
- if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
- if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
- return 0;
- }
- friend bool operator==(const AlDeps&x,const AlDeps&y)
- { return !( x<y || y<x ); }
+ int englishSentenceLength;
+ CLS classPrevious;
+ int previous;
+ int j;
+ CLS Cj;
+ AlDeps() {};
+ AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
+ : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
+ {}
+ friend bool operator<(const AlDeps&x,const AlDeps&y) {
+ if( (CompareAlDeps&1) && x.englishSentenceLength<y.englishSentenceLength ) return 1;
+ if( (CompareAlDeps&1) && y.englishSentenceLength<x.englishSentenceLength ) return 0;
+ if( (CompareAlDeps&2) && x.classPrevious<y.classPrevious ) return 1;
+ if( (CompareAlDeps&2) && y.classPrevious<x.classPrevious ) return 0;
+ if( (CompareAlDeps&4) && x.previous<y.previous ) return 1;
+ if( (CompareAlDeps&4) && y.previous<x.previous ) return 0;
+ if( (CompareAlDeps&8) && x.j<y.j ) return 1;
+ if( (CompareAlDeps&8) && y.j<x.j ) return 0;
+ if( (CompareAlDeps&16) && x.Cj<y.Cj ) return 1;
+ if( (CompareAlDeps&16) && y.Cj<x.Cj ) return 0;
+ return 0;
+ }
+ friend bool operator==(const AlDeps&x,const AlDeps&y) {
+ return !( x<y || y<x );
+ }
};
template<class CLS>
-class Hash_AlDeps{
+class Hash_AlDeps
+{
public:
- unsigned
- int
- operator()
- (const AlDeps<CLS>&x)
- const
- {
- unsigned int hash=0;
- if( (CompareAlDeps&1) ) { hash=hash+x.englishSentenceLength;hash*=31;}
- if( (CompareAlDeps&2) ) { hash=hash+x.classPrevious;hash*=31;}
- if( (CompareAlDeps&4) ) { hash=hash+x.previous;hash*=31;}
- if( (CompareAlDeps&8) ) { hash=hash+x.j;hash*=31;}
- if( (CompareAlDeps&16) ) { hash=hash+x.Cj;hash*=31;}
- return hash;
-
+ unsigned
+ int
+ operator()
+ (const AlDeps<CLS>&x)
+ const {
+ unsigned int hash=0;
+ if( (CompareAlDeps&1) ) {
+ hash=hash+x.englishSentenceLength;
+ hash*=31;
+ }
+ if( (CompareAlDeps&2) ) {
+ hash=hash+x.classPrevious;
+ hash*=31;
+ }
+ if( (CompareAlDeps&4) ) {
+ hash=hash+x.previous;
+ hash*=31;
}
+ if( (CompareAlDeps&8) ) {
+ hash=hash+x.j;
+ hash*=31;
+ }
+ if( (CompareAlDeps&16) ) {
+ hash=hash+x.Cj;
+ hash*=31;
+ }
+ return hash;
+
+ }
};
#ifdef WIN32
@@ -107,71 +125,71 @@ typedef pair<Array<double>,Mutex> hmmentry_type;
template<class CLS,class MAPPERCLASSTOSTRING>
class HMMTables
{
- Mutex* lock;
- Mutex* alphalock,*betalock;
+ Mutex* lock;
+ Mutex* alphalock,*betalock;
public:
- double probabilityForEmpty;
- bool updateProbabilityForEmpty;
- hash_map<int, hmmentry_type > init_alpha;
- hash_map<int, hmmentry_type > init_beta;
- map<AlDeps<CLS>,FlexArray<double> > alProb;
- map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
- int globalCounter;
- double divSum;
- double p0_count,np0_count;
- const MAPPERCLASSTOSTRING*mapper1;
- const MAPPERCLASSTOSTRING*mapper2;
+ double probabilityForEmpty;
+ bool updateProbabilityForEmpty;
+ hash_map<int, hmmentry_type > init_alpha;
+ hash_map<int, hmmentry_type > init_beta;
+ map<AlDeps<CLS>,FlexArray<double> > alProb;
+ map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
+ int globalCounter;
+ double divSum;
+ double p0_count,np0_count;
+ const MAPPERCLASSTOSTRING*mapper1;
+ const MAPPERCLASSTOSTRING*mapper2;
public:
- bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
- const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
- HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
- HMMTables(const HMMTables& ref);
- void operator=(const HMMTables& ref);
- virtual ~HMMTables();
- virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
- virtual void writeJumps(ostream&) const;
- /**By Edward Gao, write out all things needed to rebuild the count table*/
- virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
- virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
- void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
- virtual void readJumps(istream&);
- virtual bool getAlphaInit(int I,Array<double>&x)const;
- virtual bool getBetaInit(int I,Array<double> &x)const;
- hmmentry_type &doGetAlphaInit(int I);
- hmmentry_type &doGetBetaInit(int I);
- virtual double getProbabilityForEmpty()const
- {return probabilityForEmpty;}
- void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old){
- cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
- for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i) {
- if( alProbPredicted.count(i->first)){
- normalize_if_possible(i->second.begin(),i->second.end());
- normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
- for(int j=i->second.low();j<=i->second.high();++j){
- if( i->second[j] )
- if(alProbPredicted[i->first][j]>0.0 )
- {
- double op=1.0;
- if( old && old->alProb.count(i->first) )
- op=(old->alProb.find(i->first)->second)[j];
- //cerr << "GIS: " << j << ' ' << " OLD:"
- // << op << "*true:"
- // << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
-
-
- i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
- //cerr << i->second[j] << endl;
- }
- else{
- cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
- }
- }
+ bool merge(HMMTables<CLS,MAPPERCLASSTOSTRING> & ht);
+ const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {
+ return this;
+ }
+ HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
+ HMMTables(const HMMTables& ref);
+ void operator=(const HMMTables& ref);
+ virtual ~HMMTables();
+ virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
+ virtual void writeJumps(ostream&) const;
+ /**By Edward Gao, write out all things needed to rebuild the count table*/
+ virtual bool writeJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta )const;
+ virtual bool readJumps(const char* alprob, const char* alpredict, const char* alpha, const char* beta );
+ void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
+ virtual void readJumps(istream&);
+ virtual bool getAlphaInit(int I,Array<double>&x)const;
+ virtual bool getBetaInit(int I,Array<double> &x)const;
+ hmmentry_type &doGetAlphaInit(int I);
+ hmmentry_type &doGetBetaInit(int I);
+ virtual double getProbabilityForEmpty()const {
+ return probabilityForEmpty;
+ }
+ void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old) {
+ cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
+ for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin(); i!=alProb.end(); ++i) {
+ if( alProbPredicted.count(i->first)) {
+ normalize_if_possible(i->second.begin(),i->second.end());
+ normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
+ for(int j=i->second.low(); j<=i->second.high(); ++j) {
+ if( i->second[j] )
+ if(alProbPredicted[i->first][j]>0.0 ) {
+ double op=1.0;
+ if( old && old->alProb.count(i->first) )
+ op=(old->alProb.find(i->first)->second)[j];
+ //cerr << "GIS: " << j << ' ' << " OLD:"
+ // << op << "*true:"
+ // << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
+
+
+ i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
+ //cerr << i->second[j] << endl;
+ } else {
+ cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
}
- else
- cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
}
+ } else
+ cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
}
+ }
};
template<class CLS,class MAPPERCLASSTOSTRING>
diff --git a/mgizapp/src/MoveSwapMatrix.cpp b/mgizapp/src/MoveSwapMatrix.cpp
index 2b0c3a3..a10edac 100644
--- a/mgizapp/src/MoveSwapMatrix.cpp
+++ b/mgizapp/src/MoveSwapMatrix.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -24,31 +24,31 @@ USA.
template<class TRANSPAIR>
MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
- : alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
- delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
- modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
+ : alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
+ delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
+ modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
{
double thisValue=ef.scoreOfAlignmentForChange((*this));
if( lazyEvaluation==0)
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
+ for(WordIndex j=1; j<=m; j++)updateJ(j, 0,thisValue);
}
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
{
massert( lazyEvaluation==0 );
- for(WordIndex i=0;i<=l;i++)
+ for(WordIndex i=0; i<=l; i++)
if( (useChanged==0||changed[i]!=changedCounter) )
- if( get_al(j)!=i )
- _cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
+ if( get_al(j)!=i )
+ _cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
else
- _cmove(i, j)=1.0;
- for(WordIndex j2=j+1;j2<=m;j2++)
+ _cmove(i, j)=1.0;
+ for(WordIndex j2=j+1; j2<=m; j2++)
if( get_al(j)!=get_al(j2) )
_cswap(j, j2)=ef.scoreOfSwap((*this), j, j2,thisValue);
else
_cswap(j, j2)=1.0;
- for(WordIndex j2=1;j2<j;j2++)
+ for(WordIndex j2=1; j2<j; j2++)
if( get_al(j)!=get_al(j2) )
_cswap(j2, j)=ef.scoreOfSwap((*this), j2, j,thisValue);
else
@@ -58,7 +58,7 @@ template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
{
massert( lazyEvaluation==0);
- for(WordIndex j=1;j<=m;j++)
+ for(WordIndex j=1; j<=m; j++)
if( get_al(j)!=i )
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
else
@@ -66,61 +66,56 @@ void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
}
template<class TRANSPAIR>
-void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
- for(WordIndex i=0;i<=l;i++)
- {
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)==i)
- cout << "A";
- else
- {
- LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
- if( fabs(1.0-real/wanted)>1e-3 )
- cout << 'b';
- else if(fabs(1.0-real/wanted)>1e-10 )
- cout << 'e';
- else if(real!=wanted)
- cout << 'E';
- else
- cout << ' ';
- }
- cout << endl;
- }
+void MoveSwapMatrix<TRANSPAIR>::printWrongs()const
+{
+ for(WordIndex i=0; i<=l; i++) {
+ for(WordIndex j=1; j<=m; j++)
+ if( get_al(j)==i)
+ cout << "A";
+ else {
+ LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
+ if( fabs(1.0-real/wanted)>1e-3 )
+ cout << 'b';
+ else if(fabs(1.0-real/wanted)>1e-10 )
+ cout << 'e';
+ else if(real!=wanted)
+ cout << 'E';
+ else
+ cout << ' ';
+ }
+ cout << endl;
+ }
cout << endl;
- for(WordIndex j=1;j<=m;j++)
- {
- for(WordIndex j1=1;j1<=m;j1++)
- if( j1>j )
- {
- if( get_al(j)==get_al(j1) )
- cout << 'A';
- else
- cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
- }
- else
- cout << ' ';
- cout << endl;
- }
+ for(WordIndex j=1; j<=m; j++) {
+ for(WordIndex j1=1; j1<=m; j1++)
+ if( j1>j ) {
+ if( get_al(j)==get_al(j1) )
+ cout << 'A';
+ else
+ cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
+ } else
+ cout << ' ';
+ cout << endl;
+ }
massert(0);
}
template<class TRANSPAIR>
-bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
- if( lazyEvaluation )
+bool MoveSwapMatrix<TRANSPAIR>::isRight()const
+{
+ if( lazyEvaluation )
return 1;
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) )
- {
- cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
- return 0;
- }
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex j1=1;j1<=m;j1++)
- if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) )
- {
- cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
- return 0;
- }
+ for(WordIndex i=0; i<=l; i++)
+ for(WordIndex j=1; j<=m; j++)
+ if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) ) {
+ cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
+ return 0;
+ }
+ for(WordIndex j=1; j<=m; j++)
+ for(WordIndex j1=1; j1<=m; j1++)
+ if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) ) {
+ cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
+ return 0;
+ }
return 1;
}
@@ -130,95 +125,81 @@ void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
WordIndex old_i=get_al(_j);
if( lazyEvaluation )
set(_j,_i);
- else
- {
- if ( modelnr==5||modelnr==6 )
- {
- set(_j, _i);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
- }
- else if ( modelnr==4 )
- {
- changedCounter++;
- for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
- set(_j, _i);
- for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(unsigned int i=0;i<=l;i++)
- if(changed[i]==changedCounter)
- updateI(i,thisValue);
- for(unsigned int j=1;j<=m;j++)
- if( changed[get_al(j)]==changedCounter )
- updateJ(j, 1,thisValue);
- }
- else
- {
- assert(modelnr==3);
- set(_j, _i);
- changedCounter++;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- updateI(old_i,thisValue);
- changed[old_i]=changedCounter;
- updateI(_i,thisValue);
- changed[_i]=changedCounter;
- for(WordIndex j=1;j<=m;j++)
- if( get_al(j)==_i || get_al(j)==old_i )
- updateJ(j, 1,thisValue);
- }
+ else {
+ if ( modelnr==5||modelnr==6 ) {
+ set(_j, _i);
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ for(WordIndex j=1; j<=m; j++)updateJ(j, 0,thisValue);
+ } else if ( modelnr==4 ) {
+ changedCounter++;
+ for(unsigned int k=prev_cept(old_i); k<=next_cept(old_i); ++k)changed[k]=changedCounter;
+ for(unsigned int k=prev_cept(_i); k<=next_cept(_i); ++k)changed[k]=changedCounter;
+ set(_j, _i);
+ for(unsigned int k=prev_cept(old_i); k<=next_cept(old_i); ++k)changed[k]=changedCounter;
+ for(unsigned int k=prev_cept(_i); k<=next_cept(_i); ++k)changed[k]=changedCounter;
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ for(unsigned int i=0; i<=l; i++)
+ if(changed[i]==changedCounter)
+ updateI(i,thisValue);
+ for(unsigned int j=1; j<=m; j++)
+ if( changed[get_al(j)]==changedCounter )
+ updateJ(j, 1,thisValue);
+ } else {
+ assert(modelnr==3);
+ set(_j, _i);
+ changedCounter++;
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ updateI(old_i,thisValue);
+ changed[old_i]=changedCounter;
+ updateI(_i,thisValue);
+ changed[_i]=changedCounter;
+ for(WordIndex j=1; j<=m; j++)
+ if( get_al(j)==_i || get_al(j)==old_i )
+ updateJ(j, 1,thisValue);
}
+ }
}
template<class TRANSPAIR>
void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
{
assert( cswap(_j1, _j2)>1 );
WordIndex i1=get_al(_j1), i2=get_al(_j2);
- if( lazyEvaluation==1 )
- {
+ if( lazyEvaluation==1 ) {
+ set(_j1, i2);
+ set(_j2, i1);
+ } else {
+ if ( modelnr==5||modelnr==6 ) {
set(_j1, i2);
set(_j2, i1);
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ for(WordIndex j=1; j<=m; j++)updateJ(j, 0,thisValue);
+ } else if( modelnr==4 ) {
+ changedCounter++;
+ for(unsigned int k=prev_cept(i1); k<=next_cept(i1); ++k)changed[k]=changedCounter;
+ for(unsigned int k=prev_cept(i2); k<=next_cept(i2); ++k)changed[k]=changedCounter;
+ set(_j1, i2);
+ set(_j2, i1);
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ for(unsigned int i=0; i<=l; i++)
+ if(changed[i]==changedCounter)
+ updateI(i,thisValue);
+ for(unsigned int j=1; j<=m; j++)
+ if( changed[get_al(j)]==changedCounter )
+ updateJ(j, 1,thisValue);
+ } else {
+ assert(modelnr==3);
+ set(_j1, i2);
+ set(_j2, i1);
+ changedCounter++;
+ double thisValue=ef.scoreOfAlignmentForChange((*this));
+ updateI(i1,thisValue);
+ changed[i1]=changedCounter;
+ updateI(i2,thisValue);
+ changed[i2]=changedCounter;
+ updateJ(_j1, 1,thisValue);
+ updateJ(_j2, 1,thisValue);
}
- else
- {
- if ( modelnr==5||modelnr==6 )
- {
- set(_j1, i2);
- set(_j2, i1);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
- }
- else if( modelnr==4 )
- {
- changedCounter++;
- for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)changed[k]=changedCounter;
- for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)changed[k]=changedCounter;
- set(_j1, i2);
- set(_j2, i1);
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- for(unsigned int i=0;i<=l;i++)
- if(changed[i]==changedCounter)
- updateI(i,thisValue);
- for(unsigned int j=1;j<=m;j++)
- if( changed[get_al(j)]==changedCounter )
- updateJ(j, 1,thisValue);
- }
- else
- {
- assert(modelnr==3);
- set(_j1, i2);
- set(_j2, i1);
- changedCounter++;
- double thisValue=ef.scoreOfAlignmentForChange((*this));
- updateI(i1,thisValue);
- changed[i1]=changedCounter;
- updateI(i2,thisValue);
- changed[i2]=changedCounter;
- updateJ(_j1, 1,thisValue);
- updateJ(_j2, 1,thisValue);
- }
- }
+ }
}
#include "transpair_model3.h"
diff --git a/mgizapp/src/MoveSwapMatrix.h b/mgizapp/src/MoveSwapMatrix.h
index b1bbf15..27881ff 100644
--- a/mgizapp/src/MoveSwapMatrix.h
+++ b/mgizapp/src/MoveSwapMatrix.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -35,7 +35,7 @@ extern short DoViterbiTraining;
template<class TRANSPAIR>
class MoveSwapMatrix : public alignment
{
- private:
+private:
const TRANSPAIR&ef;
const WordIndex l, m;
Array2<LogProb, Vector<LogProb> > _cmove, _cswap;
@@ -45,72 +45,66 @@ class MoveSwapMatrix : public alignment
const int modelnr;
bool lazyEvaluation;
bool centerDeleted;
- public:
- bool check()const
- {
- return 1;
- }
- const TRANSPAIR&get_ef()const
- {return ef;}
- bool isCenterDeleted()const
- {return centerDeleted;}
- bool isLazy()const
- {return lazyEvaluation;}
+public:
+ bool check()const {
+ return 1;
+ }
+ const TRANSPAIR&get_ef()const {
+ return ef;
+ }
+ bool isCenterDeleted()const {
+ return centerDeleted;
+ }
+ bool isLazy()const {
+ return lazyEvaluation;
+ }
MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
void updateJ(WordIndex j, bool,double thisValue);
void updateI(WordIndex i,double thisValue);
void doMove(WordIndex _i, WordIndex _j);
void doSwap(WordIndex _j1, WordIndex _j2);
- void delCenter()
- {
- centerDeleted=1;
- }
- void delMove(WordIndex x, WordIndex y)
- {
- delmove(x,y)=1;
+ void delCenter() {
+ centerDeleted=1;
+ }
+ void delMove(WordIndex x, WordIndex y) {
+ delmove(x,y)=1;
+ }
+ void delSwap(WordIndex x, WordIndex y) {
+ massert(y>x);
+ delswap(x,y)=1;
+ delswap(y,x)=1;
+ }
+ bool isDelMove(WordIndex x, WordIndex y)const {
+ return DoViterbiTraining||delmove(x,y);
+ }
+ bool isDelSwap(WordIndex x, WordIndex y)const {
+ massert(y>x);
+ return DoViterbiTraining||delswap(x,y);
+ }
+ LogProb cmove(WordIndex x, WordIndex y)const {
+ massert( get_al(y)!=x );
+ massert( delmove(x,y)==0 );
+ if( lazyEvaluation )
+ return ef.scoreOfMove(*this,x,y);
+ else {
+ return _cmove(x, y);
}
- void delSwap(WordIndex x, WordIndex y)
- {
+ }
+ LogProb cswap(WordIndex x, WordIndex y)const {
+ massert(x<y);
+ massert(delswap(x,y)==0);
+ massert(get_al(x)!=get_al(y));
+ if( lazyEvaluation )
+ return ef.scoreOfSwap(*this,x,y);
+ else {
massert(y>x);
- delswap(x,y)=1;
- delswap(y,x)=1;
- }
- bool isDelMove(WordIndex x, WordIndex y)const
- {
- return DoViterbiTraining||delmove(x,y);
- }
- bool isDelSwap(WordIndex x, WordIndex y)const
- {
- massert(y>x);
- return DoViterbiTraining||delswap(x,y);
- }
- LogProb cmove(WordIndex x, WordIndex y)const
- {
- massert( get_al(y)!=x );
- massert( delmove(x,y)==0 );
- if( lazyEvaluation )
- return ef.scoreOfMove(*this,x,y);
- else
- {
- return _cmove(x, y);
- }
- }
- LogProb cswap(WordIndex x, WordIndex y)const
- {
- massert(x<y);
- massert(delswap(x,y)==0);
- massert(get_al(x)!=get_al(y));
- if( lazyEvaluation )
- return ef.scoreOfSwap(*this,x,y);
- else
- {
- massert(y>x);
- return _cswap(x, y);
- }
+ return _cswap(x, y);
}
+ }
void printWrongs()const;
bool isRight()const;
- friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m)
- {return out << (alignment)m << "\nEF:\n"<< m.ef << "\nCMOVE\n"<<m._cmove << "\nCSWAP\n" << m._cswap << endl;};
+ friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m) {
+ return out << (alignment)m << "\nEF:\n"<< m.ef << "\nCMOVE\n"<<m._cmove << "\nCSWAP\n" << m._cswap << endl;
+ };
};
#endif
diff --git a/mgizapp/src/NTables.cpp b/mgizapp/src/NTables.cpp
index e5676d5..028a377 100644
--- a/mgizapp/src/NTables.cpp
+++ b/mgizapp/src/NTables.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,155 +29,158 @@ GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertilit
GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
template <class VALTYPE>
-void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist,
- bool actual) const
- // prints the fertility table but with actual sourcce words (not their id)
+void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist,
+ bool actual) const
+// prints the fertility table but with actual sourcce words (not their id)
{
- cerr << "Dumping nTable to: " << filename << '\n';
- ofstream of(filename);
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- if (actual)
- of << evlist[i].word << ' ' ;
- else
- of << i << ' ' ;
- for( k=0; k < MAX_FERTILITY; k++){
- p = getValue(i, k);
- if (p <= PROB_SMOOTH)
- p = 0;
- of << p << ' ';
- }
- of << '\n';
- }
+ cerr << "Dumping nTable to: " << filename << '\n';
+ ofstream of(filename);
+ VALTYPE p ;
+ WordIndex k, i ;
+ for(i=1; int(i) < noEW; i++) {
+ if (evlist[i].freq > 0) {
+ if (actual)
+ of << evlist[i].word << ' ' ;
+ else
+ of << i << ' ' ;
+ for( k=0; k < MAX_FERTILITY; k++) {
+ p = getValue(i, k);
+ if (p <= PROB_SMOOTH)
+ p = 0;
+ of << p << ' ';
+ }
+ of << '\n';
}
+ }
}
template <class VALTYPE>
-void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist,
- bool actual) const
- // prints the fertility table but with actual sourcce words (not their id)
+void nmodel<VALTYPE>::printRealNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist,
+ bool actual) const
+// prints the fertility table but with actual sourcce words (not their id)
{
- cerr << "Dumping nTable to: " << filename << '\n';
- ofstream of(filename);
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- if (actual)
- of << evlist[i].word << ' ' ;
- else
- of << i << ' ' ;
- for( k=0; k < MAX_FERTILITY; k++){
- p = getValue(i, k);
-// if (p <= PROB_SMOOTH)
+ cerr << "Dumping nTable to: " << filename << '\n';
+ ofstream of(filename);
+ VALTYPE p ;
+ WordIndex k, i ;
+ for(i=1; int(i) < noEW; i++) {
+ if (evlist[i].freq > 0) {
+ if (actual)
+ of << evlist[i].word << ' ' ;
+ else
+ of << i << ' ' ;
+ for( k=0; k < MAX_FERTILITY; k++) {
+ p = getValue(i, k);
+// if (p <= PROB_SMOOTH)
// p = 0;
- of << p << ' ';
- }
- of << '\n';
- }
+ of << p << ' ';
+ }
+ of << '\n';
}
+ }
}
template <class VALTYPE>
-bool nmodel<VALTYPE>::readNTable(const char *filename){
+bool nmodel<VALTYPE>::readNTable(const char *filename)
+{
/* This function reads the n table from a file.
Each line is of the format: source_word_id p0 p1 p2 ... pn
This is the inverse operation of the printTable function.
NAS, 7/11/99
*/
- ifstream inf(filename);
- if(!inf.is_open()){
- return false;
- }
- cerr << "Reading fertility table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename <<"\n";
- return false;
+ ifstream inf(filename);
+ if(!inf.is_open()) {
+ return false;
+ }
+ cerr << "Reading fertility table from " << filename << "\n";
+ if(!inf) {
+ cerr << "\nERROR: Cannot open " << filename <<"\n";
+ return false;
+ }
+
+ VALTYPE prob;
+ WordIndex tok, i;
+ int nFert=0;
+ while(!inf.eof()) {
+ nFert++;
+ inf >> ws >> tok;
+ if (tok > MAX_VOCAB_SIZE) {
+ cerr << "NTables:readNTable(): unrecognized token id: " << tok
+ <<'\n';
+ exit(-1);
}
-
- VALTYPE prob;
- WordIndex tok, i;
- int nFert=0;
- while(!inf.eof()){
- nFert++;
- inf >> ws >> tok;
- if (tok > MAX_VOCAB_SIZE){
- cerr << "NTables:readNTable(): unrecognized token id: " << tok
- <<'\n';
- exit(-1);
- }
- for(i = 0; i < MAX_FERTILITY; i++){
- inf >> ws >> prob;
- getRef(tok, i)=prob;
- }
+ for(i = 0; i < MAX_FERTILITY; i++) {
+ inf >> ws >> prob;
+ getRef(tok, i)=prob;
}
- cerr << "Read " << nFert << " entries in fertility table.\n";
- inf.close();
- return true;
+ }
+ cerr << "Read " << nFert << " entries in fertility table.\n";
+ inf.close();
+ return true;
}
template <class VALTYPE>
-bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist){
+bool nmodel<VALTYPE>::merge(nmodel<VALTYPE>& n,int noEW, const Vector<WordEntry>& evlist)
+{
/* This function reads the n table from a file.
Each line is of the format: source_word_id p0 p1 p2 ... pn
This is the inverse operation of the printTable function.
NAS, 7/11/99
*/
-
- VALTYPE p ;
- WordIndex k, i ;
- for(i=1; int(i) < noEW; i++){
- if (evlist[i].freq > 0){
- for( k=0; k < MAX_FERTILITY; k++){
- p = n.getValue(i, k);
- getRef(i,k)+=p;
- }
- }
+
+ VALTYPE p ;
+ WordIndex k, i ;
+ for(i=1; int(i) < noEW; i++) {
+ if (evlist[i].freq > 0) {
+ for( k=0; k < MAX_FERTILITY; k++) {
+ p = n.getValue(i, k);
+ getRef(i,k)+=p;
+ }
}
- return true;
+ }
+ return true;
}
template <class VALTYPE>
-bool nmodel<VALTYPE>::readAugNTable(const char *filename){
+bool nmodel<VALTYPE>::readAugNTable(const char *filename)
+{
/* This function reads the n table from a file.
Each line is of the format: source_word_id p0 p1 p2 ... pn
This is the inverse operation of the printTable function.
NAS, 7/11/99
*/
- ifstream inf(filename);
- if(!inf.is_open()){
- return false;
- }
- cerr << "Reading fertility table from " << filename << "\n";
- if(!inf){
- cerr << "\nERROR: Cannot open " << filename <<"\n";
- return false;
+ ifstream inf(filename);
+ if(!inf.is_open()) {
+ return false;
+ }
+ cerr << "Reading fertility table from " << filename << "\n";
+ if(!inf) {
+ cerr << "\nERROR: Cannot open " << filename <<"\n";
+ return false;
+ }
+
+ VALTYPE prob;
+ WordIndex tok, i;
+ int nFert=0;
+ while(!inf.eof()) {
+ nFert++;
+ inf >> ws >> tok;
+ if (tok > MAX_VOCAB_SIZE) {
+ cerr << "NTables:readNTable(): unrecognized token id: " << tok
+ <<'\n';
+ exit(-1);
}
-
- VALTYPE prob;
- WordIndex tok, i;
- int nFert=0;
- while(!inf.eof()){
- nFert++;
- inf >> ws >> tok;
- if (tok > MAX_VOCAB_SIZE){
- cerr << "NTables:readNTable(): unrecognized token id: " << tok
- <<'\n';
- exit(-1);
- }
- for(i = 0; i < MAX_FERTILITY; i++){
- inf >> ws >> prob;
- getRef(tok, i)+=prob;
- }
+ for(i = 0; i < MAX_FERTILITY; i++) {
+ inf >> ws >> prob;
+ getRef(tok, i)+=prob;
}
- cerr << "Read " << nFert << " entries in fertility table.\n";
- inf.close();
- return true;
+ }
+ cerr << "Read " << nFert << " entries in fertility table.\n";
+ inf.close();
+ return true;
}
template class nmodel<COUNT>;
diff --git a/mgizapp/src/NTables.h b/mgizapp/src/NTables.h
index 698a470..c2d9614 100644
--- a/mgizapp/src/NTables.h
+++ b/mgizapp/src/NTables.h
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,113 +32,118 @@
extern double NTablesFactorGraphemes, NTablesFactorGeneral;
-template<class VALTYPE> class nmodel {
+template<class VALTYPE> class nmodel
+{
private:
- Array2<VALTYPE, Vector<VALTYPE> > ntab;
+ Array2<VALTYPE, Vector<VALTYPE> > ntab;
public:
- nmodel(int maxw, int maxn) :
- ntab(maxw, maxn, 0.0) {
- }
- VALTYPE getValue(int w, unsigned int n) const {
- massert(w!=0);
- if (n>=ntab.getLen2())
- return 0.0;
- else
- return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
- }
+ nmodel(int maxw, int maxn) :
+ ntab(maxw, maxn, 0.0) {
+ }
+ VALTYPE getValue(int w, unsigned int n) const {
+ massert(w!=0);
+ if (n>=ntab.getLen2())
+ return 0.0;
+ else
+ return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
+ }
protected:
- inline VALTYPE&getRef(int w, int n) {
- //massert(w!=0);
- return ntab(w, n);
- };
- Mutex lock;
+ inline VALTYPE&getRef(int w, int n) {
+ //massert(w!=0);
+ return ntab(w, n);
+ };
+ Mutex lock;
public:
- inline void addValue(int w , int n,const VALTYPE& t){lock.lock();ntab(w,n)+=t;lock.unlock();};
+ inline void addValue(int w , int n,const VALTYPE& t) {
+ lock.lock();
+ ntab(w,n)+=t;
+ lock.unlock();
+ };
public:
- template<class COUNT> void normalize(nmodel<COUNT>&write,
- const Vector<WordEntry>* _evlist) const {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- int nParams=0;
- if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
- size_t maxlen=0;
- const Vector<WordEntry>&evlist=*_evlist;
- for (unsigned int i=1; i<evlist.size(); i++)
- maxlen=max(maxlen, evlist[i].word.length());
- Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
- Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
- for (unsigned int i=1; i<min((unsigned int)h1,
- (unsigned int)evlist.size()); i++) {
- int l=evlist[i].word.length();
- for (int k=0; k<h2; k++) {
- counts(l, k)+=getValue(i, k);
- nprob_general[k]+=getValue(i, k);
- }
- }
- COUNT sum2=0;
- for (unsigned int i=1; i<maxlen+1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=counts(i, k);
- sum2+=sum;
- if (sum) {
- double average=0.0;
- //cerr << "l: " << i << " " << sum << " ";
- for (int k=0; k<h2; k++) {
- counts(i, k)/=sum;
- //cerr << counts(i,k) << ' ';
- average+=k*counts(i, k);
- }
- //cerr << "avg: " << average << endl;
- //cerr << '\n';
- }
- }
- for (unsigned int k=0; k<nprob_general.size(); k++)
- nprob_general[k]/=sum2;
+ template<class COUNT> void normalize(nmodel<COUNT>&write,
+ const Vector<WordEntry>* _evlist) const {
+ int h1=ntab.getLen1(), h2=ntab.getLen2();
+ int nParams=0;
+ if (_evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral)) {
+ size_t maxlen=0;
+ const Vector<WordEntry>&evlist=*_evlist;
+ for (unsigned int i=1; i<evlist.size(); i++)
+ maxlen=max(maxlen, evlist[i].word.length());
+ Array2<COUNT,Vector<COUNT> > counts(maxlen+1, MAX_FERTILITY+1, 0.0);
+ Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
+ for (unsigned int i=1; i<min((unsigned int)h1,
+ (unsigned int)evlist.size()); i++) {
+ int l=evlist[i].word.length();
+ for (int k=0; k<h2; k++) {
+ counts(l, k)+=getValue(i, k);
+ nprob_general[k]+=getValue(i, k);
+ }
+ }
+ COUNT sum2=0;
+ for (unsigned int i=1; i<maxlen+1; i++) {
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=counts(i, k);
+ sum2+=sum;
+ if (sum) {
+ double average=0.0;
+ //cerr << "l: " << i << " " << sum << " ";
+ for (int k=0; k<h2; k++) {
+ counts(i, k)/=sum;
+ //cerr << counts(i,k) << ' ';
+ average+=k*counts(i, k);
+ }
+ //cerr << "avg: " << average << endl;
+ //cerr << '\n';
+ }
+ }
+ for (unsigned int k=0; k<nprob_general.size(); k++)
+ nprob_general[k]/=sum2;
- for (int i=1; i<h1; i++) {
- int l=-1;
- if ((unsigned int)i<evlist.size())
- l=evlist[i].word.length();
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
- *NTablesFactorGraphemes)) + NTablesFactorGeneral
- *nprob_general[k];
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
- : (counts(l, k)*NTablesFactorGraphemes)))/sum
- + NTablesFactorGeneral*nprob_general[k];
- nParams++;
- }
- }
- } else
- for (int i=1; i<h1; i++) {
- COUNT sum=0.0;
- for (int k=0; k<h2; k++)
- sum+=getValue(i, k);
- assert(sum);
- for (int k=0; k<h2; k++) {
- write.getRef(i, k)=getValue(i, k)/sum;
- nParams++;
- }
- }
- cerr << "NTable contains " << nParams << " parameter.\n";
- }
+ for (int i=1; i<h1; i++) {
+ int l=-1;
+ if ((unsigned int)i<evlist.size())
+ l=evlist[i].word.length();
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=getValue(i, k)+((l==-1) ? 0.0 : (counts(l, k)
+ *NTablesFactorGraphemes)) + NTablesFactorGeneral
+ *nprob_general[k];
+ assert(sum);
+ for (int k=0; k<h2; k++) {
+ write.getRef(i, k)=(getValue(i, k)+((l==-1) ? 0.0
+ : (counts(l, k)*NTablesFactorGraphemes)))/sum
+ + NTablesFactorGeneral*nprob_general[k];
+ nParams++;
+ }
+ }
+ } else
+ for (int i=1; i<h1; i++) {
+ COUNT sum=0.0;
+ for (int k=0; k<h2; k++)
+ sum+=getValue(i, k);
+ assert(sum);
+ for (int k=0; k<h2; k++) {
+ write.getRef(i, k)=getValue(i, k)/sum;
+ nParams++;
+ }
+ }
+ cerr << "NTable contains " << nParams << " parameter.\n";
+ }
- bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
- void clear() {
- int h1=ntab.getLen1(), h2=ntab.getLen2();
- for (int i=0; i<h1; i++)
- for (int k=0; k<h2; k++)
- ntab(i, k)=0;
- }
- void printNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- void printRealNTable(int noEW, const char* filename,
- const Vector<WordEntry>& evlist, bool) const;
- bool readAugNTable(const char *filename);
- bool readNTable(const char *filename);
+ bool merge(nmodel<VALTYPE>& n, int noEW, const Vector<WordEntry>& evlist);
+ void clear() {
+ int h1=ntab.getLen1(), h2=ntab.getLen2();
+ for (int i=0; i<h1; i++)
+ for (int k=0; k<h2; k++)
+ ntab(i, k)=0;
+ }
+ void printNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist, bool) const;
+ void printRealNTable(int noEW, const char* filename,
+ const Vector<WordEntry>& evlist, bool) const;
+ bool readAugNTable(const char *filename);
+ bool readNTable(const char *filename);
};
diff --git a/mgizapp/src/Parameter.cpp b/mgizapp/src/Parameter.cpp
index 1175ec7..d51554f 100644
--- a/mgizapp/src/Parameter.cpp
+++ b/mgizapp/src/Parameter.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -38,27 +38,24 @@ bool ParameterChangedFlag=0;
bool writeParameters(ofstream&of,const ParSet&parset,int level)
{
if(!of)return 0;
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
- {
- ostrstream os;
- (*i)->printValue(os);
- os << ends;
- string s(os.str());
- of << (*i)->getString() << " ";
- if( absolutePathNames&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
- {
- char path[1024];
- getcwd(path,1024);
- of << path << '/';
- }
- if( ParameterPathPrefix.length()&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
- of << ParameterPathPrefix << '/';
- (*i)->printValue(of);
- of << endl;
- }
+ for(ParSet::const_iterator i=parset.begin(); i!=parset.end(); ++i) {
+ if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0) {
+ ostrstream os;
+ (*i)->printValue(os);
+ os << ends;
+ string s(os.str());
+ of << (*i)->getString() << " ";
+ if( absolutePathNames&&(*i)->isFilename()&&s.length()&&s[0]!='/' ) {
+ char path[1024];
+ getcwd(path,1024);
+ of << path << '/';
+ }
+ if( ParameterPathPrefix.length()&&(*i)->isFilename()&&s.length()&&s[0]!='/' )
+ of << ParameterPathPrefix << '/';
+ (*i)->printValue(of);
+ of << endl;
}
+ }
return 1;
}
@@ -66,46 +63,41 @@ bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
{
string s;
if(!f)return 0;
- while(getline(f,s))
- {
- istrstream eingabe(s.c_str());
- string s1,s2;
- eingabe>>s1>>s2;
- if(makeSetCommand(s1,s2,parset,verb,level)==0)
- cerr << "ERROR: could not set: (C) " << s1 << " " << s2 << endl;
- }
+ while(getline(f,s)) {
+ istrstream eingabe(s.c_str());
+ string s1,s2;
+ eingabe>>s1>>s2;
+ if(makeSetCommand(s1,s2,parset,verb,level)==0)
+ cerr << "ERROR: could not set: (C) " << s1 << " " << s2 << endl;
+ }
return 1;
}
-
+
bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
{
ParPtr anf;
int anfset=0;
string s1=simpleString(_s1);
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if( *(*i)==s1 )
- {
- if( level==-1 || level==(*i)->getLevel() )
- (*i)->setParameter(s2,verb);
- else if(verb>1)
- cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*i)->getLevel() << endl;
- return 1;
- }
- else if( (*i)->getString().substr(0,s1.length())==s1 )
- {
- anf=(*i);anfset++;
- }
- }
- if(anfset==1)
- {
- if( level==-1 || level==anf->getLevel() )
- anf->setParameter(s2,verb);
- else if( verb>1 )
- cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << anf->getLevel() << endl;
+ for(ParSet::const_iterator i=parset.begin(); i!=parset.end(); ++i) {
+ if( *(*i)==s1 ) {
+ if( level==-1 || level==(*i)->getLevel() )
+ (*i)->setParameter(s2,verb);
+ else if(verb>1)
+ cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*i)->getLevel() << endl;
return 1;
+ } else if( (*i)->getString().substr(0,s1.length())==s1 ) {
+ anf=(*i);
+ anfset++;
}
+ }
+ if(anfset==1) {
+ if( level==-1 || level==anf->getLevel() )
+ anf->setParameter(s2,verb);
+ else if( verb>1 )
+ cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << anf->getLevel() << endl;
+ return 1;
+ }
if( anfset>1 )
cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
if( anfset==0 )
@@ -116,28 +108,25 @@ bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
ostream& printPars(ostream&of,const ParSet&parset,int level)
{
if(!of)return of;
- for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
- {
- if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
- {
- (*i)->printAt(of);
- of << endl;
- }
+ for(ParSet::const_iterator i=parset.begin(); i!=parset.end(); ++i) {
+ if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0) {
+ (*i)->printAt(of);
+ of << endl;
}
+ }
return of;
}
string simpleString(const string s)
{
string k;
- for(unsigned int i=0;i<s.length();++i)
- {
- char c[2];
- c[0]=tolower(s[i]);
- c[1]=0;
- if( (c[0]>='a'&&c[0]<='z')||(c[0]>='0'&&c[0]<='9') )
- k += c;
- }
+ for(unsigned int i=0; i<s.length(); ++i) {
+ char c[2];
+ c[0]=tolower(s[i]);
+ c[1]=0;
+ if( (c[0]>='a'&&c[0]<='z')||(c[0]>='0'&&c[0]<='9') )
+ k += c;
+ }
return k;
}
diff --git a/mgizapp/src/Parameter.h b/mgizapp/src/Parameter.h
index 64a9450..62b8053 100644
--- a/mgizapp/src/Parameter.h
+++ b/mgizapp/src/Parameter.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,33 +36,76 @@ USA.
#endif
inline unsigned int mConvert(const string&s,unsigned int &i)
-{
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return i=1; }
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return i=0;}
- return i=atoi(s.c_str());
+{
+ if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) {
+ cerr << "TRUE\n";
+ return i=1;
+ }
+ if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) {
+ cerr << "FALSE\n";
+ return i=0;
+ }
+ return i=atoi(s.c_str());
+}
+inline int mConvert(const string&s,int &i)
+{
+ if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) {
+ cerr << "TRUE\n";
+ return i=1;
+ }
+ if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) {
+ cerr << "FALSE\n";
+ return i=0;
+ }
+ return i=atoi(s.c_str());
+}
+inline double mConvert(const string&s,double &d)
+{
+ return d=atof(s.c_str());
+}
+inline double mConvert(const string&s,float &d)
+{
+ return d=atof(s.c_str());
}
-inline int mConvert(const string&s,int &i){
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return i=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return i=0;}
- return i=atoi(s.c_str());
+inline string mConvert(const string&s,string&n)
+{
+ return n=s;
}
-inline double mConvert(const string&s,double &d) { return d=atof(s.c_str()); }
-inline double mConvert(const string&s,float &d) { return d=atof(s.c_str()); }
-inline string mConvert(const string&s,string&n) { return n=s; }
-inline bool mConvert(const string&s,bool&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
+inline bool mConvert(const string&s,bool&n)
+{
+ if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) {
+ cerr << "TRUE\n";
+ return n=1;
+ }
+ if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) {
+ cerr << "FALSE\n";
+ return n=0;
+ }
+ return n=atoi(s.c_str());
}
-inline short mConvert(const string&s,short&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
+inline short mConvert(const string&s,short&n)
+{
+ if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) {
+ cerr << "TRUE\n";
+ return n=1;
+ }
+ if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) {
+ cerr << "FALSE\n";
+ return n=0;
+ }
+ return n=atoi(s.c_str());
}
-inline unsigned short mConvert(const string&s,unsigned short&n) {
- if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) { cerr << "TRUE\n";return n=1;}
- if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) { cerr << "FALSE\n";return n=0;}
- return n=atoi(s.c_str());
+inline unsigned short mConvert(const string&s,unsigned short&n)
+{
+ if( strcasecmp(s.c_str(),"yes")==0 || strcasecmp(s.c_str(),"y")==0 || strcasecmp(s.c_str(),"true")==0 || strcasecmp(s.c_str(),"t")==0 ) {
+ cerr << "TRUE\n";
+ return n=1;
+ }
+ if( strcasecmp(s.c_str(),"no")==0 || strcasecmp(s.c_str(),"n")==0 || strcasecmp(s.c_str(),"false")==0 || strcasecmp(s.c_str(),"f")==0 ) {
+ cerr << "FALSE\n";
+ return n=0;
+ }
+ return n=atoi(s.c_str());
}
string simpleString(const string s);
@@ -71,89 +114,101 @@ inline int Hashstring(const string& s)
{
int sum=0;
string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
+ for(; i!=end; i++)sum=5*sum+(*i);
return sum;
}
class _Parameter
{
- protected:
+protected:
string name;
bool *ifChanged;
string description;
int level;
bool filename;
- public:
+public:
int onlyCopy;
_Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
: name(simpleString(n)),ifChanged(&b),description(desc),level(_level),filename(0),onlyCopy(_onlyCopy) {}
- virtual ~_Parameter(){};
- bool operator==(const string&s)const
- { return name== simpleString(s); }
- void setChanged()
- { *ifChanged=true; }
+ virtual ~_Parameter() {};
+ bool operator==(const string&s)const {
+ return name== simpleString(s);
+ }
+ void setChanged() {
+ *ifChanged=true;
+ }
virtual bool setParameter(string s2,int)=0;
virtual ostream&printAt(ostream&out)=0;
virtual ostream&printValue(ostream&out)=0;
- const string&getString() const { return name; }
- int getLevel() const { return level;}
- bool isFilename() { return filename;}
- void setFilename(bool x=1) { filename=x;}
- friend bool operator==(const _Parameter&a,const _Parameter&b)
- { return a.name==b.name; }
- friend bool operator<(const _Parameter&a,const _Parameter&b)
- { return a.name<b.name; }
- friend int Hash(const _Parameter&aaa)
- { return Hashstring(aaa.name); }
- friend ostream&operator<<(ostream&out,const _Parameter&p)
- { return out<<"Parameter: "<<p.name <<endl;}
+ const string&getString() const {
+ return name;
+ }
+ int getLevel() const {
+ return level;
+ }
+ bool isFilename() {
+ return filename;
+ }
+ void setFilename(bool x=1) {
+ filename=x;
+ }
+ friend bool operator==(const _Parameter&a,const _Parameter&b) {
+ return a.name==b.name;
+ }
+ friend bool operator<(const _Parameter&a,const _Parameter&b) {
+ return a.name<b.name;
+ }
+ friend int Hash(const _Parameter&aaa) {
+ return Hashstring(aaa.name);
+ }
+ friend ostream&operator<<(ostream&out,const _Parameter&p) {
+ return out<<"Parameter: "<<p.name <<endl;
+ }
};
template<class T>
class Parameter : public _Parameter
{
- private:
+private:
T*t;
- public:
+public:
Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
: _Parameter(n,b,desc,level,onlyCopy),t(&_t) {}
- virtual ~Parameter(){}
- virtual bool setParameter(string s2,int verb)
- {
- T x;
- if( !(*t==mConvert(s2,x)))
- {
- bool printedFirst=0;
- if( verb>1 )
- {
- cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
- printedFirst=1;
- }
- mConvert(s2,*t);
- if( printedFirst )
- cout << *t <<"'\n";
- setChanged();
- return 1;
- }
- return 0;
+ virtual ~Parameter() {}
+ virtual bool setParameter(string s2,int verb) {
+ T x;
+ if( !(*t==mConvert(s2,x))) {
+ bool printedFirst=0;
+ if( verb>1 ) {
+ cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
+ printedFirst=1;
+ }
+ mConvert(s2,*t);
+ if( printedFirst )
+ cout << *t <<"'\n";
+ setChanged();
+ return 1;
}
- virtual ostream&printAt(ostream&out)
- {return out << name << " = " << *t << " (" << description << ")";}
- virtual ostream&printValue(ostream&out)
- {return out << *t;}
+ return 0;
+ }
+ virtual ostream&printAt(ostream&out) {
+ return out << name << " = " << *t << " (" << description << ")";
+ }
+ virtual ostream&printValue(ostream&out) {
+ return out << *t;
+ }
};
typedef MP<_Parameter> ParPtr;
class ParSet : public set<ParPtr>
{
- public:
- void insert(const ParPtr&x)
- {
- if( count(x)!=0 )
- cerr << "ERROR: element " << x->getString() << " already inserted.\n";
- set<ParPtr>::insert(x);
- }
+public:
+ void insert(const ParPtr&x) {
+ if( count(x)!=0 )
+ cerr << "ERROR: element " << x->getString() << " already inserted.\n";
+ set<ParPtr>::insert(x);
+ }
};
bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
diff --git a/mgizapp/src/Perplexity.cpp b/mgizapp/src/Perplexity.cpp
index faa1f81..7c7e28e 100644
--- a/mgizapp/src/Perplexity.cpp
+++ b/mgizapp/src/Perplexity.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -24,7 +24,7 @@ USA.
* Mike Jahr, 7/21/99
* Machine Translation group, WS99
* Center for Language and Speech Processing
- *
+ *
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
*
* Simple class used to calculate cross entropy and perplexity
@@ -33,7 +33,8 @@ USA.
#include "Perplexity.h"
-void Perplexity::record(string model){
+void Perplexity::record(string model)
+{
mutex.lock();
modelid.push_back(model);
perp.push_back(perplexity());
diff --git a/mgizapp/src/Perplexity.h b/mgizapp/src/Perplexity.h
index 6e24cf0..a79c9ec 100644
--- a/mgizapp/src/Perplexity.h
+++ b/mgizapp/src/Perplexity.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -24,7 +24,7 @@ USA.
* Mike Jahr, 7/15/99
* Machine Translation group, WS99
* Center for Language and Speech Processing
- *
+ *
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
*
* Simple class used to calculate cross entropy and perplexity
@@ -44,71 +44,76 @@ USA.
#define CROSS_ENTROPY_BASE 2
-class Perplexity {
- private:
- double sum;
- double wc;
- Array2<double, Vector<double> > *E_M_L;
- Vector<string> modelid;
- Vector<double > perp;
- Vector<double > ce;
- Vector<string> name ;
- Mutex mutex;
- public:
- ~Perplexity() { delete E_M_L;}
- Perplexity() {
- E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
- unsigned int l, m ;
- Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
- for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
- fact[m] = fact[m-1] * m ;
- for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
- for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
- (*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
- (fact[m])) ;
- }
- sum = 0 ;
- wc = 0;
- perp.clear();
- ce.clear();
- name.clear();
- }
- inline void clear() {
- mutex.lock();
- sum = 0 ;
- wc = 0 ;
- mutex.unlock();
- }
- size_t size() const {return(min(perp.size(), ce.size()));}
- inline void addFactor(const double p, const double count, const int l,
- const int m,bool withPoisson) {
- mutex.lock();
- wc += count * m ; // number of french words
- sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
- mutex.unlock();
- }
-
- inline double perplexity() const {
- return exp( -1*sum / wc);
- }
-
- inline double cross_entropy() const {
- return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
- }
-
- inline double word_count() const {
- return wc;
- }
-
- inline double getSum() const {
- return sum ;
- }
-
- void record(string model);
-
- friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
- const Perplexity&, const Perplexity&,
- ostream&, int, int, bool);
+class Perplexity
+{
+private:
+ double sum;
+ double wc;
+ Array2<double, Vector<double> > *E_M_L;
+ Vector<string> modelid;
+ Vector<double > perp;
+ Vector<double > ce;
+ Vector<string> name ;
+ Mutex mutex;
+public:
+ ~Perplexity() {
+ delete E_M_L;
+ }
+ Perplexity() {
+ E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
+ unsigned int l, m ;
+ Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
+ for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
+ fact[m] = fact[m-1] * m ;
+ for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
+ for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
+ (*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
+ (fact[m])) ;
+ }
+ sum = 0 ;
+ wc = 0;
+ perp.clear();
+ ce.clear();
+ name.clear();
+ }
+ inline void clear() {
+ mutex.lock();
+ sum = 0 ;
+ wc = 0 ;
+ mutex.unlock();
+ }
+ size_t size() const {
+ return(min(perp.size(), ce.size()));
+ }
+ inline void addFactor(const double p, const double count, const int l,
+ const int m,bool withPoisson) {
+ mutex.lock();
+ wc += count * m ; // number of french words
+ sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
+ mutex.unlock();
+ }
+
+ inline double perplexity() const {
+ return exp( -1*sum / wc);
+ }
+
+ inline double cross_entropy() const {
+ return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
+ }
+
+ inline double word_count() const {
+ return wc;
+ }
+
+ inline double getSum() const {
+ return sum ;
+ }
+
+ void record(string model);
+
+ friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
+ const Perplexity&, const Perplexity&,
+ ostream&, int, int, bool);
};
diff --git a/mgizapp/src/Pointer.h b/mgizapp/src/Pointer.h
index fd05688..1552678 100644
--- a/mgizapp/src/Pointer.h
+++ b/mgizapp/src/Pointer.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,141 +29,189 @@ USA.
template<class T>
class SmartPointer
{
- protected:
+protected:
T*p;
- public:
- SmartPointer(T*_p=0)
+public:
+ SmartPointer(T*_p=0)
: p(_p) {}
- inline T&operator*() const
- {return *p;}
- inline T*operator->() const
- {return p;}
- inline operator bool() const
- {return p!=0;}
- inline T*ptr() const
- { return p; }
+ inline T&operator*() const {
+ return *p;
+ }
+ inline T*operator->() const {
+ return p;
+ }
+ inline operator bool() const {
+ return p!=0;
+ }
+ inline T*ptr() const {
+ return p;
+ }
};
template<class T> inline ostream &operator<<(ostream&out,const SmartPointer<T>&s)
-{if( s.ptr() )return out << *s;else return out <<"nullpointer";}
+{
+ if( s.ptr() )return out << *s;
+ else return out <<"nullpointer";
+}
template<class T>
class SmartPointerConst
{
- protected:
+protected:
const T*p;
- public:
- SmartPointerConst(const T*_p=0)
+public:
+ SmartPointerConst(const T*_p=0)
: p(_p) {}
- inline const T&operator*() const
- {return *p;}
- inline const T*operator->() const
- {return p;}
- inline operator bool() const
- {return p!=0;}
- inline const T*ptr() const
- { return p; }
+ inline const T&operator*() const {
+ return *p;
+ }
+ inline const T*operator->() const {
+ return p;
+ }
+ inline operator bool() const {
+ return p!=0;
+ }
+ inline const T*ptr() const {
+ return p;
+ }
};
template<class T> inline ostream &operator<<(ostream&out,const SmartPointerConst<T>&s)
-{if( s.ptr() )return out << *s;else return out <<"nullpointer";}
+{
+ if( s.ptr() )return out << *s;
+ else return out <<"nullpointer";
+}
template <class T>
class UP : public SmartPointer<T>
{
- public:
- UP(T*_p=0)
+public:
+ UP(T*_p=0)
: SmartPointer<T>(_p) {}
};
template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
-{return s1.ptr()==s2.ptr();}
+{
+ return s1.ptr()==s2.ptr();
+}
template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
-{return s1.ptr() < s2.ptr();}
+{
+ return s1.ptr() < s2.ptr();
+}
template<class T> inline int Hash(const UP<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
+{
+ if(wp.ptr())return Hash(*wp);
+ else return 0;
+}
template <class T>
class UPConst : public SmartPointerConst<T>
{
- public:
- UPConst(const T*_p=0)
+public:
+ UPConst(const T*_p=0)
: SmartPointerConst<T>(_p) {}
};
template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
-{return s1.ptr()==s2.ptr();}
+{
+ return s1.ptr()==s2.ptr();
+}
template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
-{return s1.ptr()<s2.ptr();}
+{
+ return s1.ptr()<s2.ptr();
+}
template<class T> inline int Hash(const UPConst<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
+{
+ if(wp.ptr())return Hash(*wp);
+ else return 0;
+}
+
-
template <class T>
class MP : public SmartPointer<T>
{
- public:
- MP(T*_p=0)
+public:
+ MP(T*_p=0)
: SmartPointer<T>(_p) {}
};
template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
-{assert(s1);assert(s2);return *s1==*s2;}
+{
+ assert(s1);
+ assert(s2);
+ return *s1==*s2;
+}
template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
-{assert(s1);assert(s2);return *s1 < *s2;}
+{
+ assert(s1);
+ assert(s2);
+ return *s1 < *s2;
+}
template <class T> inline int Hash(const MP<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
+{
+ if(wp.ptr())return Hash(*wp);
+ else return 0;
+}
template <class T>
class MPConst : public SmartPointerConst<T>
{
- public:
- MPConst(const T*_p=0)
+public:
+ MPConst(const T*_p=0)
: SmartPointerConst<T>(_p) {}
};
template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
-{assert(s1);assert(s2);return *s1== *s2;}
+{
+ assert(s1);
+ assert(s2);
+ return *s1== *s2;
+}
template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
-{assert(s1);assert(s2);return *s1 < *s2;}
+{
+ assert(s1);
+ assert(s2);
+ return *s1 < *s2;
+}
template <class T> inline int Hash(const MPConst<T> &wp)
-{if(wp.ptr())return Hash(*wp);else return 0;}
+{
+ if(wp.ptr())return Hash(*wp);
+ else return 0;
+}
-template <class T>
+template <class T>
class DELP : public SmartPointer<T>
{
- private:
+private:
DELP(const DELP<T>&x);
- public:
- const DELP<T>&operator=(DELP<T>&x)
- {
+public:
+ const DELP<T>&operator=(DELP<T>&x) {
delete this->p;
- this->p=x.p;x.p=0;
+ this->p=x.p;
+ x.p=0;
return *this;
}
- ~DELP()
- { delete this->p;this->p=0;}
- DELP(T*_p=0)
+ ~DELP() {
+ delete this->p;
+ this->p=0;
+ }
+ DELP(T*_p=0)
: SmartPointer<T>(_p) {}
- void set(T*_p)
- {
- delete this->p;
- this->p=_p;
- }
- friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
- {
- return *(s1.p)== *(s2.p);
- }
- friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
- {
- return *(s1.p) < *(s2.p);
- }
- friend inline int Hash(const DELP<T> &wp)
- {
- if(wp.p)
- return Hash(*wp.p);
- else
- return 0;
- }
+ void set(T*_p) {
+ delete this->p;
+ this->p=_p;
+ }
+ friend bool operator==(const DELP<T>&s1,const DELP<T>&s2) {
+ return *(s1.p)== *(s2.p);
+ }
+ friend bool operator<(const DELP<T>&s1,const DELP<T>&s2) {
+ return *(s1.p) < *(s2.p);
+ }
+ friend inline int Hash(const DELP<T> &wp) {
+ if(wp.p)
+ return Hash(*wp.p);
+ else
+ return 0;
+ }
};
#endif
diff --git a/mgizapp/src/SetArray.h b/mgizapp/src/SetArray.h
index 2a2125f..67f5ed7 100644
--- a/mgizapp/src/SetArray.h
+++ b/mgizapp/src/SetArray.h
@@ -10,148 +10,155 @@ be threadsafe
#include <map>
#include <vector>
#include "defs.h"
-#include "vocab.h"
+#include "vocab.h"
#include <cstdio>
#include <cstdlib>
#include <pthread.h>
#include "syncObj.h"
template <class COUNT, class PROB>
-class LpPair {
+class LpPair
+{
public:
- COUNT count ;
- PROB prob ;
-public: // constructor
- LpPair():count(0), prob(0){} ;
- LpPair(COUNT c, PROB p):count(c), prob(p){};
+ COUNT count ;
+ PROB prob ;
+public: // constructor
+ LpPair():count(0), prob(0) {} ;
+ LpPair(COUNT c, PROB p):count(c), prob(p) {};
} ;
template <class COUNT, class PROB>
-class SetArray{
+class SetArray
+{
public:
- typedef LpPair<COUNT, PROB> CPPair;
+ typedef LpPair<COUNT, PROB> CPPair;
protected:
-
- /*Information stores here*/
- std::vector<std::map<size_t,CPPair> > store;
- std::vector<Mutex> muts;
- size_t nEnglishWord;
- size_t nFrenchWord;
- void _init(){
- store.resize(nEnglishWord);
- muts.resize(nFrenchWord);
- }
-
+
+ /*Information stores here*/
+ std::vector<std::map<size_t,CPPair> > store;
+ std::vector<Mutex> muts;
+ size_t nEnglishWord;
+ size_t nFrenchWord;
+ void _init() {
+ store.resize(nEnglishWord);
+ muts.resize(nFrenchWord);
+ }
+
public:
-
- /*
- Get reference, not creating
- */
- CPPair* find(size_t fi, size_t si){
- /*HERE: lock, unlock after we get the pointer*/
- muts[fi].lock();
- /* Sync-ed */
- std::map<size_t,CPPair>& w = store[fi];
- typename std::map<size_t,CPPair>::iterator it = w.find((size_t)si);
- CPPair* q = ( it!=store[fi].end() ? &(it->second) : 0);
+
+ /*
+ Get reference, not creating
+ */
+ CPPair* find(size_t fi, size_t si) {
+ /*HERE: lock, unlock after we get the pointer*/
+ muts[fi].lock();
+ /* Sync-ed */
+ std::map<size_t,CPPair>& w = store[fi];
+ typename std::map<size_t,CPPair>::iterator it = w.find((size_t)si);
+ CPPair* q = ( it!=store[fi].end() ? &(it->second) : 0);
// for(it = w.begin(); it!=w.end();it++){
- // cout << it->first << endl;
- // }
- /* End Synced*/
- muts[fi].unlock();
- return q;
- };
-
- /*
- Get reference, creating it
- */
- inline CPPair& findRef(size_t fi, size_t si){
- std::map<size_t,CPPair> &x = store[fi];
- muts[fi].lock();
- /* Sync-ed */
- CPPair& ref= x[si];
- /* End Synced */
- muts[fi].unlock();
- };
-
-
- void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0){
- muts[fi].lock();
- /*Syced*/
- std::map<size_t,CPPair> &x = store[fi];
- CPPair& v= x[si];
- v.count = count;
- v.prob = prob;
- muts[fi].unlock();
- }
-
- void incCount(size_t e, size_t f, COUNT inc)
- // increments the count of the given word pair. if the pair does not exist,
- // it creates it with the given value.
- {
- if( inc ){
- std::map<size_t,CPPair> &x = store[e];
- muts[e].lock();
- CPPair& ref= x[f];
- ref.count += inc;
- muts[e].unlock();
- }
- }
-
- PROB getProb(size_t e, size_t f) const
- // read probability value for P(fj/ei) from the hash table
- // if pair does not exist, return floor value PROB_SMOOTH
- {
- muts[e].lock();
- typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
- PROB b;
- if(it == store[e].end())
- b = PROB_SMOOTH;
- else
- b=max((it->second).prob, PROB_SMOOTH);
- muts[e].unlock();
- return b;
- }
-
- COUNT getCount(size_t e, size_t f) const
- /* read count value for entry pair (fj/ei) from the hash table */
- {
- muts[e].lock();
- typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
- COUNT c;
- if(it == store[e].end())
- c = 0;
- else
- c = ((*it).second).count;
- muts[e].unlock();
- }
-
- void erase(size_t e, size_t f)
- // In: a source and a target token ids.
- // removes the entry with that pair from table
- {
- muts[e].lock();
- store[e].erase(f);
- muts[e].unlock();
- };
-
- inline void setNumberOfEnlish(size_t e){nEnglishWord=e;_init();};
- inline void setNumberOfFrench(size_t f){nFrenchWord = f;};
-
- const std::map<size_t,CPPair>& getMap(size_t i) const{
- return store[i];
- }
-
- std::map<size_t,CPPair>& getMap1(size_t i){
- return store[i];
- }
-
- SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f){
- _init();
+// cout << it->first << endl;
+// }
+ /* End Synced*/
+ muts[fi].unlock();
+ return q;
+ };
+
+ /*
+ Get reference, creating it
+ */
+ inline CPPair& findRef(size_t fi, size_t si) {
+ std::map<size_t,CPPair> &x = store[fi];
+ muts[fi].lock();
+ /* Sync-ed */
+ CPPair& ref= x[si];
+ /* End Synced */
+ muts[fi].unlock();
+ };
+
+
+ void insert(size_t fi, size_t si, COUNT count = 0, PROB prob = 0) {
+ muts[fi].lock();
+ /*Syced*/
+ std::map<size_t,CPPair> &x = store[fi];
+ CPPair& v= x[si];
+ v.count = count;
+ v.prob = prob;
+ muts[fi].unlock();
+ }
+
+ void incCount(size_t e, size_t f, COUNT inc)
+ // increments the count of the given word pair. if the pair does not exist,
+ // it creates it with the given value.
+ {
+ if( inc ) {
+ std::map<size_t,CPPair> &x = store[e];
+ muts[e].lock();
+ CPPair& ref= x[f];
+ ref.count += inc;
+ muts[e].unlock();
}
+ }
+
+ PROB getProb(size_t e, size_t f) const
+ // read probability value for P(fj/ei) from the hash table
+ // if pair does not exist, return floor value PROB_SMOOTH
+ {
+ muts[e].lock();
+ typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
+ PROB b;
+ if(it == store[e].end())
+ b = PROB_SMOOTH;
+ else
+ b=max((it->second).prob, PROB_SMOOTH);
+ muts[e].unlock();
+ return b;
+ }
+
+ COUNT getCount(size_t e, size_t f) const
+ /* read count value for entry pair (fj/ei) from the hash table */
+ {
+ muts[e].lock();
+ typename std::map<size_t,CPPair >::const_iterator it = store[e].find(f);
+ COUNT c;
+ if(it == store[e].end())
+ c = 0;
+ else
+ c = ((*it).second).count;
+ muts[e].unlock();
+ }
+
+ void erase(size_t e, size_t f)
+ // In: a source and a target token ids.
+ // removes the entry with that pair from table
+ {
+ muts[e].lock();
+ store[e].erase(f);
+ muts[e].unlock();
+ };
+
+ inline void setNumberOfEnlish(size_t e) {
+ nEnglishWord=e;
+ _init();
+ };
+ inline void setNumberOfFrench(size_t f) {
+ nFrenchWord = f;
+ };
+
+ const std::map<size_t,CPPair>& getMap(size_t i) const {
+ return store[i];
+ }
+
+ std::map<size_t,CPPair>& getMap1(size_t i) {
+ return store[i];
+ }
+
+ SetArray(size_t e, size_t f): nEnglishWord(e), nFrenchWord(f) {
+ _init();
+ }
};
diff --git a/mgizapp/src/TTables.cpp b/mgizapp/src/TTables.cpp
index 1e4f3b6..dd62310 100644
--- a/mgizapp/src/TTables.cpp
+++ b/mgizapp/src/TTables.cpp
@@ -8,16 +8,16 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
-
+
*/
#include "TTables.h"
#include "Parameter.h"
@@ -33,143 +33,142 @@ GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutof
// To output to STDOUT, submit filename as NULL
template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printCountTable(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const bool actual) const
+void tmodel<COUNT, PROB>::printCountTable(const char *filename,
+ const Vector<WordEntry>& evlist,
+ const Vector<WordEntry>& fvlist,
+ const bool actual) const
{
- ostream *tof;
-
- if(filename)
- tof = new ofstream(filename);
- else
- tof = & cout;
-
- ostream &of = *tof;
- /* for(unsigned int i=0;i<es.size()-1;++i)
- for(unsigned int j=es[i];j<es[i+1];++j)
- {
- const CPPair&x=fs[j].second;
- WordIndex e=i,f=fs[j].first;
- if( actual )
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- else
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }*/
- for(unsigned int i=0;i<lexmat.size();++i){
- if( lexmat[i] ){
- for(unsigned int j=0;j<lexmat[i]->size();++j)
- {
- const CPPair&x=(*lexmat[i])[j].second;
- WordIndex e=i,f=(*lexmat[i])[j].first;
- if( x.prob>MINCOUNTINCREASE ){
- if( actual ){
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
- }else{
- of << e << ' ' << f << ' ' << x.count << '\n';
- }
- }
- }
- }
- }
-
- if(filename){
- ((ofstream*)tof)->close();
- delete tof;
- }
+ ostream *tof;
+
+ if(filename)
+ tof = new ofstream(filename);
+ else
+ tof = & cout;
+
+ ostream &of = *tof;
+ /* for(unsigned int i=0;i<es.size()-1;++i)
+ for(unsigned int j=es[i];j<es[i+1];++j)
+ {
+ const CPPair&x=fs[j].second;
+ WordIndex e=i,f=fs[j].first;
+ if( actual )
+ of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
+ else
+ of << e << ' ' << f << ' ' << x.prob << '\n';
+ }*/
+ for(unsigned int i=0; i<lexmat.size(); ++i) {
+ if( lexmat[i] ) {
+ for(unsigned int j=0; j<lexmat[i]->size(); ++j) {
+ const CPPair&x=(*lexmat[i])[j].second;
+ WordIndex e=i,f=(*lexmat[i])[j].first;
+ if( x.prob>MINCOUNTINCREASE ) {
+ if( actual ) {
+ of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
+ } else {
+ of << e << ' ' << f << ' ' << x.count << '\n';
+ }
+ }
+ }
+ }
+ }
+
+ if(filename) {
+ ((ofstream*)tof)->close();
+ delete tof;
+ }
}
template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTable(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const bool actual) const
+void tmodel<COUNT, PROB>::printProbTable(const char *filename,
+ const Vector<WordEntry>& evlist,
+ const Vector<WordEntry>& fvlist,
+ const bool actual) const
{
- ofstream of(filename);
- /* for(unsigned int i=0;i<es.size()-1;++i)
- for(unsigned int j=es[i];j<es[i+1];++j)
- {
- const CPPair&x=fs[j].second;
- WordIndex e=i,f=fs[j].first;
- if( actual )
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- else
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }*/
- for(unsigned int i=0;i<lexmat.size();++i){
- if( lexmat[i] ){
- for(unsigned int j=0;j<lexmat[i]->size();++j)
- {
- const CPPair&x=(*lexmat[i])[j].second;
- WordIndex e=i,f=(*lexmat[i])[j].first;
- if( x.prob>PROB_SMOOTH ){
- if( actual ){
- of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
- }else{
- of << e << ' ' << f << ' ' << x.prob << '\n';
- }
- }
- }
- }
- }
+ ofstream of(filename);
+ /* for(unsigned int i=0;i<es.size()-1;++i)
+ for(unsigned int j=es[i];j<es[i+1];++j)
+ {
+ const CPPair&x=fs[j].second;
+ WordIndex e=i,f=fs[j].first;
+ if( actual )
+ of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
+ else
+ of << e << ' ' << f << ' ' << x.prob << '\n';
+ }*/
+ for(unsigned int i=0; i<lexmat.size(); ++i) {
+ if( lexmat[i] ) {
+ for(unsigned int j=0; j<lexmat[i]->size(); ++j) {
+ const CPPair&x=(*lexmat[i])[j].second;
+ WordIndex e=i,f=(*lexmat[i])[j].first;
+ if( x.prob>PROB_SMOOTH ) {
+ if( actual ) {
+ of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
+ } else {
+ of << e << ' ' << f << ' ' << x.prob << '\n';
+ }
+ }
+ }
+ }
+ }
}
template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
- const Vector<WordEntry>&,
- const Vector<WordEntry>&,
- const double,
- const double,
- const bool ) const
+void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
+ const Vector<WordEntry>&,
+ const Vector<WordEntry>&,
+ const double,
+ const double,
+ const bool ) const
{
}
template <class COUNT, class PROB>
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
{
- for(unsigned int i=0;i<lexmat.size();++i){
- double c=0.0;
- if( lexmat[i] ){
- unsigned int lSize=lexmat[i]->size();
- for(unsigned int j=0;j<lSize;++j)
- c+=(*lexmat[i])[j].second.count;
- for(unsigned int j=0;j<lSize;++j) {
- if( c==0 )
- (*lexmat[i])[j].second.prob=1.0/(lSize);
- else
- (*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
- (*lexmat[i])[j].second.count=0;
- }
- }
+ for(unsigned int i=0; i<lexmat.size(); ++i) {
+ double c=0.0;
+ if( lexmat[i] ) {
+ unsigned int lSize=lexmat[i]->size();
+ for(unsigned int j=0; j<lSize; ++j)
+ c+=(*lexmat[i])[j].second.count;
+ for(unsigned int j=0; j<lSize; ++j) {
+ if( c==0 )
+ (*lexmat[i])[j].second.prob=1.0/(lSize);
+ else
+ (*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
+ (*lexmat[i])[j].second.count=0;
+ }
}
+ }
}
template <class COUNT, class PROB>
-bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
- /* This function reads the t table from a file.
- Each line is of the format: source_word_id target_word_id p(target_word|source_word)
- This is the inverse operation of the printTable function.
- NAS, 7/11/99
- */
- ifstream inf(filename);
- cerr << "Reading t prob. table from " << filename << "\n";
- if (!inf) {
- cerr << "\nERROR: Cannot open " << filename << "\n";
- return false;
- }
- WordIndex src_id, trg_id;
- PROB prob;
- int nEntry=0;
- while (inf >> src_id >> trg_id >> prob) {
- insert(src_id, trg_id, 0.0, prob);
- nEntry++;
- }
- cerr << "Read " << nEntry << " entries in prob. table.\n";
- return true;
+bool tmodel<COUNT, PROB>::readProbTable(const char *filename)
+{
+ /* This function reads the t table from a file.
+ Each line is of the format: source_word_id target_word_id p(target_word|source_word)
+ This is the inverse operation of the printTable function.
+ NAS, 7/11/99
+ */
+ ifstream inf(filename);
+ cerr << "Reading t prob. table from " << filename << "\n";
+ if (!inf) {
+ cerr << "\nERROR: Cannot open " << filename << "\n";
+ return false;
+ }
+ WordIndex src_id, trg_id;
+ PROB prob;
+ int nEntry=0;
+ while (inf >> src_id >> trg_id >> prob) {
+ insert(src_id, trg_id, 0.0, prob);
+ nEntry++;
+ }
+ cerr << "Read " << nEntry << " entries in prob. table.\n";
+ return true;
}
-template class tmodel<COUNT,PROB> ;
+template class tmodel<COUNT,PROB> ;
/* ---------------- End of Method Definitions of class tmodel ---------------*/
diff --git a/mgizapp/src/TTables.h b/mgizapp/src/TTables.h
index 8298deb..ff52a2a 100644
--- a/mgizapp/src/TTables.h
+++ b/mgizapp/src/TTables.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -34,10 +34,10 @@ USA.
#include "defs.h"
-#include "vocab.h"
+#include "vocab.h"
#include <cassert>
-
+
#include <iostream>
#include <algorithm>
#include <functional>
@@ -60,7 +60,7 @@ using __gnu_cxx::hash_map;
/* The tables defined in the following classes are defined as hash tables. For
- example. the t-table is a hash function of a word pair; an alignment is
+ example. the t-table is a hash function of a word pair; an alignment is
a hash function of a vector of integer numbers (sentence positions) and so
on */
@@ -74,49 +74,49 @@ typedef pair<WordIndex, WordIndex> wordPairIds;
class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
{
public:
- size_t operator() (const pair<WordIndex, WordIndex>& key) const
- {
- return (size_t) MAX_W*key.first + key.second; /* hash function and it
- is guarnteed to have
- unique id for each
+ size_t operator() (const pair<WordIndex, WordIndex>& key) const {
+ return (size_t) MAX_W*key.first + key.second; /* hash function and it
+ is guarnteed to have
+ unique id for each
unique pair */
- }
- #ifdef WIN32
- inline bool operator() (const pair<WordIndex, WordIndex>& key, const pair<WordIndex, WordIndex>& key2){
- return key.first==key2.first && key.second==key2.second;
- }
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
- #endif
+ }
+#ifdef WIN32
+ inline bool operator() (const pair<WordIndex, WordIndex>& key, const pair<WordIndex, WordIndex>& key2) {
+ return key.first==key2.first && key.second==key2.second;
+ }
+ enum {
+ // parameters for hash table
+ bucket_size = 1 // 0 < bucket_size
+ };
+#endif
};
/* ------------------ Class Prototype Definitions ---------------------------*
Class Name: tmodel
- Objective: This defines the underlying data structur for t Tables and t
+ Objective: This defines the underlying data structur for t Tables and t
Count Tables. They are defined as a hash table. Each entry in the hash table
- is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
- probability and the count are represented as log integer probability as
- defined by the class LogProb .
+ is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
+ probability and the count are represented as log integer probability as
+ defined by the class LogProb .
- This class is used to represents t Tables (probabiliity) and n (fertility
+ This class is used to represents t Tables (probabiliity) and n (fertility
Tables and also their corresponding count tables .
-
+
*---------------------------------------------------------------------------*/
//typedef float COUNT ;
//typedef LogProb PROB ;
template <class COUNT, class PROB>
-class LpPair {
+class LpPair
+{
public:
- COUNT count ;
- PROB prob ;
-public: // constructor
- LpPair():count(0), prob(0){} ;
- LpPair(COUNT c, PROB p):count(c), prob(p){};
+ COUNT count ;
+ PROB prob ;
+public: // constructor
+ LpPair():count(0), prob(0) {} ;
+ LpPair(COUNT c, PROB p):count(c), prob(p) {};
} ;
template<class T>
@@ -133,7 +133,7 @@ T*mbinary_search(T*x,T*y,unsigned int val)
return mbinary_search(x,mid,val);
else
return mbinary_search(mid,y,val);
-
+
}
template<class T>
@@ -150,133 +150,136 @@ const T*mbinary_search(const T*x,const T*y,unsigned int val)
return mbinary_search(x,mid,val);
else
return mbinary_search(mid,y,val);
-
+
}
template <class COUNT, class PROB>
-class tmodel{
- typedef LpPair<COUNT, PROB> CPPair;
+class tmodel
+{
+ typedef LpPair<COUNT, PROB> CPPair;
public:
- bool recordDiff;
-
+ bool recordDiff;
+
public:
- int noEnglishWords; // total number of unique source words
- int noFrenchWords; // total number of unique target words
- //vector<pair<unsigned int,CPPair> > fs;
- //vector<unsigned int> es;
- vector< vector<pair<unsigned int,CPPair> >* > lexmat;
- vector< Mutex* > mutex;
-
- void erase(WordIndex e, WordIndex f){
- CPPair *p=find(e,f);
- if(p)
- *p=CPPair(0,0);
- };
-
- CPPair*find(int e,int f){
- //pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
- //pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
- if(e>=lexmat.size()||lexmat[e]==NULL){
- return NULL;
- }
- pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
- pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
- pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
- if( x==0 ){
- //cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
- //abort();
- return 0;
- }
- return &(x->second);
- }
-
- const CPPair*find(int e,int f)const{
- if(e>=lexmat.size()||lexmat[e]==NULL){
- return NULL;
- }
- const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
- const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
- //const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
- //const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
- const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
- if( x==0 ){
- //cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
- //abort();
- return 0;
- }
-
- return &(x->second);
- }
+ int noEnglishWords; // total number of unique source words
+ int noFrenchWords; // total number of unique target words
+ //vector<pair<unsigned int,CPPair> > fs;
+ //vector<unsigned int> es;
+ vector< vector<pair<unsigned int,CPPair> >* > lexmat;
+ vector< Mutex* > mutex;
+
+ void erase(WordIndex e, WordIndex f) {
+ CPPair *p=find(e,f);
+ if(p)
+ *p=CPPair(0,0);
+ };
+
+ CPPair*find(int e,int f) {
+ //pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
+ //pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
+ if(e>=lexmat.size()||lexmat[e]==NULL) {
+ return NULL;
+ }
+ pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
+ pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
+ pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
+ if( x==0 ) {
+ //cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
+ //abort();
+ return 0;
+ }
+ return &(x->second);
+ }
+
+ const CPPair*find(int e,int f)const {
+ if(e>=lexmat.size()||lexmat[e]==NULL) {
+ return NULL;
+ }
+ const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
+ const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
+ //const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
+ //const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
+ const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
+ if( x==0 ) {
+ //cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
+ //abort();
+ return 0;
+ }
+
+ return &(x->second);
+ }
public:
- void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
- CPPair* found = find(e,f);
- if(found)
- *found=CPPair(cval,pval);
- }
-
- CPPair*getPtr(int e,int f){return find(e,f);}
-
- tmodel(){};
- tmodel(const string&fn) {
- recordDiff = false;
- int count=0,count2=0;
- ifstream infile2(fn.c_str());
- cerr << "Inputfile in " << fn << endl;
- int e,f,olde=-1,oldf=-1;
- pair<unsigned int,CPPair> cp;
- vector< pair<unsigned int,CPPair> > cps;
- while(infile2>>e>>f){
- cp.first=f;
- assert(e>=olde);
- assert(e>olde ||f>oldf);
- if( e!=olde&&olde>=0 ){
- int oldsize=lexmat.size();
- lexmat.resize(olde+1);
- for(unsigned int i=oldsize;i<lexmat.size();++i)
- lexmat[i]=0;
- lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
- cps.clear();
- if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
- cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
- count2+=lexmat[olde]->capacity();
- }
- cps.push_back(cp);
- olde=e;
- oldf=f;
- count++;
- }
- lexmat.resize(olde+1);
- lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
- count2+=lexmat[olde]->capacity();
- cout << "There are " << count << " " << count2 << " entries in table" << '\n';
- mutex.resize(lexmat.size());
- for(int _i = 0; _i< lexmat.size();_i++){
- mutex[_i] = new Mutex();
- }
- /* Create mutex */
- }
-
- ~tmodel(){
- for(int _i = 0; _i< lexmat.size();_i++){
- delete mutex[_i];
- }
-
- }
+ void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0) {
+ CPPair* found = find(e,f);
+ if(found)
+ *found=CPPair(cval,pval);
+ }
+
+ CPPair*getPtr(int e,int f) {
+ return find(e,f);
+ }
+
+ tmodel() {};
+ tmodel(const string&fn) {
+ recordDiff = false;
+ int count=0,count2=0;
+ ifstream infile2(fn.c_str());
+ cerr << "Inputfile in " << fn << endl;
+ int e,f,olde=-1,oldf=-1;
+ pair<unsigned int,CPPair> cp;
+ vector< pair<unsigned int,CPPair> > cps;
+ while(infile2>>e>>f) {
+ cp.first=f;
+ assert(e>=olde);
+ assert(e>olde ||f>oldf);
+ if( e!=olde&&olde>=0 ) {
+ int oldsize=lexmat.size();
+ lexmat.resize(olde+1);
+ for(unsigned int i=oldsize; i<lexmat.size(); ++i)
+ lexmat[i]=0;
+ lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
+ cps.clear();
+ if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
+ cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
+ count2+=lexmat[olde]->capacity();
+ }
+ cps.push_back(cp);
+ olde=e;
+ oldf=f;
+ count++;
+ }
+ lexmat.resize(olde+1);
+ lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
+ count2+=lexmat[olde]->capacity();
+ cout << "There are " << count << " " << count2 << " entries in table" << '\n';
+ mutex.resize(lexmat.size());
+ for(int _i = 0; _i< lexmat.size(); _i++) {
+ mutex[_i] = new Mutex();
+ }
+ /* Create mutex */
+ }
+
+ ~tmodel() {
+ for(int _i = 0; _i< lexmat.size(); _i++) {
+ delete mutex[_i];
+ }
+
+ }
/* tmodel(const string&fn)
{
size_t count=0;
{
- ifstream infile1(fn.c_str());
- if( !infile1 )
- {
- cerr << "ERROR: can't read coocurrence file " << fn << '\n';
- abort();
- }
- int e,f;
- while(infile1>>e>>f)
- count++;
+ ifstream infile1(fn.c_str());
+ if( !infile1 )
+ {
+ cerr << "ERROR: can't read coocurrence file " << fn << '\n';
+ abort();
+ }
+ int e,f;
+ while(infile1>>e>>f)
+ count++;
}
cout << "There are " << count << " entries in table" << '\n';
ifstream infile2(fn.c_str());
@@ -285,64 +288,63 @@ public:
pair<unsigned int,CPPair> cp;
count=0;
while(infile2>>e>>f)
- {
- assert(e>=olde);
- assert(e>olde ||f>oldf);
- if( e!=olde )
- {
- es.resize(e+1);
- for(unsigned int i=olde+1;int(i)<=e;++i)
- es[i]=count;
- }
- cp.first=f;
- assert(count<fs.size());
- fs[count]=cp;
- //fs.push_back(cp);
- olde=e;
- oldf=f;
- count++;
- }
+ {
+ assert(e>=olde);
+ assert(e>olde ||f>oldf);
+ if( e!=olde )
+ {
+ es.resize(e+1);
+ for(unsigned int i=olde+1;int(i)<=e;++i)
+ es[i]=count;
+ }
+ cp.first=f;
+ assert(count<fs.size());
+ fs[count]=cp;
+ //fs.push_back(cp);
+ olde=e;
+ oldf=f;
+ count++;
+ }
assert(count==fs.size());
es.push_back(fs.size());
cout << fs.size() << " " << count << " coocurrences read" << '\n';
}*/
-
- void incCount(WordIndex e, WordIndex f, COUNT inc) {
- if( inc ){
- CPPair *p=find(e,f);
- if( p ){
- mutex[e]->lock();
- p->count += inc ;
- mutex[e]->unlock();
- }
- }
- }
- PROB getProb(WordIndex e, WordIndex f) const{
- const CPPair *p=find(e,f);
- if( p )
- return max(p->prob, PROB_SMOOTH);
- else
- return PROB_SMOOTH;
- }
-
- COUNT getCount(WordIndex e, WordIndex f) const
- {
- const CPPair *p=find(e,f);
- if( p )
- return p->count;
- else
- return 0.0;
+ void incCount(WordIndex e, WordIndex f, COUNT inc) {
+ if( inc ) {
+ CPPair *p=find(e,f);
+ if( p ) {
+ mutex[e]->lock();
+ p->count += inc ;
+ mutex[e]->unlock();
+ }
}
+ }
+
+ PROB getProb(WordIndex e, WordIndex f) const {
+ const CPPair *p=find(e,f);
+ if( p )
+ return max(p->prob, PROB_SMOOTH);
+ else
+ return PROB_SMOOTH;
+ }
+
+ COUNT getCount(WordIndex e, WordIndex f) const {
+ const CPPair *p=find(e,f);
+ if( p )
+ return p->count;
+ else
+ return 0.0;
+ }
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
- void printProbTableInverse(const char *filename,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- const double eTotal,
- const double fTotal,
- const bool actual = false ) const;
+ void printProbTableInverse(const char *filename,
+ const Vector<WordEntry>& evlist,
+ const Vector<WordEntry>& fvlist,
+ const double eTotal,
+ const double fTotal,
+ const bool actual = false ) const;
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
bool readProbTable(const char *filename);
bool readSubSampledProbTable(const char* filename, std::set<WordIndex> &e, std::set<WordIndex> &f);
diff --git a/mgizapp/src/Vector.h b/mgizapp/src/Vector.h
index 5943181..839ca9e 100644
--- a/mgizapp/src/Vector.h
+++ b/mgizapp/src/Vector.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -41,7 +41,7 @@ Franz Josef Och (30/07/99)
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
o << "Vector(" << a.size() << "){ ";
- for(unsigned int iii=0;iii<a.size();iii++)
+ for(unsigned int iii=0; iii<a.size(); iii++)
o << " " << iii<< ": " << a[iii]<<" ;";
return o << "}\n";
}
@@ -55,256 +55,243 @@ template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
template<class T> class Vector
{
private:
- T *p;
- int realSize;
- int maxWritten;
-
- void copy(T *a, const T *b, int n);
- void copy(T *a, T *b, int n);
- void _expand();
+ T *p;
+ int realSize;
+ int maxWritten;
+
+ void copy(T *a, const T *b, int n);
+ void copy(T *a, T *b, int n);
+ void _expand();
public:
- Vector()
- : p(0), realSize(0), maxWritten(-1)
- {
+ Vector()
+ : p(0), realSize(0), maxWritten(-1) {
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
+ cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
#endif
- }
+ }
Vector(const Vector<T> &x)
- : p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
- {
- memo_new(p);
- copy(p, x.p, realSize);
+ : p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten) {
+ memo_new(p);
+ copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
+ cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
#endif
- }
+ }
explicit Vector(int n)
- : p(new T[n]), realSize(n), maxWritten(n-1)
- {
- memo_new(p);
+ : p(new T[n]), realSize(n), maxWritten(n-1) {
+ memo_new(p);
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
+ cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ }
Vector(int n, const T&_init)
- : p(new T[n]), realSize(n), maxWritten(n-1)
- {
- memo_new(p);
- for(int iii=0;iii<n;iii++)p[iii]=_init;
+ : p(new T[n]), realSize(n), maxWritten(n-1) {
+ memo_new(p);
+ for(int iii=0; iii<n; iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
-
- ~Vector()
- {
+ cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ }
+
+ ~Vector() {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
+ cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ delete [] p;
+ memo_del(p, 1);
#ifndef NDEBUG
- p=0;realSize=-1;maxWritten=-1;
+ p=0;
+ realSize=-1;
+ maxWritten=-1;
#endif
- }
-
- Vector<T>& operator=(const Vector<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- p = new T[realSize];
- memo_new(p);
- copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- return *this;
- }
-
- Vector<T>& operator=(Vector<T>&x)
- {
- if( this!= &x )
- {
+ }
+
+ Vector<T>& operator=(const Vector<T>&x) {
+ if( this!= &x ) {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete [] p;
- memo_del(p, 1);
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- p = new T[realSize];
- memo_new(p);
- copy(p, x.p, realSize);
+ cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ delete [] p;
+ memo_del(p, 1);
+ realSize = x.maxWritten+1;
+ maxWritten = x.maxWritten;
+ p = new T[realSize];
+ memo_new(p);
+ copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- }
- return *this;
- }
-
- void allowAccess(int n)
- {
- while( realSize<=n )
- _expand();
- maxWritten=max(maxWritten, n);
- assert( maxWritten<realSize );
- }
- void resize(int n)
- {
- while( realSize<n )
- _expand();
- maxWritten=n-1;
- }
- void clear()
- {
- resize(0);
- }
- void reserve(int n)
- {
- int maxOld=maxWritten;
- resize(n);
- maxWritten=maxOld;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p, p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p, p+until, greater<T>());
+ cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
}
- void init(int n, const T&_init)
- {
+ return *this;
+ }
+
+ Vector<T>& operator=(Vector<T>&x) {
+ if( this!= &x ) {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
- delete []p;
+ cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ delete [] p;
memo_del(p, 1);
- p=new T[n];
+ realSize = x.maxWritten+1;
+ maxWritten = x.maxWritten;
+ p = new T[realSize];
memo_new(p);
- realSize=n;
- maxWritten=n-1;
- for(int iii=0;iii<n;iii++)p[iii]=_init;
+ copy(p, x.p, realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
+ cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
}
- inline unsigned int size() const
- {assert( maxWritten<realSize );
- return maxWritten+1;}
- inline int low() const
- { return 0; }
- inline int high() const
- { return maxWritten; }
+ return *this;
+ }
+
+ void allowAccess(int n) {
+ while( realSize<=n )
+ _expand();
+ maxWritten=max(maxWritten, n);
+ assert( maxWritten<realSize );
+ }
+ void resize(int n) {
+ while( realSize<n )
+ _expand();
+ maxWritten=n-1;
+ }
+ void clear() {
+ resize(0);
+ }
+ void reserve(int n) {
+ int maxOld=maxWritten;
+ resize(n);
+ maxWritten=maxOld;
+ }
+ void sort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p, p+until);
+ }
+ void invsort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p, p+until, greater<T>());
+ }
+ void init(int n, const T&_init) {
+#ifdef VERY_ARRAY_DEBUG
+ cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ delete []p;
+ memo_del(p, 1);
+ p=new T[n];
+ memo_new(p);
+ realSize=n;
+ maxWritten=n-1;
+ for(int iii=0; iii<n; iii++)p[iii]=_init;
+#ifdef VERY_ARRAY_DEBUG
+ cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
+#endif
+ }
+ inline unsigned int size() const {
+ assert( maxWritten<realSize );
+ return maxWritten+1;
+ }
+ inline int low() const {
+ return 0;
+ }
+ inline int high() const {
+ return maxWritten;
+ }
int findMax() const;
int findMin() const;
void errorAccess(int n) const;
- inline T*getPointerToData(){return p;}
- inline T*begin(){return p;}
- inline T*end(){return p+maxWritten+1;}
- inline T& operator[](int n)
- {
+ inline T*getPointerToData() {
+ return p;
+ }
+ inline T*begin() {
+ return p;
+ }
+ inline T*end() {
+ return p+maxWritten+1;
+ }
+ inline T& operator[](int n) {
#ifndef NDEBUG
- if( n<0 || n>maxWritten )
- errorAccess(n);
+ if( n<0 || n>maxWritten )
+ errorAccess(n);
#endif
- return p[n];
- }
- inline const T& operator[](int n) const
- {
+ return p[n];
+ }
+ inline const T& operator[](int n) const {
#ifndef NDEBUG
- if(n<0 || n>maxWritten )
- errorAccess(n);
+ if(n<0 || n>maxWritten )
+ errorAccess(n);
#endif
- return p[n];
- }
- inline const T& get(int n) const
- {
+ return p[n];
+ }
+ inline const T& get(int n) const {
#ifndef NDEBUG
- if(n<0 || n>maxWritten )
- errorAccess(n);
-#endif
- return p[n];
+ if(n<0 || n>maxWritten )
+ errorAccess(n);
+#endif
+ return p[n];
+ }
+ const T&top(int n=0) const {
+ return (*this)[maxWritten-n];
+ }
+ T&top(int n=0) {
+ return (*this)[maxWritten-n];
+ }
+ const T&back(int n=0) const {
+ return (*this)[maxWritten-n];
+ }
+ T&back(int n=0) {
+ return (*this)[maxWritten-n];
+ }
+ T&push_back(const T&x) {
+ allowAccess(maxWritten+1);
+ (*this)[maxWritten]=x;
+ return top();
+ }
+ bool writeTo(ostream&out) const {
+ out << "Vector ";
+ out << size() << " ";
+ //out << a << '\n';
+ for(int iv=0; iv<=maxWritten; iv++) {
+ writeOb(out, (*this)[iv]);
+ out << '\n';
}
- const T&top(int n=0) const
- {return (*this)[maxWritten-n];}
- T&top(int n=0)
- {return (*this)[maxWritten-n];}
- const T&back(int n=0) const
- {return (*this)[maxWritten-n];}
- T&back(int n=0)
- {return (*this)[maxWritten-n];}
- T&push_back(const T&x)
- {
- allowAccess(maxWritten+1);
- (*this)[maxWritten]=x;
- return top();
+ return 1;
+ }
+ bool readFrom(istream&in) {
+ string s;
+ if( !in ) {
+ cerr << "ERROR(Vector): file cannot be opened.\n";
+ return 0;
}
- bool writeTo(ostream&out) const
- {
- out << "Vector ";
- out << size() << " ";
- //out << a << '\n';
- for(int iv=0;iv<=maxWritten;iv++)
- {
- writeOb(out, (*this)[iv]);
- out << '\n';
- }
- return 1;
+ in >> s;
+ if( !(s=="Vector") ) {
+ cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
+ return 0;
}
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(Vector): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="Vector") )
- {
- cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- // in >> a;
- resize(biggest);
- for(int iv=0;iv<size();iv++)
- {
- readOb(in, (*this)[iv]);
- }
- return 1;
+ int biggest;
+ in >> biggest;
+ // in >> a;
+ resize(biggest);
+ for(int iv=0; iv<size(); iv++) {
+ readOb(in, (*this)[iv]);
}
+ return 1;
+ }
};
template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
{
if( &x == &y )
return 1;
- else
- {
- if( y.size()!=x.size() )
- return 0;
- else
- {
- for(unsigned int iii=0;iii<x.size();iii++)
- if( !(x[iii]==y[iii]) )
- return 0;
- return 1;
- }
+ else {
+ if( y.size()!=x.size() )
+ return 0;
+ else {
+ for(unsigned int iii=0; iii<x.size(); iii++)
+ if( !(x[iii]==y[iii]) )
+ return 0;
+ return 1;
}
+ }
}
template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
{
@@ -315,28 +302,26 @@ template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
{
if( &x == &y )
return 0;
- else
- {
- if( y.size()<x.size() )
- return !(y<x);
- for(int iii=0;iii<x.size();iii++)
- {
- assert( iii!=y.size() );
- if( x[iii]<y[iii] )
- return 1;
- else if( y[iii]<x[iii] )
- return 0;
- }
- return x.size()!=y.size();//??
+ else {
+ if( y.size()<x.size() )
+ return !(y<x);
+ for(int iii=0; iii<x.size(); iii++) {
+ assert( iii!=y.size() );
+ if( x[iii]<y[iii] )
+ return 1;
+ else if( y[iii]<x[iii] )
+ return 0;
}
+ return x.size()!=y.size();//??
+ }
}
template<class T> void Vector<T>:: errorAccess(int n) const
{
- cerr << "ERROR: Access to array element " << n
- << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
- cout << "ERROR: Access to array element " << n
+ cerr << "ERROR: Access to array element " << n
+ << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
+ cout << "ERROR: Access to array element " << n
<< " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
assert(0);
#ifndef DEBUG
@@ -347,29 +332,31 @@ template<class T> void Vector<T>:: errorAccess(int n) const
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
o << "Vector(" << a.size() << "){ ";
- for(unsigned int iii=0;iii<a.size();iii++)
+ for(unsigned int iii=0; iii<a.size(); iii++)
o << " " << iii<< ": " << a[iii]<<" ;";
return o << "}\n";
}
template<class T> istream& operator>>(istream&in, Vector<T>&)
-{return in;}
+{
+ return in;
+}
template<class T> int Hash(const Vector<T>&a)
{
int n=0;
- for(int iii=0;iii<a.size();iii++)
+ for(int iii=0; iii<a.size(); iii++)
n+=Hash(a[iii])*(iii+1);
return n+a.size()*47;
}
template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
{
- for(int iii=0;iii<n;iii++)
+ for(int iii=0; iii<n; iii++)
aa[iii]=bb[iii];
}
template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
{
- for(int iii=0;iii<n;iii++)
+ for(int iii=0; iii<n; iii++)
aa[iii]=bb[iii];
}
@@ -377,7 +364,7 @@ template<class T> void Vector<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
+#endif
T *oldp=p;
int oldsize=realSize;
realSize=realSize*2+1;
@@ -388,34 +375,32 @@ template<class T> void Vector<T>::_expand()
memo_del(oldp, 1);
#ifdef VERY_ARRAY_DEBUG
cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif
+#endif
}
template<class T> int Vector<T>::findMax() const
{
if( size()==0 )
return -1;
- else
- {
- int maxPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[maxPos]<(*this)[iii] )
- maxPos=iii;
- return maxPos;
- }
+ else {
+ int maxPos=0;
+ for(int iii=1; iii<size(); iii++)
+ if( (*this)[maxPos]<(*this)[iii] )
+ maxPos=iii;
+ return maxPos;
+ }
}
template<class T> int Vector<T>::findMin() const
{
if( size()==0 )
return -1;
- else
- {
- int minPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[iii]<(*this)[minPos] )
- minPos=iii;
- return minPos;
- }
+ else {
+ int minPos=0;
+ for(int iii=1; iii<size(); iii++)
+ if( (*this)[iii]<(*this)[minPos] )
+ minPos=iii;
+ return minPos;
+ }
}
#endif
diff --git a/mgizapp/src/WordClasses.h b/mgizapp/src/WordClasses.h
index 7992553..404b717 100644
--- a/mgizapp/src/WordClasses.h
+++ b/mgizapp/src/WordClasses.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,75 +29,67 @@ USA.
class WordClasses
{
- private:
+private:
map<string,string> Sw2c;
map<string,int> Sc2int;
Vector<string> Sint2c;
Vector<int> w2c;
unsigned int classes;
- public:
- WordClasses()
- : classes(1)
- {
- Sint2c.push_back("0");
- Sc2int["0"]=0;
- }
- template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb)
- {
- string sline;
- int maxword=0;
- int readWord=0, putWord=0;
- while(getline(in,sline))
- {
- readWord ++;
- string word,wclass;
- istrstream iline(sline.c_str());
- iline>>word>>wclass;
-
- if( !Sc2int.count(wclass) )
- {
- Sc2int[wclass]=classes++;
- Sint2c.push_back(wclass);
- assert(classes==Sint2c.size());
- }
- if(vcb.has_word(word)){
- maxword=max(m(word),maxword);
- assert(Sw2c.count(word)==0);
- Sw2c[word]=wclass;
- putWord++;
- }
- }
- w2c=Vector<int>(maxword+1,0);
- for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
- w2c[m(i->first)]=Sc2int[i->second];
- cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
- cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
- return 1;
- }
- int getClass(int w)const
- {
- if(w>=0&&int(w)<int(w2c.size()) )
- return w2c[w];
- else
- return 0;
- }
- int operator()(const string&x)const
- {
- if( Sc2int.count(x) )
- return Sc2int.find(x)->second;
- else
- {
- cerr << "WARNING: class " << x << " not found.\n";
- return 0;
- }
+public:
+ WordClasses()
+ : classes(1) {
+ Sint2c.push_back("0");
+ Sc2int["0"]=0;
+ }
+ template<class MAPPER> bool read(istream&in,const MAPPER&m,const vcbList& vcb) {
+ string sline;
+ int maxword=0;
+ int readWord=0, putWord=0;
+ while(getline(in,sline)) {
+ readWord ++;
+ string word,wclass;
+ istrstream iline(sline.c_str());
+ iline>>word>>wclass;
+
+ if( !Sc2int.count(wclass) ) {
+ Sc2int[wclass]=classes++;
+ Sint2c.push_back(wclass);
+ assert(classes==Sint2c.size());
+ }
+ if(vcb.has_word(word)) {
+ maxword=max(m(word),maxword);
+ assert(Sw2c.count(word)==0);
+ Sw2c[word]=wclass;
+ putWord++;
+ }
}
- string classString(unsigned int cnr)const
- {
- if( cnr<Sint2c.size())
- return Sint2c[cnr];
- else
- return string("0");
+ w2c=Vector<int>(maxword+1,0);
+ for(map<string,string>::const_iterator i=Sw2c.begin(); i!=Sw2c.end(); ++i)
+ w2c[m(i->first)]=Sc2int[i->second];
+ cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
+ cout << "Actual number of read words: " << readWord << " stored words: " << putWord << endl;
+ return 1;
+ }
+ int getClass(int w)const {
+ if(w>=0&&int(w)<int(w2c.size()) )
+ return w2c[w];
+ else
+ return 0;
+ }
+ int operator()(const string&x)const {
+ if( Sc2int.count(x) )
+ return Sc2int.find(x)->second;
+ else {
+ cerr << "WARNING: class " << x << " not found.\n";
+ return 0;
}
+ }
+ string classString(unsigned int cnr)const {
+ if( cnr<Sint2c.size())
+ return Sint2c[cnr];
+ else
+ return string("0");
+ }
};
#endif
diff --git a/mgizapp/src/alignment.cpp b/mgizapp/src/alignment.cpp
index 55a2e5c..506a0b1 100644
--- a/mgizapp/src/alignment.cpp
+++ b/mgizapp/src/alignment.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,9 +30,9 @@ ostream&operator<<(ostream&out, const alignment&a)
{
int m=a.a.size()-1,l=a.f.size()-1;
out << "AL(l:"<<l<<",m:"<<m<<")(a: ";
- for(int j=1;j<=m;j++)out << a(j) << ' ';
+ for(int j=1; j<=m; j++)out << a(j) << ' ';
out << ")(fert: ";
- for(int i=0;i<=l;i++)out << a.fert(i) << ' ';
+ for(int i=0; i<=l; i++)out << a.fert(i) << ' ';
return out << ") c:"<<"\n";
}
diff --git a/mgizapp/src/alignment.h b/mgizapp/src/alignment.h
index 03cf028..de6893c 100644
--- a/mgizapp/src/alignment.h
+++ b/mgizapp/src/alignment.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,196 +32,183 @@ Franz Josef Och (30/07/99)
class al_struct
{
- public:
+public:
al_struct()
- : prev(0),next(0){}
+ : prev(0),next(0) {}
PositionIndex prev,next;
};
class alignment
{
- private:
+private:
Vector<PositionIndex> a;
Vector<PositionIndex> positionSum,f;
- public:
+public:
Vector<PositionIndex> als_i;
Vector<al_struct> als_j;
PositionIndex l,m;
alignment()
- {}
+ {}
alignment(PositionIndex _l, PositionIndex _m)
: a(_m+1, (PositionIndex)0),
- positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
- {
- f[0]=m;
- for(PositionIndex j=1;j<=m;j++)
- {
- if( j>1 )
- als_j[j].prev= j-1;
- if( j<m )
- als_j[j].next= j+1;
- }
- als_i[0]=1;
- }
- PositionIndex get_l()const
- {return l;}
- PositionIndex get_m()const
- {return m;}
- void doMove(int i,int j)
- {
- set(j,i);
- }
- void doSwap(int j1,int j2)
- {
- int aj1=a[j1],aj2=a[j2];
- set(j1,aj2);
- set(j2,aj1);
- }
- void set(PositionIndex j, PositionIndex aj)
- {
- PositionIndex old_aj=a[j];
- massert(j<a.size());massert(aj<f.size());
- massert(old_aj<f.size());massert(f[old_aj]>0);
- massert(j>0);
- positionSum[old_aj]-=j;
- // ausfuegen
- PositionIndex prev=als_j[j].prev;
- PositionIndex next=als_j[j].next;
- if( next )
- als_j[next].prev=prev;
- if( prev )
- als_j[prev].next=next;
- else
- als_i[old_aj]=next;
-
- // neue Position suchen
- PositionIndex lfd=als_i[aj],llfd=0;
- while( lfd && lfd<j )
- lfd = als_j[llfd=lfd].next;
+ positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m) {
+ f[0]=m;
+ for(PositionIndex j=1; j<=m; j++) {
+ if( j>1 )
+ als_j[j].prev= j-1;
+ if( j<m )
+ als_j[j].next= j+1;
+ }
+ als_i[0]=1;
+ }
+ PositionIndex get_l()const {
+ return l;
+ }
+ PositionIndex get_m()const {
+ return m;
+ }
+ void doMove(int i,int j) {
+ set(j,i);
+ }
+ void doSwap(int j1,int j2) {
+ int aj1=a[j1],aj2=a[j2];
+ set(j1,aj2);
+ set(j2,aj1);
+ }
+ void set(PositionIndex j, PositionIndex aj) {
+ PositionIndex old_aj=a[j];
+ massert(j<a.size());
+ massert(aj<f.size());
+ massert(old_aj<f.size());
+ massert(f[old_aj]>0);
+ massert(j>0);
+ positionSum[old_aj]-=j;
+ // ausfuegen
+ PositionIndex prev=als_j[j].prev;
+ PositionIndex next=als_j[j].next;
+ if( next )
+ als_j[next].prev=prev;
+ if( prev )
+ als_j[prev].next=next;
+ else
+ als_i[old_aj]=next;
- // einfuegen
- als_j[j].prev=llfd;
- als_j[j].next=lfd;
- if( llfd )
- als_j[llfd].next=j;
- else
- als_i[aj]=j;
- if( lfd )
- als_j[lfd].prev=j;
+ // neue Position suchen
+ PositionIndex lfd=als_i[aj],llfd=0;
+ while( lfd && lfd<j )
+ lfd = als_j[llfd=lfd].next;
- f[old_aj]--;
- positionSum[aj]+=j;
- f[aj]++;
- a[j]=aj;
- }
- const Vector<PositionIndex>& getAlignment() const
- {return a ;}
- PositionIndex get_al(PositionIndex j)const
- {
- massert(j<a.size());
- return a[j];
- }
- PositionIndex operator()(PositionIndex j)const
- {
- massert(j<a.size());
- return a[j];
- }
- PositionIndex fert(PositionIndex i)const
- {
- massert(i<f.size());
- return f[i];
- }
- PositionIndex get_head(PositionIndex i)const
- {
- massert( als_i[i]==_get_head(i) );
- return als_i[i];
- }
- PositionIndex get_center(PositionIndex i)const
- {
- if( i==0 )return 0;
- massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
- return (positionSum[i]+f[i]-1)/f[i];
- }
- PositionIndex _get_head(PositionIndex i)const
- {
- if( fert(i)==0 )return 0;
- for(PositionIndex j=1;j<=m;j++)
- if( a[j]==i )
- return j;
- return 0;
- }
- PositionIndex _get_center(PositionIndex i)const
- {
- if( i==0 )return 0;
- massert(fert(i));
- PositionIndex sum=0;
- for(PositionIndex j=1;j<=m;j++)
- if( a[j]==i )
- sum+=j;
- return (sum+fert(i)-1)/fert(i);
- }
- PositionIndex prev_cept(PositionIndex i)const
- {
- if( i==0 )return 0;
- PositionIndex k=i-1;
- while(k&&fert(k)==0)
- k--;
- return k;
- }
- PositionIndex next_cept(PositionIndex i)const
- {
- PositionIndex k=i+1;
- while(k<l+1&&fert(k)==0)
- k++;
- return k;
- }
- PositionIndex prev_in_cept(PositionIndex j)const
- {
- //PositionIndex k=j-1;
- //while(k&&a[k]!=a[j])
- //k--;
- //assert( als_j[j].prev==k );
- //assert(k);
- //return k;
- massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
- return als_j[j].prev;
- }
+ // einfuegen
+ als_j[j].prev=llfd;
+ als_j[j].next=lfd;
+ if( llfd )
+ als_j[llfd].next=j;
+ else
+ als_i[aj]=j;
+ if( lfd )
+ als_j[lfd].prev=j;
+
+ f[old_aj]--;
+ positionSum[aj]+=j;
+ f[aj]++;
+ a[j]=aj;
+ }
+ const Vector<PositionIndex>& getAlignment() const {
+ return a ;
+ }
+ PositionIndex get_al(PositionIndex j)const {
+ massert(j<a.size());
+ return a[j];
+ }
+ PositionIndex operator()(PositionIndex j)const {
+ massert(j<a.size());
+ return a[j];
+ }
+ PositionIndex fert(PositionIndex i)const {
+ massert(i<f.size());
+ return f[i];
+ }
+ PositionIndex get_head(PositionIndex i)const {
+ massert( als_i[i]==_get_head(i) );
+ return als_i[i];
+ }
+ PositionIndex get_center(PositionIndex i)const {
+ if( i==0 )return 0;
+ massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
+ return (positionSum[i]+f[i]-1)/f[i];
+ }
+ PositionIndex _get_head(PositionIndex i)const {
+ if( fert(i)==0 )return 0;
+ for(PositionIndex j=1; j<=m; j++)
+ if( a[j]==i )
+ return j;
+ return 0;
+ }
+ PositionIndex _get_center(PositionIndex i)const {
+ if( i==0 )return 0;
+ massert(fert(i));
+ PositionIndex sum=0;
+ for(PositionIndex j=1; j<=m; j++)
+ if( a[j]==i )
+ sum+=j;
+ return (sum+fert(i)-1)/fert(i);
+ }
+ PositionIndex prev_cept(PositionIndex i)const {
+ if( i==0 )return 0;
+ PositionIndex k=i-1;
+ while(k&&fert(k)==0)
+ k--;
+ return k;
+ }
+ PositionIndex next_cept(PositionIndex i)const {
+ PositionIndex k=i+1;
+ while(k<l+1&&fert(k)==0)
+ k++;
+ return k;
+ }
+ PositionIndex prev_in_cept(PositionIndex j)const {
+ //PositionIndex k=j-1;
+ //while(k&&a[k]!=a[j])
+ //k--;
+ //assert( als_j[j].prev==k );
+ //assert(k);
+ //return k;
+ massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
+ return als_j[j].prev;
+ }
friend ostream &operator<<(ostream&out, const alignment&a);
- friend bool operator==(const alignment&a, const alignment&b)
- {
- massert(a.a.size()==b.a.size());
- for(PositionIndex j=1;j<=a.get_m();j++)
- if(a(j)!=b(j))
- return 0;
- return 1;
- }
- friend bool operator<(const alignment&x, const alignment&y)
- {
- massert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- if( x(j)<y(j) )
- return 1;
- else if( y(j)<x(j) )
- return 0;
- return 0;
- }
- friend int differences(const alignment&x, const alignment&y){
+ friend bool operator==(const alignment&a, const alignment&b) {
+ massert(a.a.size()==b.a.size());
+ for(PositionIndex j=1; j<=a.get_m(); j++)
+ if(a(j)!=b(j))
+ return 0;
+ return 1;
+ }
+ friend bool operator<(const alignment&x, const alignment&y) {
+ massert(x.get_m()==y.get_m());
+ for(PositionIndex j=1; j<=x.get_m(); j++)
+ if( x(j)<y(j) )
+ return 1;
+ else if( y(j)<x(j) )
+ return 0;
+ return 0;
+ }
+ friend int differences(const alignment&x, const alignment&y) {
int count=0;
massert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
+ for(PositionIndex j=1; j<=x.get_m(); j++)
count += (x(j)!=y(j));
return count;
}
- bool valid()const
- {
- if( 2*f[0]>m )
- return 0;
- for(unsigned int i=1;i<=l;i++)
- if( f[i]>=MAX_FERTILITY )
- return 0;
- return 1;
- }
+ bool valid()const {
+ if( 2*f[0]>m )
+ return 0;
+ for(unsigned int i=1; i<=l; i++)
+ if( f[i]>=MAX_FERTILITY )
+ return 0;
+ return 1;
+ }
friend class transpair_model5;
};
#endif
diff --git a/mgizapp/src/cmd.c b/mgizapp/src/cmd.c
index 8847172..76a4db6 100644
--- a/mgizapp/src/cmd.c
+++ b/mgizapp/src/cmd.c
@@ -17,9 +17,9 @@
#endif
static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
+ { "FALSE", 0 },
+ { "TRUE", 1 },
+ { 0, 0 }
};
#ifdef NEEDSTRDUP
@@ -33,91 +33,91 @@ char *strdup();
#define MAXPARAM 256
static char *GetLine(),
- **str2array();
+ **str2array();
static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
+ SetParam(),
+ SetEnum(),
+ SetSubrange(),
+ SetStrArray(),
+ SetGte(),
+ SetLte(),
+ CmdError(),
+ EnumError(),
+ SubrangeError(),
+ GteError(),
+ LteError(),
+ PrintParam(),
+ PrintEnum(),
+ PrintStrArray();
static Cmd_T cmds[MAXPARAM+1];
static char *SepString = " \t\n";
int DeclareParams(const char *ParName, ...)
{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
- va_start(args, ParName);
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
+ va_list args;
+ static int ParamN = 0;
+ int j,
+ c;
+ char *s;
+
+ va_start(args, ParName);
+ for(; ParName;) {
+ if(ParamN==MAXPARAM) {
+ fprintf(stderr, "Too many parameters !!\n");
+ break;
+ }
+ for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
+ ;
+ if(!c) {
+ fprintf(stderr,
+ "Warning: parameter \"%s\" declared twice.\n",
+ ParName);
+ }
+ for(c=ParamN; c>j; c--) {
+ cmds[c] = cmds[c-1];
+ }
+ cmds[j].Name = ParName;
+ cmds[j].Type = va_arg(args, int);
+ cmds[j].Val = va_arg(args, void *);
+ switch(cmds[j].Type) {
+ case CMDENUMTYPE: /* get the pointer to Enum_T struct */
+ cmds[j].p = va_arg(args, void *);
+ break;
+ case CMDSUBRANGETYPE: /* get the two extremes */
+ cmds[j].p = (void*) calloc(2, sizeof(int));
+ ((int*)cmds[j].p)[0] = va_arg(args, int);
+ ((int*)cmds[j].p)[1] = va_arg(args, int);
+ break;
+ case CMDGTETYPE: /* get lower or upper bound */
+ case CMDLTETYPE:
+ cmds[j].p = (void*) calloc(1, sizeof(int));
+ ((int*)cmds[j].p)[0] = va_arg(args, int);
+ break;
+ case CMDSTRARRAYTYPE: /* get the separators string */
+ cmds[j].p = (s=va_arg(args, char*))
+ ? (void*)strdup(s) : 0;
+ break;
+ case CMDBOOLTYPE:
+ cmds[j].Type = CMDENUMTYPE;
+ cmds[j].p = BoolEnum;
+ break;
+ case CMDDOUBLETYPE: /* nothing else is needed */
+ case CMDINTTYPE:
+ case CMDSTRINGTYPE:
+ break;
+ default:
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+ "DeclareParam()", "Unknown Type",
+ cmds[j].Type, "for parameter", cmds[j].Name);
+ exit(1);
+ }
+ ParamN++;
+ ParName = va_arg(args, char *);
+ }
+ cmds[ParamN].Name = NULL;
+ va_end(args);
+ return 0;
}
int GetParams(n, a, CmdFileName)
@@ -125,129 +125,130 @@ int *n;
char ***a;
char *CmdFileName;
{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
+ char *Line,
+ *ProgName;
+ int argc = *n;
+ char **argv = *a,
+ *s;
+ FILE *fp;
+ int IsPipe;
#ifdef MSDOS
#define PATHSEP '\\'
- char *dot = NULL;
+ char *dot = NULL;
#else
#define PATHSEP '/'
#endif
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
+ if(!(Line=malloc(LINSIZ))) {
+ fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
+ LINSIZ);
+ exit(1);
+ }
+ if((ProgName=strrchr(*argv, PATHSEP))) {
+ ++ProgName;
+ } else {
+ ProgName = *argv;
+ }
#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
+ if(dot=strchr(ProgName, '.')) *dot = 0;
#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
+ --argc;
+ ++argv;
+ for(;;) {
+ if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
+ CmdFileName = argv[0]+2;
+ ++argv;
+ --argc;
+ }
+ if(!CmdFileName) {
+ break;
+ }
+ IsPipe = !strncmp(CmdFileName, "@@", 2);
+ fp = IsPipe
+ ? popen(CmdFileName+2, "r")
+ : strcmp(CmdFileName, "-")
+ ? fopen(CmdFileName, "r")
+ : stdin;
+ if(!fp) {
+ fprintf(stderr, "Unable to open command file %s\n",
+ CmdFileName);
+ exit(1);
+ }
+ while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
+ if(Scan(ProgName, cmds, Line)) {
+ CmdError(Line);
+ }
+ }
+ if(fp!=stdin) {
+ if(IsPipe) pclose(fp);
+ else fclose(fp);
+ }
+ CmdFileName = NULL;
+ }
+ while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
+ *s = ' ';
+ sprintf(Line, "%s/%s", ProgName, *argv+1);
+ *s = '=';
+ if(Scan(ProgName, cmds, Line)) CmdError(*argv);
+ --argc;
+ ++argv;
+ }
+ *n = argc;
+ *a = argv;
#ifdef MSDOS
- if(dot) *dot = '.';
+ if(dot) *dot = '.';
#endif
- free(Line);
- return 0;
+ free(Line);
+ return 0;
}
int PrintParams(ValFlag, fp)
int ValFlag;
FILE *fp;
{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
+ int i;
+
+ fflush(fp);
+ if(ValFlag) {
+ fprintf(fp, "Parameters Values:\n");
+ } else {
+ fprintf(fp, "Parameters:\n");
+ }
+ for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
+ fprintf(fp, "\n");
+ fflush(fp);
+ return 0;
}
int SPrintParams(a, pfx)
char ***a,
- *pfx;
+ *pfx;
{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
+ int l,
+ n;
+ Cmd_T *cmd;
+
+ if(!pfx) pfx="";
+ l = strlen(pfx);
+ for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
+ a[0] = calloc(n, sizeof(char*));
+ for(n=0, cmd=cmds; cmd->Name; cmd++) {
+ if(!cmd->ArgStr) continue;
+ a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
+ sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
+ ++n;
+ }
+ return n;
}
static int CmdError(opt)
char *opt;
{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
- fprintf(stderr, "This program expectes the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
+ fprintf(stderr, "Invalid option \"%s\"\n", opt);
+ fprintf(stderr, "This program expectes the following parameters:\n");
+ PrintParams(FALSE, stderr);
+ exit(0);
}
static int PrintParam(cmd, ValFlag, fp)
@@ -255,48 +256,48 @@ Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
+ fprintf(fp, "%4s", "");
+ switch(cmd->Type) {
+ case CMDDOUBLETYPE:
+ fprintf(fp, "%s", cmd->Name);
+ if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
+ fprintf(fp, "\n");
+ break;
+ case CMDENUMTYPE:
+ PrintEnum(cmd, ValFlag, fp);
+ break;
+ case CMDINTTYPE:
+ case CMDSUBRANGETYPE:
+ case CMDGTETYPE:
+ case CMDLTETYPE:
+ fprintf(fp, "%s", cmd->Name);
+ if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
+ fprintf(fp, "\n");
+ break;
+ case CMDSTRINGTYPE:
+ fprintf(fp, "%s", cmd->Name);
+ if(ValFlag) {
+ if(*(char **)cmd->Val) {
+ fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
+ } else {
+ fprintf(fp, ": %s", "NULL");
+ }
+ }
+ fprintf(fp, "\n");
+ break;
+ case CMDSTRARRAYTYPE:
+ PrintStrArray(cmd, ValFlag, fp);
+ break;
+ default:
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+ "PrintParam",
+ "Unknown Type",
+ cmd->Type,
+ "for parameter",
+ cmd->Name);
+ exit(1);
+ }
+ return 0;
}
static char *GetLine(fp, n, Line)
@@ -304,265 +305,265 @@ FILE *fp;
int n;
char *Line;
{
- int j,
- l,
- offs=0;
-
- for(;;) {
- if(!fgets(Line+offs, n-offs, fp)) {
- return NULL;
- }
- if(Line[offs]=='#') continue;
- l = strlen(Line+offs)-1;
- Line[offs+l] = 0;
- for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
- ;
- if(l<1) continue;
- if(j > offs) {
- char *s = Line+offs,
- *q = Line+j;
-
- while((*s++=*q++))
- ;
- }
- if(Line[offs+l-1]=='\\') {
- offs += l;
- Line[offs-1] = ' ';
- } else {
- break;
- }
- }
- return Line;
+ int j,
+ l,
+ offs=0;
+
+ for(;;) {
+ if(!fgets(Line+offs, n-offs, fp)) {
+ return NULL;
+ }
+ if(Line[offs]=='#') continue;
+ l = strlen(Line+offs)-1;
+ Line[offs+l] = 0;
+ for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
+ ;
+ if(l<1) continue;
+ if(j > offs) {
+ char *s = Line+offs,
+ *q = Line+j;
+
+ while((*s++=*q++))
+ ;
+ }
+ if(Line[offs+l-1]=='\\') {
+ offs += l;
+ Line[offs-1] = ' ';
+ } else {
+ break;
+ }
+ }
+ return Line;
}
static int Scan(ProgName, cmds, Line)
char *ProgName,
- *Line;
+ *Line;
Cmd_T *cmds;
{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
+ char *q,
+ *p;
+ int i,
+ hl,
+ HasToMatch = FALSE,
+ c0,
+ c;
+
+ p = Line+strspn(Line, SepString);
+ if(!(hl=strcspn(p, SepString))) {
+ return 0;
+ }
+ if((q=strchr(p, '/')) && q-p<hl) {
+ *q = 0;
+ if(strcmp(p, ProgName)) {
+ *q = '/';
+ return 0;
+ }
+ *q = '/';
+ HasToMatch=TRUE;
+ p = q+1;
+ }
+ if(!(hl = strcspn(p, SepString))) {
+ return 0;
+ }
+ c0 = p[hl];
+ p[hl] = 0;
+ for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
+ ;
+ p[hl] = c0;
+ if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
+ return HasToMatch && c;
}
static int SetParam(cmd, s)
Cmd_T *cmd;
char *s;
{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
+ if(!*s && cmd->Type != CMDSTRINGTYPE) {
+ fprintf(stderr,
+ "WARNING: No value specified for parameter \"%s\"\n",
+ cmd->Name);
+ return 0;
+ }
+ switch(cmd->Type) {
+ case CMDDOUBLETYPE:
+ if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
+ fprintf(stderr,
+ "Float value required for parameter \"%s\"\n",
+ cmd->Name);
+ exit(1);
+ }
+ break;
+ case CMDENUMTYPE:
+ SetEnum(cmd, s);
+ break;
+ case CMDINTTYPE:
+ if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
+ fprintf(stderr,
+ "Integer value required for parameter \"%s\"\n",
+ cmd->Name);
+ exit(1);
+ }
+ break;
+ case CMDSTRINGTYPE:
+ *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
+ ? strdup(s)
+ : 0;
+ break;
+ case CMDSTRARRAYTYPE:
+ SetStrArray(cmd, s);
+ break;
+ case CMDGTETYPE:
+ SetGte(cmd, s);
+ break;
+ case CMDLTETYPE:
+ SetLte(cmd, s);
+ break;
+ case CMDSUBRANGETYPE:
+ SetSubrange(cmd, s);
+ break;
+ default:
+ fprintf(stderr, "%s: %s %d %s \"%s\"\n",
+ "SetParam",
+ "Unknown Type",
+ cmd->Type,
+ "for parameter",
+ cmd->Name);
+ exit(1);
+ }
+ cmd->ArgStr = strdup(s);
+ return 0;
}
static int SetEnum(cmd, s)
Cmd_T *cmd;
char *s;
{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
+ Enum_T *en;
+
+ for(en=(Enum_T *)cmd->p; en->Name; en++) {
+ if(*en->Name && !strcmp(s, en->Name)) {
+ *(int *) cmd->Val = en->Idx;
+ return 0;
+ }
+ }
+ return EnumError(cmd, s);
}
static int SetSubrange(cmd, s)
Cmd_T *cmd;
char *s;
{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
+ int n;
+
+ if(sscanf(s, "%d", &n)!=1) {
+ fprintf(stderr,
+ "Integer value required for parameter \"%s\"\n",
+ cmd->Name);
+ exit(1);
+ }
+ if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
+ return SubrangeError(cmd, n);
+ }
+ *(int *)cmd->Val = n;
+ return 0;
}
static int SetGte(cmd, s)
Cmd_T *cmd;
char *s;
{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
+ int n;
+
+ if(sscanf(s, "%d", &n)!=1) {
+ fprintf(stderr,
+ "Integer value required for parameter \"%s\"\n",
+ cmd->Name);
+ exit(1);
+ }
+ if(n<*(int *)cmd->p) {
+ return GteError(cmd, n);
+ }
+ *(int *)cmd->Val = n;
+ return 0;
}
static int SetStrArray(cmd, s)
Cmd_T *cmd;
char *s;
{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
+ *(char***)cmd->Val = str2array(s, (char*)cmd->p);
+ return 0;
}
static int SetLte(cmd, s)
Cmd_T *cmd;
char *s;
{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
+ int n;
+
+ if(sscanf(s, "%d", &n)!=1) {
+ fprintf(stderr,
+ "Integer value required for parameter \"%s\"\n",
+ cmd->Name);
+ exit(1);
+ }
+ if(n > *(int *)cmd->p) {
+ return LteError(cmd, n);
+ }
+ *(int *)cmd->Val = n;
+ return 0;
}
static int EnumError(cmd, s)
Cmd_T *cmd;
char *s;
{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
+ Enum_T *en;
+
+ fprintf(stderr,
+ "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
+ fprintf(stderr, "Valid values are:\n");
+ for(en=(Enum_T *)cmd->p; en->Name; en++) {
+ if(*en->Name) {
+ fprintf(stderr, " %s\n", en->Name);
+ }
+ }
+ fprintf(stderr, "\n");
+ exit(1);
}
static int GteError(cmd, n)
Cmd_T *cmd;
int n;
{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
+ fprintf(stderr,
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+ fprintf(stderr, "Valid values must be greater than or equal to %d\n",
+ *(int *)cmd->p);
+ exit(1);
}
static int LteError(cmd, n)
Cmd_T *cmd;
int n;
{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
+ fprintf(stderr,
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+ fprintf(stderr, "Valid values must be less than or equal to %d\n",
+ *(int *)cmd->p);
+ exit(1);
}
static int SubrangeError(cmd, n)
Cmd_T *cmd;
int n;
{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
+ fprintf(stderr,
+ "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
+ fprintf(stderr, "Valid values range from %d to %d\n",
+ *(int *)cmd->p, *((int *)cmd->p+1));
+ exit(1);
}
static int PrintEnum(cmd, ValFlag, fp)
@@ -570,18 +571,18 @@ Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
+ Enum_T *en;
+
+ fprintf(fp, "%s", cmd->Name);
+ if(ValFlag) {
+ for(en=(Enum_T *)cmd->p; en->Name; en++) {
+ if(*en->Name && en->Idx==*(int *)cmd->Val) {
+ fprintf(fp, ": %s", en->Name);
+ }
+ }
+ }
+ fprintf(fp, "\n");
+ return 0;
}
static int PrintStrArray(cmd, ValFlag, fp)
@@ -589,52 +590,52 @@ Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
+ char *indent,
+ **s = *(char***)cmd->Val;
+ int l = 4+strlen(cmd->Name);
+
+ fprintf(fp, "%s", cmd->Name);
+ indent = malloc(l+2);
+ memset(indent, ' ', l+1);
+ indent[l+1] = 0;
+ if(ValFlag) {
+ fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
+ if(s) while(*s) {
+ fprintf(fp, "\n%s %s", indent, *s++);
+ }
+ }
+ free(indent);
+ fprintf(fp, "\n");
+ return 0;
}
static char **str2array(s, sep)
char *s,
- *sep;
+ *sep;
{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
+ char *p,
+ **a;
+ int n = 0,
+ l;
+
+ if(!sep) sep = SepString;
+ p = s += strspn(s, sep);
+ while(*p) {
+ p += strcspn(p, sep);
+ p += strspn(p, sep);
+ ++n;
+ }
+ a = calloc(n+1, sizeof(char *));
+ p = s;
+ n = 0;
+ while(*p) {
+ l = strcspn(p, sep);
+ a[n] = malloc(l+1);
+ memcpy(a[n], p, l);
+ a[n][l] = 0;
+ ++n;
+ p += l;
+ p += strspn(p, sep);
+ }
+ return a;
}
diff --git a/mgizapp/src/cmd.h b/mgizapp/src/cmd.h
index 2e61bba..a190041 100644
--- a/mgizapp/src/cmd.h
+++ b/mgizapp/src/cmd.h
@@ -16,16 +16,16 @@
#define CMDBOOLTYPE 9
typedef struct {
- const char *Name;
- int Idx;
+ const char *Name;
+ int Idx;
} Enum_T;
typedef struct {
- int Type;
- const char *Name,
- *ArgStr;
- void *Val,
- *p;
+ int Type;
+ const char *Name,
+ *ArgStr;
+ void *Val,
+ *p;
} Cmd_T;
#ifdef __cplusplus
@@ -33,15 +33,15 @@ extern "C" {
#endif
#if defined(__STDC__) || defined(WIN32)
-int DeclareParams(const char *, ...);
+ int DeclareParams(const char *, ...);
#else
-int DeclareParams();
+ int DeclareParams();
#endif
-int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
+ int GetParams(int *n, char ***a,char *CmdFileName),
+ SPrintParams(),
+ PrintParams();
#ifdef __cplusplus
}
diff --git a/mgizapp/src/collCounts.cpp b/mgizapp/src/collCounts.cpp
index 698e915..341ad91 100644
--- a/mgizapp/src/collCounts.cpp
+++ b/mgizapp/src/collCounts.cpp
@@ -9,14 +9,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,258 +33,264 @@
extern float COUNTINCREASE_CUTOFF_AL;
// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
template<class TRANSPAIR> int collectCountsOverNeighborhood(
- const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
- Array2<LogProb,Vector<LogProb> >&dtcount,
- Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
- LogProb&p0count, LogProb&total_count) {
- int nAl=0;
- const PositionIndex l=msc.get_l(), m=msc.get_m();
- Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
- Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
- LogProb total_move, total_swap;
- if (msc.isCenterDeleted()==0) {
- total_move+=ascore;
- nAl++;
- }
- for (PositionIndex j=1; j<=m; j++) {
- for (PositionIndex i=0; i<=l; i++) {
- if (msc(j)!=i && !msc.isDelMove(i, j) ) {
- LogProb newscore=ascore*msc.cmove(i, j);
- total_move+=newscore;
- nAl++;
- cmove(i, j)+=newscore;
- negmove[j]+=newscore;
- plus1fert[i]+=newscore;
- minus1fert[msc(j)]+=newscore;
- }
- }
- }
- for (PositionIndex j1=1; j1<=m; j1++) {
- for (PositionIndex j2=j1+1; j2<=m; j2++) {
- if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
- LogProb newscore=ascore*msc.cswap(j1, j2);
- total_swap+=newscore;
- nAl++;
- cswap(msc(j1), j2)+=newscore;
- cswap(msc(j2), j1)+=newscore;
- negswap[j1]+=newscore;
- negswap[j2]+=newscore;
- }
- }
- }
- total_count+=total_move+total_swap;
- for (PositionIndex j=1; j<=m; j++)
- for (PositionIndex i=0; i<=l; i++)
- dtcount(i, j) += ((i==msc(j)) ? (total_count
- -(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
- for (PositionIndex i=1; i<=l; i++) {
- LogProb temp=minus1fert[i]+plus1fert[i];
- if (msc.fert(i)<MAX_FERTILITY)
- ncount(i, msc.fert(i))+=total_count-temp;
- if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
- ncount(i, msc.fert(i)-1)+=minus1fert[i];
- else if (minus1fert[i]!=0.0)
- cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
- << msc.fert(i)<< endl;
- if (msc.fert(i)+1<MAX_FERTILITY)
- ncount(i, msc.fert(i)+1)+=plus1fert[i];
- }
- LogProb temp=minus1fert[0]+plus1fert[0];
- p1count += (total_count-temp)*(LogProb)msc.fert(0);
- p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
- if (msc.fert(0)>0) {
- p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
- p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
- } else if (minus1fert[0]!=0.0)
- cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
- if (int(m)-2*(int(msc.fert(0))+1)>=0) {
- p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
- p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
- }
- msc.check();
- return nAl;
+ const MoveSwapMatrix<TRANSPAIR>&msc, LogProb ascore,
+ Array2<LogProb,Vector<LogProb> >&dtcount,
+ Array2<LogProb,Vector<LogProb> >&ncount, LogProb&p1count,
+ LogProb&p0count, LogProb&total_count)
+{
+ int nAl=0;
+ const PositionIndex l=msc.get_l(), m=msc.get_m();
+ Array2<LogProb,Vector<LogProb> > cmove(l+1, m+1), cswap(l+1, m+1);
+ Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
+ LogProb total_move, total_swap;
+ if (msc.isCenterDeleted()==0) {
+ total_move+=ascore;
+ nAl++;
+ }
+ for (PositionIndex j=1; j<=m; j++) {
+ for (PositionIndex i=0; i<=l; i++) {
+ if (msc(j)!=i && !msc.isDelMove(i, j) ) {
+ LogProb newscore=ascore*msc.cmove(i, j);
+ total_move+=newscore;
+ nAl++;
+ cmove(i, j)+=newscore;
+ negmove[j]+=newscore;
+ plus1fert[i]+=newscore;
+ minus1fert[msc(j)]+=newscore;
+ }
+ }
+ }
+ for (PositionIndex j1=1; j1<=m; j1++) {
+ for (PositionIndex j2=j1+1; j2<=m; j2++) {
+ if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
+ LogProb newscore=ascore*msc.cswap(j1, j2);
+ total_swap+=newscore;
+ nAl++;
+ cswap(msc(j1), j2)+=newscore;
+ cswap(msc(j2), j1)+=newscore;
+ negswap[j1]+=newscore;
+ negswap[j2]+=newscore;
+ }
+ }
+ }
+ total_count+=total_move+total_swap;
+ for (PositionIndex j=1; j<=m; j++)
+ for (PositionIndex i=0; i<=l; i++)
+ dtcount(i, j) += ((i==msc(j)) ? (total_count
+ -(negmove[j]+negswap[j])) : (cswap(i, j)+cmove(i, j)));
+ for (PositionIndex i=1; i<=l; i++) {
+ LogProb temp=minus1fert[i]+plus1fert[i];
+ if (msc.fert(i)<MAX_FERTILITY)
+ ncount(i, msc.fert(i))+=total_count-temp;
+ if (msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
+ ncount(i, msc.fert(i)-1)+=minus1fert[i];
+ else if (minus1fert[i]!=0.0)
+ cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' '
+ << msc.fert(i)<< endl;
+ if (msc.fert(i)+1<MAX_FERTILITY)
+ ncount(i, msc.fert(i)+1)+=plus1fert[i];
+ }
+ LogProb temp=minus1fert[0]+plus1fert[0];
+ p1count += (total_count-temp)*(LogProb)msc.fert(0);
+ p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
+ if (msc.fert(0)>0) {
+ p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
+ p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
+ } else if (minus1fert[0]!=0.0)
+ cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
+ if (int(m)-2*(int(msc.fert(0))+1)>=0) {
+ p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
+ p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
+ }
+ msc.check();
+ return nAl;
}
;
template<class TRANSPAIR> double collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*) {
- return 0.0;
+ const MoveSwapMatrix<TRANSPAIR>&, LogProb, void*)
+{
+ return 0.0;
}
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
- const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table) {
- Mmsc.check();
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- for (PositionIndex j=1; j<=m; ++j)
- if (msc(j)!=0)
- if (msc.get_head(msc(j))==j) {
- int ep=msc.prev_cept(msc(j));
- d4Table->augCountRef_first(j, msc.get_center(ep),
- d4Table->ewordclasses->getClass(ef.get_es(ep)),
- d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
- } else {
- //massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
- d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
- d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
- }
+ const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
+ const TRANSPAIR&ef, LogProb normalized_ascore, d4model*d4Table)
+{
+ Mmsc.check();
+ const PositionIndex m=msc.get_m(), l=msc.get_l();
+ for (PositionIndex j=1; j<=m; ++j)
+ if (msc(j)!=0)
+ if (msc.get_head(msc(j))==j) {
+ int ep=msc.prev_cept(msc(j));
+ d4Table->augCountRef_first(j, msc.get_center(ep),
+ d4Table->ewordclasses->getClass(ef.get_es(ep)),
+ d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
+ } else {
+ //massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
+ d4Table->augCountRef_bigger(j, msc.prev_in_cept(j), 0,
+ d4Table->fwordclasses->getClass(ef.get_fs(j)), l, m,normalized_ascore);
+ }
}
template<class TRANSPAIR> void _collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
- const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table) {
- Mmsc.check();
- _collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
- normalized_ascore, &d5Table->d4m);
- Mmsc.check();
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for (PositionIndex i=1; i<=l; i++) {
- PositionIndex cur_j=msc.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if (cur_j) { // process first word of cept
- k++;
- d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
- msc.get_center(prev_cept)),
- d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
- vac_all-msc.fert(i)+k) +=normalized_ascore;
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- Mmsc.check();
- prev_j=cur_j;
- cur_j=msc.als_j[cur_j].next;
- }
- while (cur_j) { // process following words of cept
- k++;
- int vprev=vacancies(vac, prev_j);
- d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
- d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
- vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
- vac_all--;
- vac[cur_j]=1;
- Mmsc.check();
- prev_j=cur_j;
- cur_j=msc.als_j[cur_j].next;
- }
- assert(k==msc.fert(i));
- if (k)
- prev_cept=i;
- }
- assert(vac_all==msc.fert(0));
+ const MoveSwapMatrix<TRANSPAIR>&Mmsc, const alignment&msc,
+ const TRANSPAIR&ef, LogProb normalized_ascore, d5model*d5Table)
+{
+ Mmsc.check();
+ _collectCountsOverNeighborhoodForSophisticatedModels(Mmsc, msc, ef,
+ normalized_ascore, &d5Table->d4m);
+ Mmsc.check();
+ const PositionIndex m=msc.get_m(), l=msc.get_l();
+ PositionIndex prev_cept=0;
+ PositionIndex vac_all=m;
+ Vector<char> vac(m+1,0);
+ for (PositionIndex i=1; i<=l; i++) {
+ PositionIndex cur_j=msc.als_i[i];
+ PositionIndex prev_j=0;
+ PositionIndex k=0;
+ if (cur_j) { // process first word of cept
+ k++;
+ d5Table->getCountRef_first(vacancies(vac, cur_j), vacancies(vac,
+ msc.get_center(prev_cept)),
+ d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
+ vac_all-msc.fert(i)+k) +=normalized_ascore;
+ vac_all--;
+ assert(vac[cur_j]==0);
+ vac[cur_j]=1;
+ Mmsc.check();
+ prev_j=cur_j;
+ cur_j=msc.als_j[cur_j].next;
+ }
+ while (cur_j) { // process following words of cept
+ k++;
+ int vprev=vacancies(vac, prev_j);
+ d5Table->getCountRef_bigger(vacancies(vac, cur_j), vprev,
+ d5Table->fwordclasses->getClass(ef.get_fs(cur_j)), l, m,
+ vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
+ vac_all--;
+ vac[cur_j]=1;
+ Mmsc.check();
+ prev_j=cur_j;
+ cur_j=msc.als_j[cur_j].next;
+ }
+ assert(k==msc.fert(i));
+ if (k)
+ prev_cept=i;
+ }
+ assert(vac_all==msc.fert(0));
}
extern int NumberOfAlignmentsInSophisticatedCountCollection;
template<class TRANSPAIR, class MODEL> double collectCountsOverNeighborhoodForSophisticatedModels(
- const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
- MODEL*d5Table) {
- const PositionIndex m=msc.get_m(), l=msc.get_l();
- alignment x(msc);
- double sum=0;
- msc.check();
- if ( !msc.isCenterDeleted() ) {
- _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
- msc.get_ef(), normalized_ascore, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- sum+=normalized_ascore;
- }
- msc.check();
- for (WordIndex j=1; j<=m; j++)
- for (WordIndex i=0; i<=l; i++) {
- WordIndex old=x(j);
- if (i!=old&& !msc.isDelMove(i, j) ) {
- msc.check();
- double c=msc.cmove(i, j)*normalized_ascore;
- if (c > COUNTINCREASE_CUTOFF_AL) {
- x.set(j, i);
- _collectCountsOverNeighborhoodForSophisticatedModels<
- TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- x.set(j, old);
- sum+=c;
- }
- msc.check();
- }
- }
- for (PositionIndex j1=1; j1<=m; j1++) {
- for (PositionIndex j2=j1+1; j2<=m; j2++) {
- if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
- double c=msc.cswap(j1, j2)*normalized_ascore;
- msc.check();
- if (c > COUNTINCREASE_CUTOFF_AL) {
- int old1=msc(j1), old2=msc(j2);
- x.set(j1, old2);
- x.set(j2, old1);
- _collectCountsOverNeighborhoodForSophisticatedModels<
- TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
- NumberOfAlignmentsInSophisticatedCountCollection++;
- x.set(j1, old1);
- x.set(j2, old2);
- sum+=c;
- }
- msc.check();
- }
- }
- }
- msc.check();
- return sum;
+ const MoveSwapMatrix<TRANSPAIR>&msc, LogProb normalized_ascore,
+ MODEL*d5Table)
+{
+ const PositionIndex m=msc.get_m(), l=msc.get_l();
+ alignment x(msc);
+ double sum=0;
+ msc.check();
+ if ( !msc.isCenterDeleted() ) {
+ _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc, x,
+ msc.get_ef(), normalized_ascore, d5Table);
+ NumberOfAlignmentsInSophisticatedCountCollection++;
+ sum+=normalized_ascore;
+ }
+ msc.check();
+ for (WordIndex j=1; j<=m; j++)
+ for (WordIndex i=0; i<=l; i++) {
+ WordIndex old=x(j);
+ if (i!=old&& !msc.isDelMove(i, j) ) {
+ msc.check();
+ double c=msc.cmove(i, j)*normalized_ascore;
+ if (c > COUNTINCREASE_CUTOFF_AL) {
+ x.set(j, i);
+ _collectCountsOverNeighborhoodForSophisticatedModels<
+ TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
+ NumberOfAlignmentsInSophisticatedCountCollection++;
+ x.set(j, old);
+ sum+=c;
+ }
+ msc.check();
+ }
+ }
+ for (PositionIndex j1=1; j1<=m; j1++) {
+ for (PositionIndex j2=j1+1; j2<=m; j2++) {
+ if (msc(j1)!=msc(j2) && !msc.isDelSwap(j1, j2) ) {
+ double c=msc.cswap(j1, j2)*normalized_ascore;
+ msc.check();
+ if (c > COUNTINCREASE_CUTOFF_AL) {
+ int old1=msc(j1), old2=msc(j2);
+ x.set(j1, old2);
+ x.set(j2, old1);
+ _collectCountsOverNeighborhoodForSophisticatedModels<
+ TRANSPAIR>(msc, x, msc.get_ef(), c, d5Table);
+ NumberOfAlignmentsInSophisticatedCountCollection++;
+ x.set(j1, old1);
+ x.set(j2, old2);
+ sum+=c;
+ }
+ msc.check();
+ }
+ }
+ }
+ msc.check();
+ return sum;
}
template<class TRANSPAIR, class MODEL> int collectCountsOverNeighborhood(
- const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
- Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
- amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
- nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
- LogProb&_total, float count, bool addCounts, MODEL*d4Table) {
- int nAl=0;
- const PositionIndex l=es.size()-1, m=fs.size()-1;
- Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
- MAX_FERTILITY+1);
- LogProb p0=0, p1=0, all_total=0;
- for (unsigned int i=0; i<smsc.size(); ++i) {
- LogProb this_total=0;
- nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
- dtcount, ncount, p1, p0, this_total);
- all_total+=this_total;
- }
- _total=all_total;
- all_total/=(double)count;
- double sum2=0;
- if (addCounts && d4Table) {
- for (unsigned int i=0; i<smsc.size(); ++i) {
- //for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
- // (*smsc[i].first).cmove(ii,j);
- sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
- *smsc[i].first, smsc[i].second/all_total, d4Table);
- }
- if (!(fabs(count-sum2)<0.05))
- cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2
- << ")\n";
- }
+ const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
+ Vector<WordIndex>&es, Vector<WordIndex>&fs, tmodel<COUNT,PROB>&tTable,
+ amodel<COUNT>&aCountTable, amodel<COUNT>&dCountTable,
+ nmodel<COUNT>&nCountTable, SyncDouble&p1count, SyncDouble&p0count,
+ LogProb&_total, float count, bool addCounts, MODEL*d4Table)
+{
+ int nAl=0;
+ const PositionIndex l=es.size()-1, m=fs.size()-1;
+ Array2<LogProb,Vector<LogProb> > dtcount(l+1, m+1), ncount(l+1,
+ MAX_FERTILITY+1);
+ LogProb p0=0, p1=0, all_total=0;
+ for (unsigned int i=0; i<smsc.size(); ++i) {
+ LogProb this_total=0;
+ nAl+=collectCountsOverNeighborhood(*smsc[i].first, smsc[i].second,
+ dtcount, ncount, p1, p0, this_total);
+ all_total+=this_total;
+ }
+ _total=all_total;
+ all_total/=(double)count;
+ double sum2=0;
+ if (addCounts && d4Table) {
+ for (unsigned int i=0; i<smsc.size(); ++i) {
+ //for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
+ // (*smsc[i].first).cmove(ii,j);
+ sum2+=collectCountsOverNeighborhoodForSophisticatedModels(
+ *smsc[i].first, smsc[i].second/all_total, d4Table);
+ }
+ if (!(fabs(count-sum2)<0.05))
+ cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2
+ << ")\n";
+ }
- /**
- NOTE! HERE IS THE UPDATE PROCESS！
- */
- if (addCounts) {
- for (PositionIndex i=0; i<=l; i++) {
- for (PositionIndex j=1; j<=m; j++) {
- LogProb ijadd=dtcount(i, j)/all_total;
- if (ijadd>COUNTINCREASE_CUTOFF_AL) {
- tTable.incCount(es[i], fs[j], ijadd);
- dCountTable.addValue(j, i, l, m, ijadd);
- aCountTable.addValue(i, j, l, m, ijadd);
- }
- }
- if (i>0)
- for (PositionIndex n=0; n<MAX_FERTILITY; n++)
- nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
- }
- p0count+=p0/all_total;
- p1count+=p1/all_total;
- }
- return nAl;
+ /**
+ NOTE! HERE IS THE UPDATE PROCESS！
+ */
+ if (addCounts) {
+ for (PositionIndex i=0; i<=l; i++) {
+ for (PositionIndex j=1; j<=m; j++) {
+ LogProb ijadd=dtcount(i, j)/all_total;
+ if (ijadd>COUNTINCREASE_CUTOFF_AL) {
+ tTable.incCount(es[i], fs[j], ijadd);
+ dCountTable.addValue(j, i, l, m, ijadd);
+ aCountTable.addValue(i, j, l, m, ijadd);
+ }
+ }
+ if (i>0)
+ for (PositionIndex n=0; n<MAX_FERTILITY; n++)
+ nCountTable.addValue(es[i], n, ncount(i, n)/all_total);
+ }
+ p0count+=p0/all_total;
+ p1count+=p1/all_total;
+ }
+ return nAl;
}
diff --git a/mgizapp/src/collCounts.h b/mgizapp/src/collCounts.h
index 9a0529b..0c51b94 100644
--- a/mgizapp/src/collCounts.h
+++ b/mgizapp/src/collCounts.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,14 +31,14 @@ USA.
class OneMoveSwap
{
- public:
+public:
short type;
short a,b;
OneMoveSwap(short _type,short _a,short _b)
: type(_type),a(_a),b(_b)
- {}
+ {}
OneMoveSwap()
- : type(0){}
+ : type(0) {}
};
inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
@@ -62,7 +62,7 @@ inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
{
- for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
+ for(set<OneMoveSwap>::const_iterator i=s.begin(); i!=s.end(); ++i)
cout << *i << ' ';
return out;
}
@@ -71,10 +71,10 @@ bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
template<class TRANSPAIR,class MODEL>
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
- Vector<WordIndex>&es,
- Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
- amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
- nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
- LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
+ Vector<WordIndex>&es,
+ Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
+ amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
+ nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
+ LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
#endif
diff --git a/mgizapp/src/d4norm.cxx b/mgizapp/src/d4norm.cxx
index a790a62..b061416 100644
--- a/mgizapp/src/d4norm.cxx
+++ b/mgizapp/src/d4norm.cxx
@@ -51,78 +51,82 @@ GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detai
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
+ "maximal fertility for fertility models", PARLEV_EM, 10);
using namespace std;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-
-
-int main(int argc, char* argv[]){
- if(argc < 5){
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
- return 1;
- }
- WordClasses ewc,fwc;
- d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- TargetVocabFilename = argv[2];
- SourceVocabFilename = argv[1];
- eTrainVcbList.setName(argv[1]);
- fTrainVcbList.setName(argv[2]);
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
- SourceVocabClassesFilename = argv[1];
- TargetVocabClassesFilename = argv[2];
- SourceVocabClassesFilename += ".classes";
- TargetVocabClassesFilename += ".classes";
- d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str(),eTrainVcbList,fTrainVcbList);
- // Start iteration:
- for(int i =4; i< argc ; i++){
- string name = argv[i];
- string nameA = name ;
- string nameB = name + ".b";
- if(d4m.augCount(nameA.c_str(),nameB.c_str())){
- cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
-
- }else{
- cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
- }
- }
-
- d4m.normalizeTable();
- string DiffOPath = argv[3];
- string diff1 = DiffOPath;
- string diff2 = DiffOPath+".b";
- cerr << "Outputing d4 table to " << diff1 << " " << diff2;
- d4m.printProbTable(diff1.c_str(),diff2.c_str());
-
-
+ TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
+ a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
+
+
+int main(int argc, char* argv[])
+{
+ if(argc < 5) {
+ cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
+ return 1;
+ }
+ WordClasses ewc,fwc;
+ d4model d4m(MAX_SENTENCE_LENGTH,ewc,fwc);
+ Vector<WordEntry> evlist,fvlist;
+ vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
+ TargetVocabFilename = argv[2];
+ SourceVocabFilename = argv[1];
+ eTrainVcbList.setName(argv[1]);
+ fTrainVcbList.setName(argv[2]);
+ eTrainVcbList.readVocabList();
+ fTrainVcbList.readVocabList();
+ SourceVocabClassesFilename = argv[1];
+ TargetVocabClassesFilename = argv[2];
+ SourceVocabClassesFilename += ".classes";
+ TargetVocabClassesFilename += ".classes";
+ d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str(),eTrainVcbList,fTrainVcbList);
+ // Start iteration:
+ for(int i =4; i< argc ; i++) {
+ string name = argv[i];
+ string nameA = name ;
+ string nameB = name + ".b";
+ if(d4m.augCount(nameA.c_str(),nameB.c_str())) {
+ cerr << "Loading (d4) table " << nameA << "/" << nameB << " OK" << endl;
+
+ } else {
+ cerr << "ERROR Loading (d) table " << nameA << " " << nameB << endl;
+ }
+ }
+
+ d4m.normalizeTable();
+ string DiffOPath = argv[3];
+ string diff1 = DiffOPath;
+ string diff2 = DiffOPath+".b";
+ cerr << "Outputing d4 table to " << diff1 << " " << diff2;
+ d4m.printProbTable(diff1.c_str(),diff2.c_str());
+
+
}
// Some utility functions to get it compile..
ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
+const string str2Num(int n)
+{
+ string number = "";
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ return (number);
}
double LAMBDA=1.09;
Vector<map< pair<int,int>,char > > ReferenceAlignment;
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no){
- return 0;
- }
+ const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
+ int&eventsMissing, int&eventsToomuch, int pair_no)
+{
+ return 0;
+}
-void printGIZAPars(ostream&out){
+void printGIZAPars(ostream&out)
+{
}
diff --git a/mgizapp/src/defs.h b/mgizapp/src/defs.h
index 5fbc31d..98a9ae1 100644
--- a/mgizapp/src/defs.h
+++ b/mgizapp/src/defs.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -46,23 +46,44 @@ extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
typedef float PROB ;
typedef float COUNT ;
-
-class LogProb {
- private:
+
+class LogProb
+{
+private:
double x ;
- public:
- LogProb():x(0){}
- LogProb(double y):x(y){}
- LogProb(float y):x(y){}
- LogProb(int y):x(y){}
- LogProb(WordIndex y):x(y){}
- operator double() const {return x;}
- LogProb operator *= (double y) { x *= y ; return *this;}
- LogProb operator *= (LogProb y) { x *= y.x ; return *this;}
- LogProb operator /= (double y) { x /= y ; return *this;}
- LogProb operator /= (LogProb y) { x /= y.x ; return *this;}
- LogProb operator += (double y) { x += y ; return *this;}
- LogProb operator += (LogProb y) { x += y.x ; return *this;}
+public:
+ LogProb():x(0) {}
+ LogProb(double y):x(y) {}
+ LogProb(float y):x(y) {}
+ LogProb(int y):x(y) {}
+ LogProb(WordIndex y):x(y) {}
+ operator double() const {
+ return x;
+ }
+ LogProb operator *= (double y) {
+ x *= y ;
+ return *this;
+ }
+ LogProb operator *= (LogProb y) {
+ x *= y.x ;
+ return *this;
+ }
+ LogProb operator /= (double y) {
+ x /= y ;
+ return *this;
+ }
+ LogProb operator /= (LogProb y) {
+ x /= y.x ;
+ return *this;
+ }
+ LogProb operator += (double y) {
+ x += y ;
+ return *this;
+ }
+ LogProb operator += (LogProb y) {
+ x += y.x ;
+ return *this;
+ }
};
const int PARLEV_ITER=1;
@@ -75,4 +96,4 @@ const int PARLEV_SPECIAL=7;
const int PARLEV_INPUT=8;
#endif
-
+
diff --git a/mgizapp/src/file_spec.h b/mgizapp/src/file_spec.h
index 4c5e625..8e689b4 100644
--- a/mgizapp/src/file_spec.h
+++ b/mgizapp/src/file_spec.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,30 +30,31 @@ USA.
/* This function returns a string, locally called file_spec. This
string is the concatenation of the date and time of execution
and the user who is performing the execution */
-/* Originally implemented in C by Yaser Al-Onaizan;
+/* Originally implemented in C by Yaser Al-Onaizan;
editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
-char *Get_File_Spec (){
+char *Get_File_Spec ()
+{
struct tm *local;
time_t t;
const char *user;
char time_stmp[57];
char *file_spec = 0;
-
+
t = time(NULL);
local = localtime(&t);
-
- sprintf(time_stmp, "%02d-%02d-%02d.%02d%02d%02d.", local->tm_year,
- (local->tm_mon + 1), local->tm_mday, local->tm_hour,
- local->tm_min, local->tm_sec);
+
+ sprintf(time_stmp, "%02d-%02d-%02d.%02d%02d%02d.", local->tm_year,
+ (local->tm_mon + 1), local->tm_mday, local->tm_hour,
+ local->tm_min, local->tm_sec);
#ifdef WIN32
user = "WINUSER";
#else
user = getenv("USER");
#endif
- file_spec = (char *)malloc(sizeof(char) *
- (strlen(time_stmp) + strlen(user) + 1));
+ file_spec = (char *)malloc(sizeof(char) *
+ (strlen(time_stmp) + strlen(user) + 1));
file_spec[0] = '\0';
strcat(file_spec, time_stmp) ;
strcat(file_spec, user);
diff --git a/mgizapp/src/getSentence.cpp b/mgizapp/src/getSentence.cpp
index 24e15db..245cdc5 100644
--- a/mgizapp/src/getSentence.cpp
+++ b/mgizapp/src/getSentence.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -50,336 +50,322 @@ GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLE
GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
-sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
- vcbList* flist) : realCount(0)
- // This method is the constructor of the class, it also intitializes the
+sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
+ vcbList* flist) : realCount(0)
+ // This method is the constructor of the class, it also intitializes the
// sentence pair sequential number (count) to zero.
{
- readsent_mutex=new boost::mutex();
- setprob_mutex = new boost::mutex();
-
- position = 0;
- readflag = false ;
- allInMemory = false ;
- inputFilename = filename ;
- inputFile = new ifstream(filename);
- pair_no = 0 ;
- if(!(*inputFile)){
- cerr << "\nERROR:(a) Cannot open " << filename;
- exit(1);
- }
- currentSentence = 0;
- totalPairs1 = 0 ;
- totalPairs2 =0;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- bool isNegative=0;
- std::set<WordIndex> evoc,fvoc;
- evoc.insert(0);
- fvoc.insert(0);
- if (elist && flist){
- cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
- sentPair s ;
- while (getNextSentence(s, elist, flist))
- {
- for(int i = 0 ; i< s.eSent.size() ; i++){
- evoc.insert(s.eSent[i]);
- }
- for(int i = 0 ; i< s.fSent.size() ; i++){
- fvoc.insert(s.fSent[i]);
- }
- totalPairs1++;
- totalPairs2+=s.realCount;
- // NOTE: this value might change during training
- // for words from the manual dictionary, yet this is ignored!
-
- if( s.noOcc<0 )
- isNegative=1;
- }
- }
- if( isNegative==1 )
- {
- cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
- realCount=new Vector<double>(totalPairs1,1.0);
+ readsent_mutex=new boost::mutex();
+ setprob_mutex = new boost::mutex();
+
+ position = 0;
+ readflag = false ;
+ allInMemory = false ;
+ inputFilename = filename ;
+ inputFile = new ifstream(filename);
+ pair_no = 0 ;
+ if(!(*inputFile)) {
+ cerr << "\nERROR:(a) Cannot open " << filename;
+ exit(1);
+ }
+ currentSentence = 0;
+ totalPairs1 = 0 ;
+ totalPairs2 =0;
+ pair_no = 0 ;
+ noSentInBuffer = 0 ;
+ Buffer.clear();
+ bool isNegative=0;
+ std::set<WordIndex> evoc,fvoc;
+ evoc.insert(0);
+ fvoc.insert(0);
+ if (elist && flist) {
+ cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
+ sentPair s ;
+ while (getNextSentence(s, elist, flist)) {
+ for(int i = 0 ; i< s.eSent.size() ; i++) {
+ evoc.insert(s.eSent[i]);
+ }
+ for(int i = 0 ; i< s.fSent.size() ; i++) {
+ fvoc.insert(s.fSent[i]);
+ }
+ totalPairs1++;
+ totalPairs2+=s.realCount;
+ // NOTE: this value might change during training
+ // for words from the manual dictionary, yet this is ignored!
+
+ if( s.noOcc<0 )
+ isNegative=1;
}
- else
- realCount=0;
- elist->compact(evoc);
- flist->compact(fvoc);
+ }
+ if( isNegative==1 ) {
+ cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
+ realCount=new Vector<double>(totalPairs1,1.0);
+ } else
+ realCount=0;
+ elist->compact(evoc);
+ flist->compact(fvoc);
}
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
- vcbList* flist,std::set<WordIndex>& eapp, std::set<WordIndex>& fapp) : realCount(0)
- // This method is the constructor of the class, it also intitializes the
- // sentence pair sequential number (count) to z
-{
- readsent_mutex=new boost::mutex();
- setprob_mutex=new boost::mutex();
- position = 0;
- readflag = false ;
- allInMemory = false ;
- inputFilename = filename ;
- inputFile = new ifstream(filename);
- pair_no = 0 ;
- if(!(*inputFile)){
- cerr << "\nERROR:(a) Cannot open " << filename;
- exit(1);
- }
- currentSentence = 0;
- totalPairs1 = 0 ;
- totalPairs2 =0;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- bool isNegative=0;
- if (elist && flist){
- cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
- sentPair s ;
- while (getNextSentence(s, elist, flist))
- {
- int k;
- for(k=0;k<s.eSent.size();k++){
- eapp.insert(s.eSent[k]);
- }
- for(k=0;k<s.fSent.size();k++){
- fapp.insert(s.fSent[k]);
- }
- totalPairs1++;
- totalPairs2+=s.realCount;
- // NOTE: this value might change during training
- // for words from the manual dictionary, yet this is ignored!
-
- if( s.noOcc<0 )
- isNegative=1;
- }
- }
- if( isNegative==1 )
- {
- cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
- realCount=new Vector<double>(totalPairs1,1.0);
- }
- else
- realCount=0;
+ vcbList* flist,std::set<WordIndex>& eapp, std::set<WordIndex>& fapp) : realCount(0)
+ // This method is the constructor of the class, it also intitializes the
+ // sentence pair sequential number (count) to z
+{
+ readsent_mutex=new boost::mutex();
+ setprob_mutex=new boost::mutex();
+ position = 0;
+ readflag = false ;
+ allInMemory = false ;
+ inputFilename = filename ;
+ inputFile = new ifstream(filename);
+ pair_no = 0 ;
+ if(!(*inputFile)) {
+ cerr << "\nERROR:(a) Cannot open " << filename;
+ exit(1);
+ }
+ currentSentence = 0;
+ totalPairs1 = 0 ;
+ totalPairs2 =0;
+ pair_no = 0 ;
+ noSentInBuffer = 0 ;
+ Buffer.clear();
+ bool isNegative=0;
+ if (elist && flist) {
+ cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
+ sentPair s ;
+ while (getNextSentence(s, elist, flist)) {
+ int k;
+ for(k=0; k<s.eSent.size(); k++) {
+ eapp.insert(s.eSent[k]);
+ }
+ for(k=0; k<s.fSent.size(); k++) {
+ fapp.insert(s.fSent[k]);
+ }
+ totalPairs1++;
+ totalPairs2+=s.realCount;
+ // NOTE: this value might change during training
+ // for words from the manual dictionary, yet this is ignored!
+
+ if( s.noOcc<0 )
+ isNegative=1;
+ }
+ }
+ if( isNegative==1 ) {
+ cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
+ realCount=new Vector<double>(totalPairs1,1.0);
+ } else
+ realCount=0;
}
void sentenceHandler::rewind()
{
- readsent_mutex->lock();
- position = 0;
- currentSentence = 0;
- readflag = false ;
- if (!allInMemory ||
- !(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)){
- // check if the buffer doe not already has the first chunk of pairs
- if (Buffer.size() > 0)
- cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
- // totalPairs = 0 ;
- pair_no = 0 ;
- noSentInBuffer = 0 ;
- Buffer.clear();
- }
- if (!allInMemory){
- delete inputFile;
- inputFile = new ifstream(inputFilename);
- if(!(*inputFile)){
- cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
- }
+ readsent_mutex->lock();
+ position = 0;
+ currentSentence = 0;
+ readflag = false ;
+ if (!allInMemory ||
+ !(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)) {
+ // check if the buffer doe not already has the first chunk of pairs
+ if (Buffer.size() > 0)
+ cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
+ // totalPairs = 0 ;
+ pair_no = 0 ;
+ noSentInBuffer = 0 ;
+ Buffer.clear();
+ }
+ if (!allInMemory) {
+ delete inputFile;
+ inputFile = new ifstream(inputFilename);
+ if(!(*inputFile)) {
+ cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
}
- readsent_mutex->unlock();
+ }
+ readsent_mutex->unlock();
}
-
+
int sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
{
- readsent_mutex->lock();
-
- do{
- sentPair s ;
- if (readflag){
- cerr << "Attempting to read from the end of corpus, rewinding\n";
- //rewind();
- break;
- }
- if (currentSentence >= noSentInBuffer){
- if (allInMemory)
- break;
- /* no more sentences in buffer */
- noSentInBuffer = 0 ;
- currentSentence = 0 ;
- Buffer.clear();
- cout << "Reading more sentence pairs into memory ... \n";
- while((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(s)){
- if ((s.fSent.size()-1) > (MAX_FERTILITY-1) * (s.eSent.size()-1)){
- cerr << "WARNING: The following sentence pair has source/target sentence length ration more than\n"<<
- "the maximum allowed limit for a source word fertility\n"<<
- " source length = " << s.eSent.size()-1 << " target length = " << s.fSent.size()-1 <<
- " ratio " << double(s.fSent.size()-1)/ (s.eSent.size()-1) << " ferility limit : " <<
- MAX_FERTILITY-1 << '\n';
- cerr << "Shortening sentence \n";
- cerr << s;
- s.eSent.resize(min(s.eSent.size(),s.fSent.size()));
- s.fSent.resize(min(s.eSent.size(),s.fSent.size()));
- }
- Buffer.push_back(s) ;
- if (elist && flist){
- if ((*elist).size() > 0)
- for (WordIndex i= 0 ; i < s.eSent.size() ; i++){
- if (s.eSent[i] >= (*elist).uniqTokens()){
- if( PrintedTooLong++<100)
- cerr << "ERROR: source word " << s.eSent[i] << " is not in the vocabulary list \n";
- exit(-1);
- }
- (*elist).incFreq(s.eSent[i], s.realCount);
- }
- if ((*flist).size() > 0)
- for (WordIndex j= 1 ; j < s.fSent.size() ; j++){
- if (s.fSent[j] >= (*flist).uniqTokens()){
- cerr << "ERROR: target word " << s.fSent[j] << " is not in the vocabulary list \n";
- exit(-1);
- }
- (*flist).incFreq(s.fSent[j], s.realCount);
- }
- }
- noSentInBuffer++;
+ readsent_mutex->lock();
+
+ do {
+ sentPair s ;
+ if (readflag) {
+ cerr << "Attempting to read from the end of corpus, rewinding\n";
+ //rewind();
+ break;
+ }
+ if (currentSentence >= noSentInBuffer) {
+ if (allInMemory)
+ break;
+ /* no more sentences in buffer */
+ noSentInBuffer = 0 ;
+ currentSentence = 0 ;
+ Buffer.clear();
+ cout << "Reading more sentence pairs into memory ... \n";
+ while((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(s)) {
+ if ((s.fSent.size()-1) > (MAX_FERTILITY-1) * (s.eSent.size()-1)) {
+ cerr << "WARNING: The following sentence pair has source/target sentence length ration more than\n"<<
+ "the maximum allowed limit for a source word fertility\n"<<
+ " source length = " << s.eSent.size()-1 << " target length = " << s.fSent.size()-1 <<
+ " ratio " << double(s.fSent.size()-1)/ (s.eSent.size()-1) << " ferility limit : " <<
+ MAX_FERTILITY-1 << '\n';
+ cerr << "Shortening sentence \n";
+ cerr << s;
+ s.eSent.resize(min(s.eSent.size(),s.fSent.size()));
+ s.fSent.resize(min(s.eSent.size(),s.fSent.size()));
+ }
+ Buffer.push_back(s) ;
+ if (elist && flist) {
+ if ((*elist).size() > 0)
+ for (WordIndex i= 0 ; i < s.eSent.size() ; i++) {
+ if (s.eSent[i] >= (*elist).uniqTokens()) {
+ if( PrintedTooLong++<100)
+ cerr << "ERROR: source word " << s.eSent[i] << " is not in the vocabulary list \n";
+ exit(-1);
+ }
+ (*elist).incFreq(s.eSent[i], s.realCount);
}
- if (inputFile->eof()){
- allInMemory = (Buffer.size() >= 1 &&
- Buffer[currentSentence].sentenceNo == 1) ;
- if (allInMemory)
- cout << "Corpus fits in memory, corpus has: " << Buffer.size() <<
- " sentence pairs.\n";
+ if ((*flist).size() > 0)
+ for (WordIndex j= 1 ; j < s.fSent.size() ; j++) {
+ if (s.fSent[j] >= (*flist).uniqTokens()) {
+ cerr << "ERROR: target word " << s.fSent[j] << " is not in the vocabulary list \n";
+ exit(-1);
+ }
+ (*flist).incFreq(s.fSent[j], s.realCount);
}
}
- if(noSentInBuffer <= 0 ){
- //cerr << "# sent in buffer " << noSentInBuffer << '\n';
- readflag = true ;
- break;
- }
- sent = Buffer[currentSentence++] ;
- position ++;
- if( sent.noOcc<0 && realCount ){
- if( Manlexfactor1 && sent.noOcc==-1.0 )
- sent.realCount=Manlexfactor1;
- else if( Manlexfactor2 && sent.noOcc==-2.0 )
- sent.realCount=Manlexfactor2;
- else
- sent.realCount=(*realCount)[sent.getSentenceNo()-1];
- }
- readsent_mutex->unlock();
- return position ;
- }while(false);
+ noSentInBuffer++;
+ }
+ if (inputFile->eof()) {
+ allInMemory = (Buffer.size() >= 1 &&
+ Buffer[currentSentence].sentenceNo == 1) ;
+ if (allInMemory)
+ cout << "Corpus fits in memory, corpus has: " << Buffer.size() <<
+ " sentence pairs.\n";
+ }
+ }
+ if(noSentInBuffer <= 0 ) {
+ //cerr << "# sent in buffer " << noSentInBuffer << '\n';
+ readflag = true ;
+ break;
+ }
+ sent = Buffer[currentSentence++] ;
+ position ++;
+ if( sent.noOcc<0 && realCount ) {
+ if( Manlexfactor1 && sent.noOcc==-1.0 )
+ sent.realCount=Manlexfactor1;
+ else if( Manlexfactor2 && sent.noOcc==-2.0 )
+ sent.realCount=Manlexfactor2;
+ else
+ sent.realCount=(*realCount)[sent.getSentenceNo()-1];
+ }
readsent_mutex->unlock();
- return 0;
+ return position ;
+ } while(false);
+ readsent_mutex->unlock();
+ return 0;
}
bool sentenceHandler::readNextSentence(sentPair& sent)
- /* This method reads in a new pair of sentences, each pair is read from the
- corpus file as line triples. The first line the no of times this line
- pair occured in the corpus, the second line is the source sentence and
- the third is the target sentence. The sentences are represented by a space
- separated positive integer token ids. */
+/* This method reads in a new pair of sentences, each pair is read from the
+ corpus file as line triples. The first line the no of times this line
+ pair occured in the corpus, the second line is the source sentence and
+ the third is the target sentence. The sentences are represented by a space
+ separated positive integer token ids. */
{
string line;
bool fail(false) ;
-
+
sent.clear();
vector<string> splits;
- if (getline(*inputFile, line)){
-
- boost::algorithm::split(splits,line,boost::algorithm::is_any_of("|#*"));
-
- if(splits.size() == 1 || splits.size() == 0){
- // continue, no problem
-
- }else if(splits.size()==3){
- line = splits[0];
- }else{
- fail = true;
- return false;
- }
-
+ if (getline(*inputFile, line)) {
+
+ boost::algorithm::split(splits,line,boost::algorithm::is_any_of("|#*"));
+
+ if(splits.size() == 1 || splits.size() == 0) {
+ // continue, no problem
+
+ } else if(splits.size()==3) {
+ line = splits[0];
+ } else {
+ fail = true;
+ return false;
+ }
+
istrstream buffer(line.c_str());
buffer >> sent.noOcc;
- if( sent.noOcc<0 )
- {
- if( realCount )
- {
- if( Manlexfactor1 && sent.noOcc==-1.0 )
- sent.realCount=Manlexfactor1;
- else if( Manlexfactor2 && sent.noOcc==-2.0 )
- sent.realCount=Manlexfactor2;
- else
- {
- sent.realCount=(*realCount)[pair_no];
- }
- }
- else
- sent.realCount=1.0;
- }
- else
+ if( sent.noOcc<0 ) {
+ if( realCount ) {
+ if( Manlexfactor1 && sent.noOcc==-1.0 )
+ sent.realCount=Manlexfactor1;
+ else if( Manlexfactor2 && sent.noOcc==-2.0 )
+ sent.realCount=Manlexfactor2;
+ else {
+ sent.realCount=(*realCount)[pair_no];
+ }
+ } else
+ sent.realCount=1.0;
+ } else
sent.realCount=sent.noOcc;
- }
- else {
+ } else {
fail = true ;;
}
- if (splits.size()==3 || getline(*inputFile, line)){
- if(splits.size()==3){
- line = splits[1];
- }
+ if (splits.size()==3 || getline(*inputFile, line)) {
+ if(splits.size()==3) {
+ line = splits[1];
+ }
istrstream buffer(line.c_str());
WordIndex w; // w is a local variabe for token id
- sent.eSent.push_back(0); // each source word is assumed to have 0 ==
- // a null word (id 0) at the begining of the sentence.
- while(buffer>>w){ // read source sentece , word by word .
+ sent.eSent.push_back(0); // each source word is assumed to have 0 ==
+ // a null word (id 0) at the begining of the sentence.
+ while(buffer>>w) { // read source sentece , word by word .
if (sent.eSent.size() < MAX_SENTENCE_LENGTH)
- sent.eSent.push_back(w);
+ sent.eSent.push_back(w);
else {
- if( PrintedTooLong++<100)
- cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
- //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
- //cerr << "The following sentence will be truncated\n" << line;
- break ;
+ if( PrintedTooLong++<100)
+ cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
+ //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
+ //cerr << "The following sentence will be truncated\n" << line;
+ break ;
}
}
- }
- else {
+ } else {
fail = true ;
}
- if (splits.size()==3 ||getline(*inputFile, line)){
- if(splits.size()==3){
- line = splits[2];
- }
+ if (splits.size()==3 ||getline(*inputFile, line)) {
+ if(splits.size()==3) {
+ line = splits[2];
+ }
istrstream buffer(line.c_str());
WordIndex w; // w is a local variabe for token id
sent.fSent.push_back(0); //0 is inserted for program uniformity
- while(buffer>>w){ // read target sentece , word by word .
+ while(buffer>>w) { // read target sentece , word by word .
if (sent.fSent.size() < MAX_SENTENCE_LENGTH)
- sent.fSent.push_back(w);
+ sent.fSent.push_back(w);
else {
- if( PrintedTooLong++<100)
- cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
- //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
- //cerr << "The following sentence will be truncated\n" << line;
- break ;
+ if( PrintedTooLong++<100)
+ cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
+ //cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
+ //cerr << "The following sentence will be truncated\n" << line;
+ break ;
}
}
- }
- else {
+ } else {
fail = true ;
}
- if (fail){
+ if (fail) {
sent.eSent.clear();
sent.fSent.clear();
sent.sentenceNo = 0 ;
sent.noOcc = 0 ;
sent.realCount=0;
return(false);
- }
+ }
if( sent.eSent.size()==1||sent.fSent.size()==1 )
cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
sent.sentenceNo = ++pair_no;
- if(pair_no % 100000 == 0)
+ if(pair_no % 100000 == 0)
cout << "[sent:" << sent.sentenceNo << "]"<< '\n';
return true;
}
@@ -387,54 +373,48 @@ bool sentenceHandler::readNextSentence(sentPair& sent)
double optimize_lambda(Vector<double>&vd)
{
Vector<double> l;
- for(double lambda=1.0;lambda<ManlexMAX_MULTIPLICITY;lambda+=0.33)
- {
- double prod=0.0;
- for(unsigned int i=0;i<vd.size();++i)
- {
- prod += vd[i]*exp(lambda*vd[i])/(exp(lambda*vd[i])-1.0);
- }
- l.push_back(fabs(prod-1.0));
+ for(double lambda=1.0; lambda<ManlexMAX_MULTIPLICITY; lambda+=0.33) {
+ double prod=0.0;
+ for(unsigned int i=0; i<vd.size(); ++i) {
+ prod += vd[i]*exp(lambda*vd[i])/(exp(lambda*vd[i])-1.0);
}
+ l.push_back(fabs(prod-1.0));
+ }
double lam=double(min_element(l.begin(),l.end())-l.begin())*0.33+1.0;
- if( lam<1.0 )
- {
- cerr << "ERROR: lambda is smaller than one: " << lam << endl;
- for(unsigned int i=0;i<vd.size();++i)
- cerr << vd[i] << ' ';
- cerr << endl;
- }
+ if( lam<1.0 ) {
+ cerr << "ERROR: lambda is smaller than one: " << lam << endl;
+ for(unsigned int i=0; i<vd.size(); ++i)
+ cerr << vd[i] << ' ';
+ cerr << endl;
+ }
return lam;
}
void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
{
-
- if( realCount==0 )
- return;
- else{
- setprob_mutex->lock();
- if( s.noOcc<=0 )
- {
- double ed=exp(d);
- if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) )
- {
- double lambda=optimize_lambda(oldProbs);
- for(unsigned int i=0;i<oldPairs.size();++i)
- {
- if( oldProbs[i]<1e-5 )
- (*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
- else
- (*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
- }
- oldPairs.clear();
- oldProbs.clear();
- }
- oldPairs.push_back(s);
- oldProbs.push_back(ed);
+
+ if( realCount==0 )
+ return;
+ else {
+ setprob_mutex->lock();
+ if( s.noOcc<=0 ) {
+ double ed=exp(d);
+ if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) ) {
+ double lambda=optimize_lambda(oldProbs);
+ for(unsigned int i=0; i<oldPairs.size(); ++i) {
+ if( oldProbs[i]<1e-5 )
+ (*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
+ else
+ (*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
}
- setprob_mutex->unlock();
+ oldPairs.clear();
+ oldProbs.clear();
+ }
+ oldPairs.push_back(s);
+ oldProbs.push_back(ed);
}
+ setprob_mutex->unlock();
+ }
}
/* ------------- End of Method Definition of Class sentenceHandler ----------*/
diff --git a/mgizapp/src/getSentence.h b/mgizapp/src/getSentence.h
index 6a84abe..806d9c9 100644
--- a/mgizapp/src/getSentence.h
+++ b/mgizapp/src/getSentence.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -50,37 +50,48 @@ USA.
#include "Globals.h"
#include <boost/thread/mutex.hpp>
/*----------------------- Class Prototype Definition ------------------------*
- Class Name: sentenceHandleer
- Objective: This class is defined to handle training sentece pairs from the
- parallel corpus. Each pair has: a target sentece, called here French; a
+ Class Name: sentenceHandleer
+ Objective: This class is defined to handle training sentece pairs from the
+ parallel corpus. Each pair has: a target sentece, called here French; a
source sentece, called here English sentece; and an integer number denoting
- the number of times this pair occured in trining corpus. Both source and
- target senteces are represented as integer vector (variable size arrays),
+ the number of times this pair occured in trining corpus. Both source and
+ target senteces are represented as integer vector (variable size arrays),
each entry is a numeric value which is the token id for the particular token
in the sentece.
*---------------------------------------------------------------------------*/
-class sentPair{
- public:
+class sentPair
+{
+public:
int sentenceNo ;
float noOcc;
float realCount;
Vector<WordIndex> eSent ;
Vector<WordIndex> fSent;
- public:
- sentPair(){};
- void clear(){ eSent.clear(); fSent.clear(); noOcc=0; realCount=0; sentenceNo=0;};
- const Vector<WordIndex>&get_eSent()const
- { return eSent; }
- const Vector<WordIndex>&get_fSent()const
- { return fSent; }
- int getSentenceNo()const
- { return sentenceNo; }
- double getCount()const
- { return realCount; }
-
+public:
+ sentPair() {};
+ void clear() {
+ eSent.clear();
+ fSent.clear();
+ noOcc=0;
+ realCount=0;
+ sentenceNo=0;
+ };
+ const Vector<WordIndex>&get_eSent()const {
+ return eSent;
+ }
+ const Vector<WordIndex>&get_fSent()const {
+ return fSent;
+ }
+ int getSentenceNo()const {
+ return sentenceNo;
+ }
+ double getCount()const {
+ return realCount;
+ }
+
};
inline ostream&operator<<(ostream&of,const sentPair&s)
@@ -99,40 +110,51 @@ inline ostream&operator<<(ostream&of,const sentPair&s)
}
/*Thread-safe version of sentence handler*/
-class sentenceHandler{
+class sentenceHandler
+{
public:
- const char * inputFilename; // parallel corpus file name, similar for all
- // sentence pair objects
- ifstream *inputFile; // parallel corpus file handler
- Vector<sentPair> Buffer;
- int noSentInBuffer ;
- int currentSentence ;
- int position; /*Sentence position (will be returned)*/
- int totalPairs1 ;
- double totalPairs2;
- bool readflag ; // true if you reach the end of file
- bool allInMemory ;
- int pair_no ;
- Vector<double> *realCount;
-
- Vector<sentPair> oldPairs;
- Vector<double> oldProbs;
- sentenceHandler(){readsent_mutex=new boost::mutex();setprob_mutex=new boost::mutex();};
- sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
- sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
- ~sentenceHandler(){delete readsent_mutex; delete setprob_mutex;}
- void rewind();
- int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
- int getTotalNoPairs1()const {return totalPairs1;};
- double getTotalNoPairs2()const {return totalPairs2;};
- // method will read the next pair of sentence from memory buffer
- void setProbOfSentence(const sentPair&s,double d);
-private:
-
- boost::mutex* readsent_mutex;
- boost::mutex* setprob_mutex;
- bool readNextSentence(sentPair&); // will be defined in the definition file, this
+ const char * inputFilename; // parallel corpus file name, similar for all
+ // sentence pair objects
+ ifstream *inputFile; // parallel corpus file handler
+ Vector<sentPair> Buffer;
+ int noSentInBuffer ;
+ int currentSentence ;
+ int position; /*Sentence position (will be returned)*/
+ int totalPairs1 ;
+ double totalPairs2;
+ bool readflag ; // true if you reach the end of file
+ bool allInMemory ;
+ int pair_no ;
+ Vector<double> *realCount;
+
+ Vector<sentPair> oldPairs;
+ Vector<double> oldProbs;
+ sentenceHandler() {
+ readsent_mutex=new boost::mutex();
+ setprob_mutex=new boost::mutex();
+ };
+ sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
+ sentenceHandler(const char* filename, vcbList* elist, vcbList* flist,set<WordIndex>& eapp, set<WordIndex>& fapp);
+ ~sentenceHandler() {
+ delete readsent_mutex;
+ delete setprob_mutex;
+ }
+ void rewind();
+ int getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
+ int getTotalNoPairs1()const {
+ return totalPairs1;
+ };
+ double getTotalNoPairs2()const {
+ return totalPairs2;
+ };
+ // method will read the next pair of sentence from memory buffer
+ void setProbOfSentence(const sentPair&s,double d);
+private:
+
+ boost::mutex* readsent_mutex;
+ boost::mutex* setprob_mutex;
+ bool readNextSentence(sentPair&); // will be defined in the definition file, this
};
#endif
-
+
diff --git a/mgizapp/src/hmm.cpp b/mgizapp/src/hmm.cpp
index 7ae635b..8e28287 100644
--- a/mgizapp/src/hmm.cpp
+++ b/mgizapp/src/hmm.cpp
@@ -9,21 +9,21 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
-#include "hmm.h"
+#include "hmm.h"
#include "Globals.h"
#include "utility.h"
-#include "HMMTables.h"
+#include "HMMTables.h"
#include "ForwardBackward.h"
#include "Parameter.h"
#include <iostream>
@@ -38,34 +38,34 @@ short UniformEntryExit=3;
short HMMTrainingSpecialFlags=0;
GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,
- "HMM DUMP FREQUENCY","th",
- "dump frequency of HMM",
- PARLEV_OUTPUT,0);
+ "HMM DUMP FREQUENCY","th",
+ "dump frequency of HMM",
+ PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
- "lextrain: dependencies in the HMM alignment model. "
- " &1: sentence length; &2: previous class; &4: previous position; "
- " &8: French position; &16: French class"
- ,PARLEV_MODELS,2);
+ "lextrain: dependencies in the HMM alignment model. "
+ " &1: sentence length; &2: previous class; &4: previous position; "
+ " &8: French position; &16: French class"
+ ,PARLEV_MODELS,2);
GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,
- "emProbForEmpty","f-b-trn: probability for empty word",
- PARLEV_MODELS,0.4);
+ "emProbForEmpty","f-b-trn: probability for empty word",
+ PARLEV_MODELS,0.4);
GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
- "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
- PARLEV_SPECIAL,2);
+ "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
+ PARLEV_SPECIAL,2);
GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
- "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
- PARLEV_SMOOTH,0.2);
+ "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
+ PARLEV_SMOOTH,0.2);
/*template<class T>
void smooth_standard(T*a,T*b,double p)
{
int n=b-a;
- if( n==0 )
+ if( n==0 )
return;
double pp=p/n;
for(T*i=a;i!=b;++i)
@@ -74,170 +74,174 @@ void smooth_standard(T*a,T*b,double p)
hmm::hmm(model2&m2,WordClasses &e, WordClasses& f)
-: ewordclasses(e), fwordclasses(f),model2(m2),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
-probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
-{
+ : ewordclasses(e), fwordclasses(f),model2(m2),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
+ probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
+{
}
-void hmm::initialize_table_uniformly(sentenceHandler&){}
+void hmm::initialize_table_uniformly(sentenceHandler&) {}
-struct hmm_em_loop_t{
- hmm *m;
- int done;
- int valid;
- string alignfile;
- int it;
- bool dump_files;
- bool resume;
- pthread_t thread;
- hmm_em_loop_t():m(0),done(0),valid(0){};
+struct hmm_em_loop_t {
+ hmm *m;
+ int done;
+ int valid;
+ string alignfile;
+ int it;
+ bool dump_files;
+ bool resume;
+ pthread_t thread;
+ hmm_em_loop_t():m(0),done(0),valid(0) {};
};
-
-void* hmm_exe_emloop(void *arg){
- hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
- em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
- em->done = -1;
- return arg;
-}
-
-void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume){
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
+
+void* hmm_exe_emloop(void *arg)
+{
+ hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
+ em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
+ em->done = -1;
+ return arg;
+}
+
+void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume)
+{
+ em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
}
extern short NCPUS;
-int hmm::em_with_tricks(int noIterations,bool dumpCount,
- const char* dumpCountName, bool useString ,bool resume){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
+int hmm::em_with_tricks(int noIterations,bool dumpCount,
+ const char* dumpCountName, bool useString ,bool resume)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ bool dump_files = false ;
+ ofstream of2 ;
+ st = time(NULL) ;
+ sHandler1.rewind();
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+ vector<hmm_em_loop_t> th;
+ th.resize(NCPUS);
+ for(int it=1; it <= noIterations ; it++) {
+ it_st = time(NULL) ;
+ cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;
+
+ cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
+ //dump_files = true;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ // acfile = Prefix + ".ac" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A" + shortModelName + "." + number ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- vector<hmm_em_loop_t> th;
- th.resize(NCPUS);
- for(int it=1; it <= noIterations ; it++){
- it_st = time(NULL) ;
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;
-
- cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
- //dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- // acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- sHandler1.rewind();
- int k;
- char node[2] ;
- node[1] = '\0';
- for (k=1 ; k< NCPUS ; k++){
- th[k].m = this;
- th[k].done = 0;
- th[k].valid = 0;
- th[k].it = it;
- th[k].resume = resume;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
- th[k].dump_files = dump_files;
- th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
- if(th[k].valid){
- cerr << "Error starting thread " << k << endl;
- }
- }
- node[0] = '0';
- alignfile += ".part";
- alignfile += node;
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
- for (k=1;k<NCPUS;k++){
- pthread_join((th[k].thread),NULL);
- cerr << "Thread " << k << "done" << endl;
- }
- perp.record("HMM");
- trainViterbiPerp.record("HMM");
- errorReportAL(cout,"HMM");
-
- sHandler1.rewind();
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- if (testPerp && testHandler){
- testHandler->rewind();
- em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1 && (!resume),it);
- testHandler->rewind();
- }
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- string realHTableName = dumpCountName;
- realHTableName += ".h.count";
- string fnamealpha = realHTableName;
- string fnamebeta = realHTableName;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
-
- }
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- probs=counts;
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << testViterbiPerp->perplexity()
- << '\n';
- if (dump_files){
- if( OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- string fnamealpha = afileh;
- string fnamebeta = afileh;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
+ int k;
+ char node[2] ;
+ node[1] = '\0';
+ for (k=1 ; k< NCPUS ; k++) {
+ th[k].m = this;
+ th[k].done = 0;
+ th[k].valid = 0;
+ th[k].it = it;
+ th[k].resume = resume;
+ th[k].alignfile = alignfile + ".part";
+ node[0] = '0' + k;
+ th[k].alignfile += node;
+ th[k].dump_files = dump_files;
+ th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
+ if(th[k].valid) {
+ cerr << "Error starting thread " << k << endl;
+ }
+ }
+ node[0] = '0';
+ alignfile += ".part";
+ alignfile += node;
+ em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
+ for (k=1; k<NCPUS; k++) {
+ pthread_join((th[k].thread),NULL);
+ cerr << "Thread " << k << "done" << endl;
+ }
+ perp.record("HMM");
+ trainViterbiPerp.record("HMM");
+ errorReportAL(cout,"HMM");
+
+ sHandler1.rewind();
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ if (testPerp && testHandler) {
+ testHandler->rewind();
+ em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1 && (!resume),it);
+ testHandler->rewind();
+ }
+ if (dump_files&&OutputInAachenFormat==1)
+ tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+
+ if(dumpCount && it == noIterations) {
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ string realATableName = dumpCountName;
+ realATableName += ".a.count";
+ aCountTable.printRealTable(realATableName.c_str());
+ string realHTableName = dumpCountName;
+ realHTableName += ".h.count";
+ string fnamealpha = realHTableName;
+ string fnamebeta = realHTableName;
+ fnamealpha += ".alpha";
+ fnamebeta += ".beta";
+ counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
+
+ }
+ tTable.normalizeTable(Elist, Flist);
+ aCountTable.normalize(aTable);
+ probs=counts;
+ cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
+ << " PERPLEXITY " << (*testPerp).perplexity()
+ << '\n';
+ cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
+ << " PERPLEXITY " << testViterbiPerp->perplexity()
+ << '\n';
+ if (dump_files) {
+ if( OutputInAachenFormat==0)
+ tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
+ // ofstream afilestream(afileh.c_str());
+ string fnamealpha = afileh;
+ string fnamebeta = afileh;
+ fnamealpha += ".alpha";
+ fnamebeta += ".beta";
+ probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
// aCountTable.printTable(acfile.c_str());
- aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
- } // end of iterations
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return minIter;
+ aTable.printTable(afile.c_str());
+ }
+ it_fn = time(NULL) ;
+ cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
+ difftime(it_fn, it_st) << " seconds\n";
+ } // end of iterations
+ fn = time(NULL) ;
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ //cout << "tTable contains " << tTable.getHash().bucket_count()
+ // << " buckets and " << tTable.getHash().size() << " entries." ;
+ cout << "==========================================================\n";
+ return minIter;
}
/*template<class T>
@@ -258,627 +262,638 @@ T normalize_if_possible_with_increment(T*a,T*b,int increment)
return sum;
}*/
-void hmm::load_table(const char* aname){
- cout << "Hmm: loading a table not implemented.\n";
- abort();
- ifstream anamefile(aname);
- probs.readJumps(anamefile);
+void hmm::load_table(const char* aname)
+{
+ cout << "Hmm: loading a table not implemented.\n";
+ abort();
+ ifstream anamefile(aname);
+ probs.readJumps(anamefile);
}
HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const
{
- unsigned int i,j;
- unsigned int l = es.size() - 1;
- unsigned int m = fs.size() - 1;
- unsigned int I=2*l,J=m;
- int IJ=I*J;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net = new HMMNetwork(I,J);
- fill(net->alphainit.begin(),net->alphainit.end(),0.0);
- fill(net->betainit.begin(),net->betainit.end(),0.0);
- for(j=1;j<=m;j++){
- for(i=1;i<=l;i++){
- // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
- net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
- }
- double emptyContribution=0;
- emptyContribution=tTable.getProb(es[0],fs[j]) ;
- for(i=1;i<=l;i++)
- net->n(i+l-1,j-1)=emptyContribution;
- net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
+ unsigned int i,j;
+ unsigned int l = es.size() - 1;
+ unsigned int m = fs.size() - 1;
+ unsigned int I=2*l,J=m;
+ int IJ=I*J;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ HMMNetwork *net = new HMMNetwork(I,J);
+ fill(net->alphainit.begin(),net->alphainit.end(),0.0);
+ fill(net->betainit.begin(),net->betainit.end(),0.0);
+ for(j=1; j<=m; j++) {
+ for(i=1; i<=l; i++) {
+ // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
+ net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
}
- if( DependencyOfJ )
- net->e.resize(m-1);
- else
- net->e.resize(J>1);
- for(j=0;j<net->e.size();j++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
- net->e[j].resize(I,I,0);
- for(unsigned int i1=0;i1<I;++i1) {
- Array<double> al(l);
- CLASSIFY2(i1,i1real);
- for(unsigned int i2=0;i2<l;i2++)
- al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
- ,j+1);
- normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
- if( SmoothHMM&2 )
- smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
- for(unsigned int i2=0;i2<I;i2++) {
- CLASSIFY(i2,empty_i2,i2real);
- net->e[j](i1,i2) = al[i2real];
-
- if( empty_i2 )
- if(i1real!=i2real) {
- net->e[j](i1,i2)=0;
- } else{
- net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
- }
- }
- normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
- }
+ double emptyContribution=0;
+ emptyContribution=tTable.getProb(es[0],fs[j]) ;
+ for(i=1; i<=l; i++)
+ net->n(i+l-1,j-1)=emptyContribution;
+ net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
+ }
+ if( DependencyOfJ )
+ net->e.resize(m-1);
+ else
+ net->e.resize(J>1);
+ for(j=0; j<net->e.size(); j++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
+ net->e[j].resize(I,I,0);
+ for(unsigned int i1=0; i1<I; ++i1) {
+ Array<double> al(l);
+ CLASSIFY2(i1,i1real);
+ for(unsigned int i2=0; i2<l; i2++)
+ al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
+ ,j+1);
+ normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
+ if( SmoothHMM&2 )
+ smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
+ for(unsigned int i2=0; i2<I; i2++) {
+ CLASSIFY(i2,empty_i2,i2real);
+ net->e[j](i1,i2) = al[i2real];
+
+ if( empty_i2 )
+ if(i1real!=i2real) {
+ net->e[j](i1,i2)=0;
+ } else {
+ net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
+ }
+ }
+ normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
}
- if( doInit ){
- for(unsigned int i=0;i<I;++i)
- {
- net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
- net->betainit[i]=1.0;
- }
- }else{
- if( DependencyOfPrevAJ==0 ){
- for(i=0;i<I;i++){
- CLASSIFY2(i,ireal);
- net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
- }
- }else{
- if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
- if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
- }
+ }
+ if( doInit ) {
+ for(unsigned int i=0; i<I; ++i) {
+ net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
+ net->betainit[i]=1.0;
+ }
+ } else {
+ if( DependencyOfPrevAJ==0 ) {
+ for(i=0; i<I; i++) {
+ CLASSIFY2(i,ireal);
+ net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
+ }
+ } else {
+ if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
+ if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
}
- massert( net->alphainit.size()==I );massert( net->betainit.size()==I );
- normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
- normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
- transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
- return net;
+ }
+ massert( net->alphainit.size()==I );
+ massert( net->betainit.size()==I );
+ normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
+ normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
+ transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
+ return net;
}
extern float MINCOUNTINCREASE;
-void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- const Vector<WordIndex>& es = sent.get_eSent();// #
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());// #
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
-
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
-
- if( !test ){
+void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int
+ )
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+
+ while(sHandler1.getNextSentence(sent)) {
+ const Vector<WordIndex>& es = sent.get_eSent();// #
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());// #
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+
+ if( !test ) {
#ifdef WIN32
- double *gp=const_cast<double*>(&(gamma[0]));
+ double *gp=const_cast<double*>(&(gamma[0]));
#else
- double *gp=conv<double>(gamma.begin());
+ double *gp=conv<double>(gamma.begin());
#endif
-
- for(unsigned int i2=0;i2<J;i2++)
- for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.addValue(0,i2+1,l,m,add);
- //aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.addValue(1+i1,1+i2,l,m,add);
- //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
+
+ for(unsigned int i2=0; i2<J; i2++)
+ for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.addValue(0,i2+1,l,m,add);
+ //aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.addValue(1+i1,1+i2,l,m,add);
+ //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty )
- p0c+=*ep * mult;
- else{
- int v = ewordclasses.getClass(es[1+i_befreal]);
- //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
- counts.addAlCount(i_befreal,ireal,l,m,v,
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
+ }
+ }
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ double *ep=epsilon[jj].begin();
+ if( ep ) {
+ //for(i=0;i<I;i++)
+ // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
+ // for(i=0;i<I*I;++i)
+ // ep[i] *= I;
+ //if( DependencyOfJ )
+ // if( J-1 )
+ // for(i=0;i<I*I;++i)
+ // ep[i] /= (J-1);
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ for(i=0; i<I; i++) {
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ CLASSIFY(i,i_empty,ireal);
+ CLASSIFY2(i_bef,i_befreal);
+ if( i_empty )
+ p0c+=*ep * mult;
+ else {
+ int v = ewordclasses.getClass(es[1+i_befreal]);
+ //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
+ counts.addAlCount(i_befreal,ireal,l,m,v,
+ frenchClass ,jj+1,*ep * mult,0.0);
+ np0c+=*ep * mult;
+ }
+ massert( &epsilon[jj](i,i_bef)== ep);
}
+ }
+ }
+ }
#ifdef WIN32
- double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
+ double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
#else
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
#endif
- hmmentry_type&ai0=counts.doGetAlphaInit(I);
- Array<double>&ai = ai0.first;
- hmmentry_type&bi0=counts.doGetBetaInit(I);
- Array<double>&bi = bi0.first;
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ hmmentry_type&ai0=counts.doGetAlphaInit(I);
+ Array<double>&ai = ai0.first;
+ hmmentry_type&bi0=counts.doGetBetaInit(I);
+ Array<double>&bi = bi0.first;
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
#ifdef WIN32
- ai0.second->lock();
+ ai0.second->lock();
#else
- ai0.second.lock();
+ ai0.second.lock();
#endif
- for(i=0;i<I;i++,gp1++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- //bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
- }
+ for(i=0; i<I; i++,gp1++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ //bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
+ }
+ }
#ifdef WIN32
- ai0.second->unlock();
- bi0.second->lock();
+ ai0.second->unlock();
+ bi0.second->lock();
#else
- ai0.second.unlock();
- bi0.second.lock();
+ ai0.second.unlock();
+ bi0.second.lock();
#endif
- for(i=0;i<I;i++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- bi[i]+= *gp2;
- }
-#ifdef WIN32
- bi0.second->unlock();
-#else
- bi0.second.unlock();
-#endif
-
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
- }
+ for(i=0; i<I; i++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ bi[i]+= *gp2;
+ }
+#ifdef WIN32
+ bi0.second->unlock();
+#else
+ bi0.second.unlock();
+#endif
+
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+
+ delete net;
+ net=0;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+
+ pair_no++;
+ } /* of while */
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
- }
- sHandler1.setProbOfSentence(sent,cross_entropy);
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
-
- delete net;net=0;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
-
- pair_no++;
- } /* of while */
-
}
-void hmm::clearCountTable(){counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);}
+void hmm::clearCountTable()
+{
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+}
#if 0
-CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
- //diff->incCount(1,1,0);
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- nnn ++;
- cout << nnn << endl;
- cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- cout << 2 << endl;
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- cout << 2.5 << endl;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- cout << 3 << endl;
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- cout << 4 << endl;
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- diff->incCount(es[0],fs[1+i2],add);
- //tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- diff->incCount(es[1+i1],fs[1+i2],add);
- //tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- cout << 5 << endl;
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- if (nnn==7779) cout << 1 << endl;
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- if (nnn==7779) cout << 2 << endl;
- double *ep=epsilon[jj].begin();
- if (nnn==7779) cout << 3 << endl;
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- if (nnn==7779) cout << 4 << ":" << I << endl;
- for(i=0;i<I;i++){
- if (nnn==7779) cout << "i:" << i << endl;
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- if (nnn==7779) cout << " CL 1" << endl;
- CLASSIFY(i,i_empty,ireal);
- if (nnn==7779) cout << " CL 2 : " << i_bef << " " << (size_t)ep << endl;
- CLASSIFY2(i_bef,i_befreal);
- if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()){
- continue;
- }
- if( i_empty )
- p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
- else{
- if (nnn==7779) cout << "ELSE" << endl;
- if (nnn==7779){
- cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
- np0c+=epsilon[jj](i,i_bef)*mult;
- cout <<"..."<<endl;
- cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
- cout <<"......"<<endl;
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,0,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- else{
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- }
- if (nnn==7779) cout << "FI" << endl;
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- if (nnn==7779) cout << 5 << endl;
- }
+CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int
+ )
+{
+ CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
+ //diff->incCount(1,1,0);
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+ sHandler1.rewind();
+ int nnn = 0;
+ while(sHandler1.getNextSentence(sent)) {
+ nnn ++;
+ cout << nnn << endl;
+ cout << 1 << endl;
+ const Vector<WordIndex>& es = sent.get_eSent();
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ cout << 2 << endl;
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ cout << 2.5 << endl;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+ cout << 3 << endl;
+ if( !test ) {
+ double *gp=conv<double>(gamma.begin());
+ cout << 4 << endl;
+ for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ diff->incCount(es[0],fs[1+i2],add);
+ //tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ diff->incCount(es[1+i1],fs[1+i2],add);
+ //tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- // cout << 6 << endl;
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
+ }
+ }
+ cout << 5 << endl;
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ if (nnn==7779) cout << 1 << endl;
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ if (nnn==7779) cout << 2 << endl;
+ double *ep=epsilon[jj].begin();
+ if (nnn==7779) cout << 3 << endl;
+ if( ep ) {
+ //for(i=0;i<I;i++)
+ // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
+ // for(i=0;i<I*I;++i)
+ // ep[i] *= I;
+ //if( DependencyOfJ )
+ // if( J-1 )
+ // for(i=0;i<I*I;++i)
+ // ep[i] /= (J-1);
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ if (nnn==7779) cout << 4 << ":" << I << endl;
+ for(i=0; i<I; i++) {
+ if (nnn==7779) cout << "i:" << i << endl;
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ if (nnn==7779) cout << " CL 1" << endl;
+ CLASSIFY(i,i_empty,ireal);
+ if (nnn==7779) cout << " CL 2 : " << i_bef << " " << (size_t)ep << endl;
+ CLASSIFY2(i_bef,i_befreal);
+ if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()) {
+ continue;
+ }
+ if( i_empty )
+ p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
+ else {
+ if (nnn==7779) cout << "ELSE" << endl;
+ if (nnn==7779) {
+ cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
+ np0c+=epsilon[jj](i,i_bef)*mult;
+ cout <<"..."<<endl;
+ cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
+ cout <<"......"<<endl;
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,0,0.0);
+ np0c+=epsilon[jj](i,i_bef)*mult;
+ } else {
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
+ np0c+=epsilon[jj](i,i_bef)*mult;
}
+ }
+ if (nnn==7779) cout << "FI" << endl;
+ massert( &epsilon[jj](i,i_bef)== ep);
}
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ if (nnn==7779) cout << 5 << endl;
}
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
+ }
+ // cout << 6 << endl;
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
+ Array<double>&bi=counts.doGetBetaInit(I);
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ for(i=0; i<I; i++,gp1++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
}
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
- sHandler1.rewind();
- perp.record("HMM");
- viterbi_perp.record("HMM");
- errorReportAL(cout,"HMM");
- return diff;
+ }
+ // cout << 7 << endl;
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ //cout << 8 << endl;
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ //cout << 9 << endl;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ //cout << 10 << endl;
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ //cout << 11 << endl;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cout << 12 << endl;
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+ delete net;
+ net=0;
+ //cout << 13 << endl;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ //cout << 14 << endl;
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+ pair_no++;
+ } /* of while */
+ sHandler1.rewind();
+ perp.record("HMM");
+ viterbi_perp.record("HMM");
+ errorReportAL(cout,"HMM");
+ return diff;
}
#endif
Mutex mu;
#if 0
-void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int part
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- //sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- //nnn ++;
- //cout << nnn << endl;
- //cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
-
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- //diff->incCount(es[0],fs[1+i2],add);
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- //diff->incCount(es[1+i1],fs[1+i2],add);
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty ){
- p0c+=*ep * mult;
- }else{
- //mu.lock();
- //cout<<"\rP "<<part<<" ";
- //cout<<epsilon.size()<<" "<<jj<<" ";
- //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
- //cout<<i<<" "<<i_bef<<" ";
- //cout<<I<<" "<<J<<" ";
-
- cout.flush();
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- //mu.unlock();
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
+void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int part
+ )
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+ //sHandler1.rewind();
+ int nnn = 0;
+ while(sHandler1.getNextSentence(sent)) {
+ //nnn ++;
+ //cout << nnn << endl;
+ //cout << 1 << endl;
+ const Vector<WordIndex>& es = sent.get_eSent();
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+ if( !test ) {
+ double *gp=conv<double>(gamma.begin());
+ for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ //diff->incCount(es[0],fs[1+i2],add);
+ tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ //diff->incCount(es[1+i1],fs[1+i2],add);
+ tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
+ }
+ }
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ double *ep=epsilon[jj].begin();
+ if( ep ) {
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ for(i=0; i<I; i++) {
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ CLASSIFY(i,i_empty,ireal);
+ CLASSIFY2(i_bef,i_befreal);
+ if( i_empty ) {
+ p0c+=*ep * mult;
+ } else {
+ //mu.lock();
+ //cout<<"\rP "<<part<<" ";
+ //cout<<epsilon.size()<<" "<<jj<<" ";
+ //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
+ //cout<<i<<" "<<i_bef<<" ";
+ //cout<<I<<" "<<J<<" ";
+
+ cout.flush();
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,*ep * mult,0.0);
+ np0c+=*ep * mult;
+ //mu.unlock();
+ }
+ massert( &epsilon[jj](i,i_bef)== ep);
}
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
}
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
+ }
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
+ Array<double>&bi=counts.doGetBetaInit(I);
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ for(i=0; i<I; i++,gp1++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
}
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
-
-
- return ;
+ }
+ // cout << 7 << endl;
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ //cout << 8 << endl;
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ //cout << 9 << endl;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ //cout << 10 << endl;
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ //cout << 11 << endl;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cout << 12 << endl;
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+ delete net;
+ net=0;
+ //cout << 13 << endl;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ //cout << 14 << endl;
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+ pair_no++;
+ } /* of while */
+
+
+ return ;
}
-CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- pair_no = 0;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".AH" ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- // if (testPerp && testHandler)
-// em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
+CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ int pair_no = 0;
+ bool dump_files = false ;
+ ofstream of2 ;
+ st = time(NULL) ;
+ sHandler1.rewind();
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+ pair_no = 0;
+
+ cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".AH" ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
+ CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
+
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ // if (testPerp && testHandler)
+// em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
// if (dump_files&&OutputInAachenFormat==1)
// tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
// tTable.normalizeTable(Elist, Flist);
@@ -888,231 +903,235 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it){
// << " PERPLEXITY " << perp.perplexity() << '\n';
// if (testPerp && testHandler)
// cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
-// << " PERPLEXITY " << (*testPerp).perplexity()
+// << " PERPLEXITY " << (*testPerp).perplexity()
// << '\n';
// cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
// << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
// if (testPerp && testHandler)
// cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
-// << " PERPLEXITY " << testViterbiPerp->perplexity()
+// << " PERPLEXITY " << testViterbiPerp->perplexity()
// << '\n';
// if (dump_files){
// if( OutputInAachenFormat==0)
/// tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- // probs.writeJumps(afilestream);
- // aCountTable.printTable(afile.c_str());
-
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return diff;
+// ofstream afilestream(afileh.c_str());
+// probs.writeJumps(afilestream);
+// aCountTable.printTable(afile.c_str());
+
+ fn = time(NULL) ;
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ //cout << "tTable contains " << tTable.getHash().bucket_count()
+ // << " buckets and " << tTable.getHash().size() << " entries." ;
+ cout << "==========================================================\n";
+ return diff;
}
-void hmm::em_one_step_2(int it,int part){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
-
- pair_no = 0;
+void hmm::em_one_step_2(int it,int part)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ int pair_no = 0;
+ bool dump_files = false ;
+ ofstream of2 ;
+ pair_no = 0;
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".Ahmm." ;
- char v[2];
- v[1] = 0;
- v[0] = '0' + it;
- alignfile += v;
- alignfile += ".part";
- v[0] = '0' + part;
- alignfile += v;
-
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- em_loop_2(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,part);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- return ;
+
+ dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".Ahmm." ;
+ char v[2];
+ v[1] = 0;
+ v[0] = '0' + it;
+ alignfile += v;
+ alignfile += ".part";
+ v[0] = '0' + part;
+ alignfile += v;
+
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
+ em_loop_2(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,part);
+
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ return ;
}
-struct hmm_align_struct{
- hmm *h;
- int part;
- int iter;
- int valid;
- pthread_t thread;
- int done;
+struct hmm_align_struct {
+ hmm *h;
+ int part;
+ int iter;
+ int valid;
+ pthread_t thread;
+ int done;
};
-void* em_thread(void *arg){
- hmm_align_struct * hm = (hmm_align_struct*) arg;
- hm->h->em_one_step_2(hm->iter,hm->part);
- hm->done = 1;
- return hm;
+void* em_thread(void *arg)
+{
+ hmm_align_struct * hm = (hmm_align_struct*) arg;
+ hm->h->em_one_step_2(hm->iter,hm->part);
+ hm->done = 1;
+ return hm;
}
-int multi_thread_em(int noIter, int noThread, hmm* base){
- // First, do one-step EM
- int i;
- int j;
- time_t it_st, st, it_fn, fn;
- fn = time(NULL);
- int dumpFreq=ModelH_Dump_Freq;
- bool dump_files = false ;
- string modelName = "HMM",shortModelName="hmm";
- string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
- vector<amodel<COUNT> > counts;
- vector<model2 *> m2;
- counts.resize(noThread);
- m2.resize(noThread);
- for(j=1;j<noThread;j++){
- m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
+int multi_thread_em(int noIter, int noThread, hmm* base)
+{
+ // First, do one-step EM
+ int i;
+ int j;
+ time_t it_st, st, it_fn, fn;
+ fn = time(NULL);
+ int dumpFreq=ModelH_Dump_Freq;
+ bool dump_files = false ;
+ string modelName = "HMM",shortModelName="hmm";
+ string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
+ vector<amodel<COUNT> > counts;
+ vector<model2 *> m2;
+ counts.resize(noThread);
+ m2.resize(noThread);
+ for(j=1; j<noThread; j++) {
+ m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
+ }
+ st = time(NULL);
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+
+ for(i=1; i<=noIter; i++) {
+ base->perp.clear();
+ base->trainViterbiPerp.clear();
+ if (base->testPerp && base->testHandler) {
+ base->testHandler->rewind();
+ base->testPerp->clear();
+ base->testViterbiPerp->clear();
}
- st = time(NULL);
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
-
- for(i=1;i<=noIter;i++){
- base->perp.clear();
- base->trainViterbiPerp.clear();
- if (base->testPerp && base->testHandler){
- base->testHandler->rewind();
- base->testPerp->clear();
- base->testViterbiPerp->clear();
- }
-
- it_st = time(NULL) ;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
- dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
- dump_files = true;
- string number = "";
- int n = i;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
-
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- base->initAL();
- // except the current thread
- vector<hmm_align_struct> args;
- base->sHandler1.rewind();
- args.resize(noThread);
- for(j=1;j<noThread;j++){
- args[j].iter = i;
- args[j].part = j;
- args[j].done = 0;
- counts[j].clear();
- args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
- args[j].h->probs = base->probs;
- args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
- if(args[j].valid){
- cerr << "Error starting thread " << j << endl;
- }
- }
- base->em_one_step_2(i,0);
- //ofstream afilestream(afileh.c_str());
- while(1){
- bool done = true;
- for (j=1;j<noThread;j++){
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if(args[j].done==1){
- args[j].done = 2;
- base->aCountTable.merge(args[j].h->aCountTable);
- //afilestream << "BEFORE MERGE"<<endl;
- //base->counts.writeJumps(afilestream);
- //afilestream << "MERGING"<<endl;
- //args[j].h->counts.writeJumps(afilestream);
- //afilestream << "MERGED"<<endl;
- base->counts.merge(args[j].h->counts);
- //base->counts.writeJumps(afilestream);
- delete args[j].h;
- args[j].h = 0;
- }else if(args[j].done==2){
- // Nothing
- }else if(args[j].done==0){
- done = false;
- }
- }
- if(done) break;
+
+ it_st = time(NULL) ;
+
+ cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
+ dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
+ dump_files = true;
+ string number = "";
+ int n = i;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ acfile = Prefix + ".ac" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+
+ alignfile = Prefix + ".A" + shortModelName + "." + number ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ base->initAL();
+ // except the current thread
+ vector<hmm_align_struct> args;
+ base->sHandler1.rewind();
+ args.resize(noThread);
+ for(j=1; j<noThread; j++) {
+ args[j].iter = i;
+ args[j].part = j;
+ args[j].done = 0;
+ counts[j].clear();
+ args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
+ args[j].h->probs = base->probs;
+ args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
+ if(args[j].valid) {
+ cerr << "Error starting thread " << j << endl;
+ }
+ }
+ base->em_one_step_2(i,0);
+ //ofstream afilestream(afileh.c_str());
+ while(1) {
+ bool done = true;
+ for (j=1; j<noThread; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if(args[j].done==1) {
+ args[j].done = 2;
+ base->aCountTable.merge(args[j].h->aCountTable);
+ //afilestream << "BEFORE MERGE"<<endl;
+ //base->counts.writeJumps(afilestream);
+ //afilestream << "MERGING"<<endl;
+ //args[j].h->counts.writeJumps(afilestream);
+ //afilestream << "MERGED"<<endl;
+ base->counts.merge(args[j].h->counts);
+ //base->counts.writeJumps(afilestream);
+ delete args[j].h;
+ args[j].h = 0;
+ } else if(args[j].done==2) {
+ // Nothing
+ } else if(args[j].done==0) {
+ done = false;
}
- base->perp.record("HMM");
- base->trainViterbiPerp.record("HMM");
- base->errorReportAL(cout,"HMM");
-
- // Normalize
+ }
+ if(done) break;
+ }
+ base->perp.record("HMM");
+ base->trainViterbiPerp.record("HMM");
+ base->errorReportAL(cout,"HMM");
+
+ // Normalize
// cout <<" Writing " << afileh <<"\n";
- base->probs = base->counts;
+ base->probs = base->counts;
// cout <<" Writing " << afileh <<"\n";
// ofstream afilestream(afileh.c_str());
// base->probs.writeJumps(afilestream);
- base->tTable.normalizeTable(base->Elist, base->Flist);
- base->aCountTable.normalize(base->aTable);
- base->aCountTable.clear();
- if (base->testPerp && base->testHandler)
- base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp, true,i==1,i);
- if (dump_files&&OutputInAachenFormat==1)
- base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
- cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
- << " PERPLEXITY " << base->perp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
- << " PERPLEXITY " << base->testPerp->perplexity()
- << '\n';
- cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << base->testViterbiPerp->perplexity()
- << '\n';
- dump_files = true;
- if (dump_files){
- if( OutputInAachenFormat==0)
- base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
- ofstream afilestream(afileh.c_str());
- base->counts.writeJumps(afilestream);
- //base->counts.clear();
- base->aCountTable.printTable(acfile.c_str());
- base->aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
-
- cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
-
- }
- for(j=1;j<noThread;j++){
- delete m2[j];
+ base->tTable.normalizeTable(base->Elist, base->Flist);
+ base->aCountTable.normalize(base->aTable);
+ base->aCountTable.clear();
+ if (base->testPerp && base->testHandler)
+ base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp, true,i==1,i);
+ if (dump_files&&OutputInAachenFormat==1)
+ base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
+ cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
+ << " PERPLEXITY " << base->perp.perplexity() << '\n';
+ if (base->testPerp && base->testHandler)
+ cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
+ << " PERPLEXITY " << base->testPerp->perplexity()
+ << '\n';
+ cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
+ if (base->testPerp && base->testHandler)
+ cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
+ << " PERPLEXITY " << base->testViterbiPerp->perplexity()
+ << '\n';
+ dump_files = true;
+ if (dump_files) {
+ if( OutputInAachenFormat==0)
+ base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
+ ofstream afilestream(afileh.c_str());
+ base->counts.writeJumps(afilestream);
+ //base->counts.clear();
+ base->aCountTable.printTable(acfile.c_str());
+ base->aTable.printTable(afile.c_str());
}
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- return 1;
+ it_fn = time(NULL) ;
+
+ cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
+ difftime(it_fn, it_st) << " seconds\n";
+
+ }
+ for(j=1; j<noThread; j++) {
+ delete m2[j];
+ }
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ return 1;
}
@@ -1120,4 +1139,4 @@ int multi_thread_em(int noIter, int noThread, hmm* base){
#endif
#include "HMMTables.cpp"
template class HMMTables<int,WordClasses>;
-
+
diff --git a/mgizapp/src/hmm.h b/mgizapp/src/hmm.h
index b50b0be..e362a6f 100644
--- a/mgizapp/src/hmm.h
+++ b/mgizapp/src/hmm.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -23,7 +23,7 @@ USA.
#define _hmm_h 1
#include <cassert>
-
+
#include <iostream>
#include <algorithm>
#include <functional>
@@ -43,7 +43,7 @@ using __gnu_cxx::hash_map;
#include <ctime>
#include "TTables.h"
-#include "ATables.h"
+#include "ATables.h"
#include "getSentence.h"
#include "defs.h"
#include "model2.h"
@@ -54,48 +54,49 @@ using __gnu_cxx::hash_map;
#include "ForwardBackward.h"
#include "ttableDiff.hpp"
-class hmm : public model2{
+class hmm : public model2
+{
+public:
+ WordClasses& ewordclasses;
+ WordClasses& fwordclasses;
public:
- WordClasses& ewordclasses;
- WordClasses& fwordclasses;
-public:
- HMMTables<int,WordClasses> counts,probs;
+ HMMTables<int,WordClasses> counts,probs;
public:
- template<class MAPPER>
- void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile){
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm ) {
- cerr << "ERROR: can not read " << efile << endl;
- }else
- ewordclasses.read(estrm,m1,Elist);
- if( !fstrm )
- cerr << "ERROR: can not read " << ffile << endl;
- else
- fwordclasses.read(fstrm,m2,Flist);
- }
- hmm(model2&m2,WordClasses &e, WordClasses& f);
- void initialize_table_uniformly(sentenceHandler&);
- int em_with_tricks(int iterations, bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false,bool resume=false);
- CTTableDiff<COUNT,PROB>* em_one_step(int it);
- // void em_one_step_2(int it,int part);
- void load_table(const char* aname);
+ template<class MAPPER>
+ void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile) {
+ ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
+ if( !estrm ) {
+ cerr << "ERROR: can not read " << efile << endl;
+ } else
+ ewordclasses.read(estrm,m1,Elist);
+ if( !fstrm )
+ cerr << "ERROR: can not read " << ffile << endl;
+ else
+ fwordclasses.read(fstrm,m2,Flist);
+ }
+ hmm(model2&m2,WordClasses &e, WordClasses& f);
+ void initialize_table_uniformly(sentenceHandler&);
+ int em_with_tricks(int iterations, bool dumpCount = false,
+ const char* dumpCountName = NULL, bool useString = false,bool resume=false);
+ CTTableDiff<COUNT,PROB>* em_one_step(int it);
+ // void em_one_step_2(int it,int part);
+ void load_table(const char* aname);
- // void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
- // const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
- /* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
- const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
- /* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int part);*/
- void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
- );
- void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
- HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
- void clearCountTable();
- friend class model3;
+ // void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
+ // const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
+ /* CTTableDiff<COUNT,PROB>* em_loop_1(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
+ const char* alignfile, Perplexity&, bool test,bool doInit,int iter);*/
+ /* void em_loop_2( Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int part);*/
+ void em_loop(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int
+ );
+ void em_thread(int it,string alignfile,bool dump_files,bool resume=false);
+ HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
+ void clearCountTable();
+ friend class model3;
};
//int multi_thread_em(int noIter, int noThread, hmm* base);
diff --git a/mgizapp/src/hmmnorm.cxx b/mgizapp/src/hmmnorm.cxx
index 2643102..d0c9015 100644
--- a/mgizapp/src/hmmnorm.cxx
+++ b/mgizapp/src/hmmnorm.cxx
@@ -50,88 +50,92 @@ GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detai
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
+ "maximal fertility for fertility models", PARLEV_EM, 10);
using namespace std;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- SourceVocabClassesFilename, TargetVocabClassesFilename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-
-
-int main(int argc, char* argv[]){
- if(argc < 5){
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
- return 1;
- }
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- TargetVocabFilename = argv[2];
- SourceVocabFilename = argv[1];
- eTrainVcbList.setName(argv[1]);
- fTrainVcbList.setName(argv[2]);
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
- Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
- tmodel<float, float> tTable;
- sentenceHandler *corpus = new sentenceHandler();
-
-
- model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
- trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
- &testViterbiPerp);
- amodel<float> aTable(false);
- amodel<float> aCountTable(false);
- model2 m2(m1, aTable, aCountTable);
- WordClasses french,english;
- hmm h(m2,english,french);
- SourceVocabClassesFilename = argv[1];
- TargetVocabClassesFilename = argv[2];
- SourceVocabClassesFilename += ".classes";
- TargetVocabClassesFilename += ".classes";
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str());
- string base = argv[4];
- string baseA = base+".alpha";
- string baseB = base+".beta";
- string output = argv[3];
- string outputA = output+".alpha";
- string outputB = output+".beta";
- h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());
- // Start iteration:
- for(int i = 5; i< argc ; i++){
- string name = argv[i];
- string nameA = name + ".alpha";
- string nameB = name + ".beta";
- if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
- h.probs.merge(h.counts);
- else
- cerr << "Error, cannot load name.c_str()";
- h.clearCountTable();
- }
- h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
- delete corpus;
+ TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
+ a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
+
+
+int main(int argc, char* argv[])
+{
+ if(argc < 5) {
+ cerr << "Usage: " << argv[0] << " vcb1 vcb2 outputFile baseFile [additional1 ]..." << endl;
+ return 1;
+ }
+ Vector<WordEntry> evlist,fvlist;
+ vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
+ TargetVocabFilename = argv[2];
+ SourceVocabFilename = argv[1];
+ eTrainVcbList.setName(argv[1]);
+ fTrainVcbList.setName(argv[2]);
+ eTrainVcbList.readVocabList();
+ fTrainVcbList.readVocabList();
+ Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
+ tmodel<float, float> tTable;
+ sentenceHandler *corpus = new sentenceHandler();
+
+
+ model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
+ trainPerp, *corpus, &testPerp, corpus, trainViterbiPerp,
+ &testViterbiPerp);
+ amodel<float> aTable(false);
+ amodel<float> aCountTable(false);
+ model2 m2(m1, aTable, aCountTable);
+ WordClasses french,english;
+ hmm h(m2,english,french);
+ SourceVocabClassesFilename = argv[1];
+ TargetVocabClassesFilename = argv[2];
+ SourceVocabClassesFilename += ".classes";
+ TargetVocabClassesFilename += ".classes";
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str());
+ string base = argv[4];
+ string baseA = base+".alpha";
+ string baseB = base+".beta";
+ string output = argv[3];
+ string outputA = output+".alpha";
+ string outputB = output+".beta";
+ h.probs.readJumps(base.c_str(),NULL,baseA.c_str(), baseB.c_str());
+ // Start iteration:
+ for(int i = 5; i< argc ; i++) {
+ string name = argv[i];
+ string nameA = name + ".alpha";
+ string nameB = name + ".beta";
+ if(h.counts.readJumps(name.c_str(),NULL,nameA.c_str(), nameB.c_str()))
+ h.probs.merge(h.counts);
+ else
+ cerr << "Error, cannot load name.c_str()";
+ h.clearCountTable();
+ }
+ h.probs.writeJumps(output.c_str(),NULL,outputA.c_str(), outputB.c_str());
+ delete corpus;
}
// Some utility functions to get it compile..
ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
+const string str2Num(int n)
+{
+ string number = "";
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ return (number);
}
double LAMBDA=1.09;
Vector<map< pair<int,int>,char > > ReferenceAlignment;
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no){
- return 0;
- }
+ const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
+ int&eventsMissing, int&eventsToomuch, int pair_no)
+{
+ return 0;
+}
-void printGIZAPars(ostream&out){
+void printGIZAPars(ostream&out)
+{
}
diff --git a/mgizapp/src/logprob.cpp b/mgizapp/src/logprob.cpp
index a6130a5..5ddcf77 100644
--- a/mgizapp/src/logprob.cpp
+++ b/mgizapp/src/logprob.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -76,24 +76,21 @@ int LogProb::Initialize()
int i;
std::cerr << "Building integer logs conversion tables\n";
ntof[0] = 0 ;
-
- for (i=nmin+1; i<=nmax; ++i)
- {
- double x = i;
- ntof[i-nmin] = exp(x*logb2);
-
- }
- for (i=tblbnd; i<=0; ++i)
- {
- double x = 1.0 + pow(b, i);
- addtbl[i-tblbnd] = round(log(x)/logb2);
- }
+
+ for (i=nmin+1; i<=nmax; ++i) {
+ double x = i;
+ ntof[i-nmin] = exp(x*logb2);
+
+ }
+ for (i=tblbnd; i<=0; ++i) {
+ double x = 1.0 + pow(b, i);
+ addtbl[i-tblbnd] = round(log(x)/logb2);
+ }
double sqrtb = exp(0.5*logb2);
- for (i=0; i<=-tblbnd; ++i)
- {
- double x = sqrtb * pow(b, i) - 1.0;
- subtbl[i] = round(log(x)/logb2);
- }
+ for (i=0; i<=-tblbnd; ++i) {
+ double x = sqrtb * pow(b, i) - 1.0;
+ subtbl[i] = round(log(x)/logb2);
+ }
// if (toolsRoot)
// {
// ofstream ofs(filename.c_str());
@@ -118,9 +115,9 @@ int LogProb::Initialize()
void LogProb::FreeTables()
{
- delete [] addtbl;
- delete [] subtbl;
- delete [] ntof;
+ delete [] addtbl;
+ delete [] subtbl;
+ delete [] ntof;
}
//---------------------------------------------------------------------------
@@ -129,22 +126,20 @@ void LogProb::FreeTables()
// Subtract two logarithm numbers. Use the following method:
-// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
-LogProb& LogProb::operator-=(const LogProb &subs)
+// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
+LogProb& LogProb::operator-=(const LogProb &subs)
{
if (subs.logr == zeron)
return *this;
int a = logr - subs.logr;
- if (a <= 0)
- {
- if (a < 0)
- {
- std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
- //abort();
- }
- logr = zeron;
- return *this;
+ if (a <= 0) {
+ if (a < 0) {
+ std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
+ //abort();
}
+ logr = zeron;
+ return *this;
+ }
if (a > -tblbnd)
return *this;
logr = subs.logr + subtbl[a];
diff --git a/mgizapp/src/logprob.h b/mgizapp/src/logprob.h
index 38b414d..f275498 100644
--- a/mgizapp/src/logprob.h
+++ b/mgizapp/src/logprob.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,13 +36,14 @@ USA.
#ifdef WIN32
#define round(x) floor(x+0.5)
#endif
-class LogProb {
+class LogProb
+{
public:
// mj for cross entropy
double base2() const {
return (logr * logb2 / log((double)2));
}
-
+
// Constructors
LogProb() : logr(zeron) {}
LogProb(const LogProb &obj) : logr(obj.logr) {}
@@ -50,111 +51,149 @@ public:
// destructor
~LogProb() {} // default destructor
- operator double() const // converts logr to (double) b**logr
- {
- if (logr < nmin) return ntof[0];
- if (logr > nmax) return ntof[nmax-nmin];
- return ntof[logr-nmin];
- }
+ operator double() const { // converts logr to (double) b**logr
+ if (logr < nmin) return ntof[0];
+ if (logr > nmax) return ntof[nmax-nmin];
+ return ntof[logr-nmin];
+ }
+
+ LogProb &operator=(const LogProb &obj) {
+ logr = obj.logr;
+ return *this;
+ }
+ int operator!() const {
+ return logr == zeron;
+ }
- LogProb &operator=(const LogProb &obj) { logr = obj.logr; return *this; }
- int operator!() const { return logr == zeron; }
-
// iostream friend specifications
friend std::ostream& operator<<(std::ostream& os, const LogProb &obj);
friend std::istream& operator>>(std::istream& is, LogProb &obj);
friend std::ostream& operator<<=(std::ostream& os, const LogProb &obj);
friend std::istream& operator>>=(std::istream& is, LogProb &obj);
- // arithmetic operators
+ // arithmetic operators
LogProb &operator+=(const LogProb &add) // logr2 = logb ( b**logr2 + b**logr1 )
- // Add two numbers represented as logarithms. Use the following method:
- // b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
- {
- if (add.logr == zeron)
- return *this;
- if (logr == zeron)
- {
- logr = add.logr;
- return *this;
- }
- int a = add.logr - logr;
- if (a > 0)
- {
- a = -a;
- logr = add.logr;
- }
- if (a < tblbnd)
- return *this;
- logr += addtbl[a-tblbnd];
+ // Add two numbers represented as logarithms. Use the following method:
+ // b**n + b**m = b**n(1 + b**(m-n)), assuming n >= m.
+ {
+ if (add.logr == zeron)
return *this;
- }
-
- LogProb &operator-=(const LogProb &); // logr2 = logb ( b**logr2 + b**logr1 )
- LogProb operator*(const LogProb &mul) const // logr3 = logr2 + logr1
- {
- LogProb result; // start out with result == 0
- if ((logr != zeron) && (mul.logr != zeron))
- result.logr = std::max(logr+mul.logr, zeron);
- return result;
- }
- LogProb operator*(double x) const // logr3 = logr2 + logr1
- {
- return (*this)*(LogProb)x;
- }
- LogProb operator^(const int i) const // logr2 = logr1 * i
- {
- LogProb result; // start out with result == 0
- // if ((logr != zeron) && (mul.logr != zeron))
- result.logr = logr * i ;
- return result;
- }
- LogProb &operator*=(const LogProb &mul) // logr2 += logr1
- {
- if ((logr == zeron) || (mul.logr == zeron))
- logr = zeron;
- else
- logr = std::max(logr+mul.logr, zeron);
+ if (logr == zeron) {
+ logr = add.logr;
return *this;
}
- LogProb operator/(const LogProb &div) const // logr3 = logr2 -logr1
- {
- LogProb result;
- if (logr != zeron)
- result.logr = std::max(logr - div.logr, zeron);
- return result;
+ int a = add.logr - logr;
+ if (a > 0) {
+ a = -a;
+ logr = add.logr;
}
- LogProb &operator/=(const LogProb &div) // logr2 -= logr1
- {
- if (logr != zeron)
- logr = std::max(logr - div.logr, zeron);
+ if (a < tblbnd)
return *this;
- }
- LogProb operator+(const LogProb &l) const // logr3 = logb ( b**logr2 + b**logr1 )
- { LogProb result(*this); result += l; return result; }
- LogProb operator-(const LogProb &l) const // logr3 = logb ( b**logr2 - b**logr1 )
- { LogProb result(*this); result -= l; return result; }
- LogProb power(const int n) const // logr2 = logr1 * int
- { LogProb result(*this); result.logr *= n; return result; }
-
+ logr += addtbl[a-tblbnd];
+ return *this;
+ }
+
+ LogProb &operator-=(const LogProb &); // logr2 = logb ( b**logr2 + b**logr1 )
+ LogProb operator*(const LogProb &mul) const { // logr3 = logr2 + logr1
+ LogProb result; // start out with result == 0
+ if ((logr != zeron) && (mul.logr != zeron))
+ result.logr = std::max(logr+mul.logr, zeron);
+ return result;
+ }
+ LogProb operator*(double x) const { // logr3 = logr2 + logr1
+ return (*this)*(LogProb)x;
+ }
+ LogProb operator^(const int i) const { // logr2 = logr1 * i
+ LogProb result; // start out with result == 0
+ // if ((logr != zeron) && (mul.logr != zeron))
+ result.logr = logr * i ;
+ return result;
+ }
+ LogProb &operator*=(const LogProb &mul) { // logr2 += logr1
+ if ((logr == zeron) || (mul.logr == zeron))
+ logr = zeron;
+ else
+ logr = std::max(logr+mul.logr, zeron);
+ return *this;
+ }
+ LogProb operator/(const LogProb &div) const { // logr3 = logr2 -logr1
+ LogProb result;
+ if (logr != zeron)
+ result.logr = std::max(logr - div.logr, zeron);
+ return result;
+ }
+ LogProb &operator/=(const LogProb &div) { // logr2 -= logr1
+ if (logr != zeron)
+ logr = std::max(logr - div.logr, zeron);
+ return *this;
+ }
+ LogProb operator+(const LogProb &l) const { // logr3 = logb ( b**logr2 + b**logr1 )
+ LogProb result(*this);
+ result += l;
+ return result;
+ }
+ LogProb operator-(const LogProb &l) const { // logr3 = logb ( b**logr2 - b**logr1 )
+ LogProb result(*this);
+ result -= l;
+ return result;
+ }
+ LogProb power(const int n) const { // logr2 = logr1 * int
+ LogProb result(*this);
+ result.logr *= n;
+ return result;
+ }
+
// Conditional operators
- int operator<(const LogProb &obj) const { return logr < obj.logr; }
- int operator<=(const LogProb &obj) const { return logr <= obj.logr; }
- int operator>(const LogProb &obj) const { return logr > obj.logr; }
- int operator>=(const LogProb &obj) const { return logr >= obj.logr; }
- int operator==(const LogProb &obj) const { return logr == obj.logr; }
- int operator!=(const LogProb &obj) const { return logr != obj.logr; }
- int operator<(double d) const { return ((double)*this) < d; }
- int operator<=(double d) const { return ((double)*this) <= d; }
- int operator>(double d) const { return ((double)*this) > d; }
- int operator>=(double d) const { return ((double)*this) >= d; }
- int operator==(double d) const { return ((double)*this) == d; }
- int operator!=(double d) const { return ((double)*this) != d; }
-
-
- LogProb &SetZero() { logr = zeron; return *this; } // representation of 0,
- LogProb &SetOne() { logr = onen; return *this; } // 1, and
- LogProb &SetInf() { logr = infn; return *this; } // inf in logarithm domain
+ int operator<(const LogProb &obj) const {
+ return logr < obj.logr;
+ }
+ int operator<=(const LogProb &obj) const {
+ return logr <= obj.logr;
+ }
+ int operator>(const LogProb &obj) const {
+ return logr > obj.logr;
+ }
+ int operator>=(const LogProb &obj) const {
+ return logr >= obj.logr;
+ }
+ int operator==(const LogProb &obj) const {
+ return logr == obj.logr;
+ }
+ int operator!=(const LogProb &obj) const {
+ return logr != obj.logr;
+ }
+ int operator<(double d) const {
+ return ((double)*this) < d;
+ }
+ int operator<=(double d) const {
+ return ((double)*this) <= d;
+ }
+ int operator>(double d) const {
+ return ((double)*this) > d;
+ }
+ int operator>=(double d) const {
+ return ((double)*this) >= d;
+ }
+ int operator==(double d) const {
+ return ((double)*this) == d;
+ }
+ int operator!=(double d) const {
+ return ((double)*this) != d;
+ }
+
+
+ LogProb &SetZero() {
+ logr = zeron; // representation of 0,
+ return *this;
+ }
+ LogProb &SetOne() {
+ logr = onen; // 1, and
+ return *this;
+ }
+ LogProb &SetInf() {
+ logr = infn; // inf in logarithm domain
+ return *this;
+ }
private:
int logr; // a representation of logarithm
@@ -166,7 +205,7 @@ private:
static const int tblbnd;
static const int zeron, onen, infn; // zero, one, and inf in log domain
static const int max_2byte_integer, min_2byte_integer;
-
+
// Arithmetic computation Tables
static double *ntof;
static int *addtbl;
diff --git a/mgizapp/src/main.cpp b/mgizapp/src/main.cpp
index bc44656..f06c8e8 100644
--- a/mgizapp/src/main.cpp
+++ b/mgizapp/src/main.cpp
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,7 +32,7 @@
#include "vocab.h"
#include "Perplexity.h"
#include "Dictionary.h"
-#include "utility.h"
+#include "utility.h"
#include "Parameter.h"
#include "myassert.h"
#include "D4Tables.h"
@@ -45,7 +45,7 @@
#define ITER_MH 5
/**
- Here we can see that Every model is iterated several times, and we do not need to do it
+ Here we can see that Every model is iterated several times, and we do not need to do it
on all the corpora, instead we will only start a few.
*/
GLOBAL_PARAMETER3(int,Model1_Iterations,"Model1_Iterations","NO. ITERATIONS MODEL 1","m1","number of iterations for Model 1",PARLEV_ITER,5);
@@ -91,16 +91,16 @@ GLOBAL_PARAMETER(short,CompactAlignmentFormat,"CompactAlignmentFormat","0: detai
GLOBAL_PARAMETER2(bool,NODUMPS,"NODUMPS","NO FILE DUMPS? (Y/N)","1: do not write any files",PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
- "maximal fertility for fertility models", PARLEV_EM, 10);
+ "maximal fertility for fertility models", PARLEV_EM, 10);
Vector<map< pair<int,int>,char > > ReferenceAlignment;
bool useDict = false;
string CoocurrenceFile;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
- SourceVocabClassesFilename(""), TargetVocabClassesFilename(""),
- TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
- a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
+ SourceVocabClassesFilename(""), TargetVocabClassesFilename(""),
+ TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
+ a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
// QIN: Variables required for reloading model and continue training
@@ -112,12 +112,13 @@ string countPrefix;
Mutex logmsg_lock;
ofstream logmsg;
-const string str2Num(int n) {
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
+const string str2Num(int n)
+{
+ string number = "";
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ return (number);
}
double LAMBDA=1.09;
@@ -126,1035 +127,1045 @@ Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
string ReadTablePrefix;
-void printGIZAPars(ostream&out) {
- out << "general parameters:\n"
- "-------------------\n";
- printPars(out, getGlobalParSet(), 0);
- out << '\n';
-
- out << "No. of iterations:\n-"
- "------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_ITER);
- out << '\n';
-
- out
- << "parameter for various heuristics in GIZA++ for efficient training:\n"
- "------------------------------------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_OPTHEUR);
- out << '\n';
-
- out << "parameters for describing the type and amount of output:\n"
- "-----------------------------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_OUTPUT);
- out << '\n';
-
- out << "parameters describing input files:\n"
- "----------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_INPUT);
- out << '\n';
-
- out << "smoothing parameters:\n"
- "---------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_SMOOTH);
- out << '\n';
-
- out << "parameters modifying the models:\n"
- "--------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_MODELS);
- out << '\n';
-
- out << "parameters modifying the EM-algorithm:\n"
- "--------------------------------------\n";
- printPars(out, getGlobalParSet(), PARLEV_EM);
- out << '\n';
+void printGIZAPars(ostream&out)
+{
+ out << "general parameters:\n"
+ "-------------------\n";
+ printPars(out, getGlobalParSet(), 0);
+ out << '\n';
+
+ out << "No. of iterations:\n-"
+ "------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_ITER);
+ out << '\n';
+
+ out
+ << "parameter for various heuristics in GIZA++ for efficient training:\n"
+ "------------------------------------------------------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_OPTHEUR);
+ out << '\n';
+
+ out << "parameters for describing the type and amount of output:\n"
+ "-----------------------------------------------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_OUTPUT);
+ out << '\n';
+
+ out << "parameters describing input files:\n"
+ "----------------------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_INPUT);
+ out << '\n';
+
+ out << "smoothing parameters:\n"
+ "---------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_SMOOTH);
+ out << '\n';
+
+ out << "parameters modifying the models:\n"
+ "--------------------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_MODELS);
+ out << '\n';
+
+ out << "parameters modifying the EM-algorithm:\n"
+ "--------------------------------------\n";
+ printPars(out, getGlobalParSet(), PARLEV_EM);
+ out << '\n';
}
const char*stripPath(const char*fullpath)
-// strip the path info from the file name
+// strip the path info from the file name
{
- const char *ptr = fullpath + strlen(fullpath) - 1;
- while (ptr && ptr > fullpath && *ptr != '/') {
- ptr--;
- }
- if ( *ptr=='/')
- return (ptr+1);
- else
- return ptr;
+ const char *ptr = fullpath + strlen(fullpath) - 1;
+ while (ptr && ptr > fullpath && *ptr != '/') {
+ ptr--;
+ }
+ if ( *ptr=='/')
+ return (ptr+1);
+ else
+ return ptr;
}
-void printDecoderConfigFile() {
- string decoder_config_file = Prefix + ".Decoder.config";
- cerr << "writing decoder configuration file to "
- << decoder_config_file.c_str() <<'\n';
- ofstream decoder(decoder_config_file.c_str());
- if (!decoder) {
- cerr << "\nCannot write to " << decoder_config_file <<'\n';
- exit(1);
- }
- decoder
- << "# Template for Configuration File for the Rewrite Decoder\n# Syntax:\n"
- << "# <Variable> = <value>\n# '#' is the comment character\n"
- << "#================================================================\n"
- << "#================================================================\n"
- << "# LANGUAGE MODEL FILE\n# The full path and file name of the language model file:\n";
- decoder << "LanguageModelFile =\n";
-
- decoder
- << "#================================================================\n"
- << "#================================================================\n"
- << "# TRANSLATION MODEL FILES\n# The directory where the translation model tables as created\n"
- << "# by Giza are located:\n#\n"
- << "# Notes: - All translation model \"source\" files are assumed to be in\n"
- << "# TM_RawDataDir, the binaries will be put in TM_BinDataDir\n"
- << "#\n# - Attention: RELATIVE PATH NAMES DO NOT WORK!!!\n"
- << "#\n# - Absolute paths (file name starts with /) will override\n"
- << "# the default directory.\n\n";
- // strip file prefix info and leave only the path name in Prefix
- string path = Prefix.substr(0, Prefix.find_last_of("/")+1);
- if (path=="")
- path=".";
- decoder << "TM_RawDataDir = " << path << '\n';
- decoder << "TM_BinDataDir = " << path << '\n' << '\n';
- decoder << "# file names of the TM tables\n# Notes:\n"
- << "# 1. TTable and InversTTable are expected to use word IDs not\n"
- << "# strings (Giza produces both, whereby the *.actual.* files\n"
- << "# use strings and are THE WRONG CHOICE.\n"
- << "# 2. FZeroWords, on the other hand, is a simple list of strings\n"
- << "# with one word per line. This file is typically edited\n"
- << "# manually. Hoeever, this one listed here is generated by GIZA\n\n";
-
- int lastmodel;
- if (Model5_Iterations>0)
- lastmodel = 5;
- else if (Model4_Iterations>0)
- lastmodel = 4;
- else if (Model3_Iterations>0)
- lastmodel = 3;
- else if (Model2_Iterations>0)
- lastmodel = 2;
- else
- lastmodel = 1;
- string lastModelName = str2Num(lastmodel);
- string p=Prefix + ".t" + /*lastModelName*/"3" +".final";
- decoder << "TTable = " << stripPath(p.c_str()) << '\n';
- p = Prefix + ".ti.final";
- decoder << "InverseTTable = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".n" + /*lastModelName*/"3" + ".final";
- decoder << "NTable = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".d" + /*lastModelName*/"3" + ".final";
- decoder << "D3Table = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".D4.final";
- decoder << "D4Table = " << stripPath(p.c_str()) << '\n';
- p=Prefix + ".p0_"+ /*lastModelName*/"3" + ".final";
- decoder << "PZero = " << stripPath(p.c_str()) << '\n';
- decoder << "Source.vcb = " << SourceVocabFilename << '\n';
- decoder << "Target.vcb = " << TargetVocabFilename << '\n';
- // decoder << "Source.classes = " << SourceVocabFilename + ".classes" << '\n';
- // decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n';
- decoder << "Source.classes = " << SourceVocabClassesFilename << '\n';
- decoder << "Target.classes = " << TargetVocabClassesFilename <<'\n';
- p=Prefix + ".fe0_"+ /*lastModelName*/"3" + ".final";
- decoder << "FZeroWords = " <<stripPath(p.c_str()) << '\n';
-
- /* decoder << "# Translation Parameters\n"
- << "# Note: TranslationModel and LanguageModelMode must have NUMBERS as\n"
- << "# values, not words\n"
- << "# CORRECT: LanguageModelMode = 2\n"
- << "# WRONG: LanguageModelMode = bigrams # WRONG, WRONG, WRONG!!!\n";
- decoder << "TMWeight = 0.6 # weight of TM for calculating alignment probability\n";
- decoder << "TranslationModel = "<<lastmodel<<" # which model to use (3 or 4)\n";
- decoder << "LanguageModelMode = 2 # (2 (bigrams) or 3 (trigrams)\n\n";
- decoder << "# Output Options\n"
- << "TellWhatYouAreDoing = TRUE # print diagnostic messages to stderr\n"
- << "PrintOriginal = TRUE # repeat original sentence in the output\n"
- << "TopTranslations = 3 # number of n best translations to be returned\n"
- << "PrintProbabilities = TRUE # give the probabilities for the translations\n\n";
-
- decoder << "# LOGGING OPTIONS\n"
- << "LogFile = - # empty means: no log, dash means: STDOUT\n"
- << "LogLM = true # log language model lookups\n"
- << "LogTM = true # log translation model lookups\n";
- */
+void printDecoderConfigFile()
+{
+ string decoder_config_file = Prefix + ".Decoder.config";
+ cerr << "writing decoder configuration file to "
+ << decoder_config_file.c_str() <<'\n';
+ ofstream decoder(decoder_config_file.c_str());
+ if (!decoder) {
+ cerr << "\nCannot write to " << decoder_config_file <<'\n';
+ exit(1);
+ }
+ decoder
+ << "# Template for Configuration File for the Rewrite Decoder\n# Syntax:\n"
+ << "# <Variable> = <value>\n# '#' is the comment character\n"
+ << "#================================================================\n"
+ << "#================================================================\n"
+ << "# LANGUAGE MODEL FILE\n# The full path and file name of the language model file:\n";
+ decoder << "LanguageModelFile =\n";
+
+ decoder
+ << "#================================================================\n"
+ << "#================================================================\n"
+ << "# TRANSLATION MODEL FILES\n# The directory where the translation model tables as created\n"
+ << "# by Giza are located:\n#\n"
+ << "# Notes: - All translation model \"source\" files are assumed to be in\n"
+ << "# TM_RawDataDir, the binaries will be put in TM_BinDataDir\n"
+ << "#\n# - Attention: RELATIVE PATH NAMES DO NOT WORK!!!\n"
+ << "#\n# - Absolute paths (file name starts with /) will override\n"
+ << "# the default directory.\n\n";
+ // strip file prefix info and leave only the path name in Prefix
+ string path = Prefix.substr(0, Prefix.find_last_of("/")+1);
+ if (path=="")
+ path=".";
+ decoder << "TM_RawDataDir = " << path << '\n';
+ decoder << "TM_BinDataDir = " << path << '\n' << '\n';
+ decoder << "# file names of the TM tables\n# Notes:\n"
+ << "# 1. TTable and InversTTable are expected to use word IDs not\n"
+ << "# strings (Giza produces both, whereby the *.actual.* files\n"
+ << "# use strings and are THE WRONG CHOICE.\n"
+ << "# 2. FZeroWords, on the other hand, is a simple list of strings\n"
+ << "# with one word per line. This file is typically edited\n"
+ << "# manually. Hoeever, this one listed here is generated by GIZA\n\n";
+
+ int lastmodel;
+ if (Model5_Iterations>0)
+ lastmodel = 5;
+ else if (Model4_Iterations>0)
+ lastmodel = 4;
+ else if (Model3_Iterations>0)
+ lastmodel = 3;
+ else if (Model2_Iterations>0)
+ lastmodel = 2;
+ else
+ lastmodel = 1;
+ string lastModelName = str2Num(lastmodel);
+ string p=Prefix + ".t" + /*lastModelName*/"3" +".final";
+ decoder << "TTable = " << stripPath(p.c_str()) << '\n';
+ p = Prefix + ".ti.final";
+ decoder << "InverseTTable = " << stripPath(p.c_str()) << '\n';
+ p=Prefix + ".n" + /*lastModelName*/"3" + ".final";
+ decoder << "NTable = " << stripPath(p.c_str()) << '\n';
+ p=Prefix + ".d" + /*lastModelName*/"3" + ".final";
+ decoder << "D3Table = " << stripPath(p.c_str()) << '\n';
+ p=Prefix + ".D4.final";
+ decoder << "D4Table = " << stripPath(p.c_str()) << '\n';
+ p=Prefix + ".p0_"+ /*lastModelName*/"3" + ".final";
+ decoder << "PZero = " << stripPath(p.c_str()) << '\n';
+ decoder << "Source.vcb = " << SourceVocabFilename << '\n';
+ decoder << "Target.vcb = " << TargetVocabFilename << '\n';
+ // decoder << "Source.classes = " << SourceVocabFilename + ".classes" << '\n';
+ // decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n';
+ decoder << "Source.classes = " << SourceVocabClassesFilename << '\n';
+ decoder << "Target.classes = " << TargetVocabClassesFilename <<'\n';
+ p=Prefix + ".fe0_"+ /*lastModelName*/"3" + ".final";
+ decoder << "FZeroWords = " <<stripPath(p.c_str()) << '\n';
+
+ /* decoder << "# Translation Parameters\n"
+ << "# Note: TranslationModel and LanguageModelMode must have NUMBERS as\n"
+ << "# values, not words\n"
+ << "# CORRECT: LanguageModelMode = 2\n"
+ << "# WRONG: LanguageModelMode = bigrams # WRONG, WRONG, WRONG!!!\n";
+ decoder << "TMWeight = 0.6 # weight of TM for calculating alignment probability\n";
+ decoder << "TranslationModel = "<<lastmodel<<" # which model to use (3 or 4)\n";
+ decoder << "LanguageModelMode = 2 # (2 (bigrams) or 3 (trigrams)\n\n";
+ decoder << "# Output Options\n"
+ << "TellWhatYouAreDoing = TRUE # print diagnostic messages to stderr\n"
+ << "PrintOriginal = TRUE # repeat original sentence in the output\n"
+ << "TopTranslations = 3 # number of n best translations to be returned\n"
+ << "PrintProbabilities = TRUE # give the probabilities for the translations\n\n";
+
+ decoder << "# LOGGING OPTIONS\n"
+ << "LogFile = - # empty means: no log, dash means: STDOUT\n"
+ << "LogLM = true # log language model lookups\n"
+ << "LogTM = true # log translation model lookups\n";
+ */
}
void printAllTables(vcbList& eTrainVcbList, vcbList& eTestVcbList,
- vcbList& fTrainVcbList, vcbList& fTestVcbList, model1& m1) {
- cerr << "writing Final tables to Disk \n";
- string t_inv_file = Prefix + ".ti.final";
- if ( !FEWDUMPS)
- m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
- m1.getEnglishVocabList(), m1.getFrenchVocabList(),
- m1.getETotalWCount(), m1.getFTotalWCount());
- t_inv_file = Prefix + ".actual.ti.final";
- if ( !FEWDUMPS)
- m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
- eTrainVcbList.getVocabList(), fTrainVcbList.getVocabList(),
- m1.getETotalWCount(), m1.getFTotalWCount(), true);
-
- string perp_filename = Prefix + ".perp";
- ofstream of_perp(perp_filename.c_str());
-
- cout << "Writing PERPLEXITY report to: " << perp_filename << '\n';
- if (!of_perp) {
- cerr << "\nERROR: Cannot write to " << perp_filename <<'\n';
- exit(1);
- }
-
- if (testCorpus)
- generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
- testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), (*testCorpus).getTotalNoPairs1(), true);
- else
- generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
- testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), 0, true);
-
- string eTrainVcbFile = Prefix + ".trn.src.vcb";
- ofstream of_eTrainVcb(eTrainVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << eTrainVcbFile << '\n';
- if (!of_eTrainVcb) {
- cerr << "\nERROR: Cannot write to " << eTrainVcbFile <<'\n';
- exit(1);
- }
- eTrainVcbList.printVocabList(of_eTrainVcb) ;
-
- string fTrainVcbFile = Prefix + ".trn.trg.vcb";
- ofstream of_fTrainVcb(fTrainVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << fTrainVcbFile << '\n';
- if (!of_fTrainVcb) {
- cerr << "\nERROR: Cannot write to " << fTrainVcbFile <<'\n';
- exit(1);
- }
- fTrainVcbList.printVocabList(of_fTrainVcb) ;
-
- //print test vocabulary list
-
- string eTestVcbFile = Prefix + ".tst.src.vcb";
- ofstream of_eTestVcb(eTestVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << eTestVcbFile << '\n';
- if (!of_eTestVcb) {
- cerr << "\nERROR: Cannot write to " << eTestVcbFile <<'\n';
- exit(1);
- }
- eTestVcbList.printVocabList(of_eTestVcb) ;
-
- string fTestVcbFile = Prefix + ".tst.trg.vcb";
- ofstream of_fTestVcb(fTestVcbFile.c_str());
- cout << "Writing source vocabulary list to : " << fTestVcbFile << '\n';
- if (!of_fTestVcb) {
- cerr << "\nERROR: Cannot write to " << fTestVcbFile <<'\n';
- exit(1);
- }
- fTestVcbList.printVocabList(of_fTestVcb) ;
- printDecoderConfigFile();
- if (testCorpus)
- printOverlapReport(m1.getTTable(), *testCorpus, eTrainVcbList,
- fTrainVcbList, eTestVcbList, fTestVcbList);
+ vcbList& fTrainVcbList, vcbList& fTestVcbList, model1& m1)
+{
+ cerr << "writing Final tables to Disk \n";
+ string t_inv_file = Prefix + ".ti.final";
+ if ( !FEWDUMPS)
+ m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
+ m1.getEnglishVocabList(), m1.getFrenchVocabList(),
+ m1.getETotalWCount(), m1.getFTotalWCount());
+ t_inv_file = Prefix + ".actual.ti.final";
+ if ( !FEWDUMPS)
+ m1.getTTable().printProbTableInverse(t_inv_file.c_str(),
+ eTrainVcbList.getVocabList(), fTrainVcbList.getVocabList(),
+ m1.getETotalWCount(), m1.getFTotalWCount(), true);
+
+ string perp_filename = Prefix + ".perp";
+ ofstream of_perp(perp_filename.c_str());
+
+ cout << "Writing PERPLEXITY report to: " << perp_filename << '\n';
+ if (!of_perp) {
+ cerr << "\nERROR: Cannot write to " << perp_filename <<'\n';
+ exit(1);
+ }
+
+ if (testCorpus)
+ generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
+ testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), (*testCorpus).getTotalNoPairs1(), true);
+ else
+ generatePerplexityReport(trainPerp, testPerp, trainViterbiPerp,
+ testViterbiPerp, of_perp, (*corpus).getTotalNoPairs1(), 0, true);
+
+ string eTrainVcbFile = Prefix + ".trn.src.vcb";
+ ofstream of_eTrainVcb(eTrainVcbFile.c_str());
+ cout << "Writing source vocabulary list to : " << eTrainVcbFile << '\n';
+ if (!of_eTrainVcb) {
+ cerr << "\nERROR: Cannot write to " << eTrainVcbFile <<'\n';
+ exit(1);
+ }
+ eTrainVcbList.printVocabList(of_eTrainVcb) ;
+
+ string fTrainVcbFile = Prefix + ".trn.trg.vcb";
+ ofstream of_fTrainVcb(fTrainVcbFile.c_str());
+ cout << "Writing source vocabulary list to : " << fTrainVcbFile << '\n';
+ if (!of_fTrainVcb) {
+ cerr << "\nERROR: Cannot write to " << fTrainVcbFile <<'\n';
+ exit(1);
+ }
+ fTrainVcbList.printVocabList(of_fTrainVcb) ;
+
+ //print test vocabulary list
+
+ string eTestVcbFile = Prefix + ".tst.src.vcb";
+ ofstream of_eTestVcb(eTestVcbFile.c_str());
+ cout << "Writing source vocabulary list to : " << eTestVcbFile << '\n';
+ if (!of_eTestVcb) {
+ cerr << "\nERROR: Cannot write to " << eTestVcbFile <<'\n';
+ exit(1);
+ }
+ eTestVcbList.printVocabList(of_eTestVcb) ;
+
+ string fTestVcbFile = Prefix + ".tst.trg.vcb";
+ ofstream of_fTestVcb(fTestVcbFile.c_str());
+ cout << "Writing source vocabulary list to : " << fTestVcbFile << '\n';
+ if (!of_fTestVcb) {
+ cerr << "\nERROR: Cannot write to " << fTestVcbFile <<'\n';
+ exit(1);
+ }
+ fTestVcbList.printVocabList(of_fTestVcb) ;
+ printDecoderConfigFile();
+ if (testCorpus)
+ printOverlapReport(m1.getTTable(), *testCorpus, eTrainVcbList,
+ fTrainVcbList, eTestVcbList, fTestVcbList);
}
-bool readNextSent(istream&is, map< pair<int,int>,char >&s, int&number) {
- string x;
- if ( !(is >> x))
- return 0;
- if (x=="SENT:")
- is >> x;
- int n=atoi(x.c_str());
- if (number==-1)
- number=n;
- else if (number!=n) {
- cerr << "ERROR: readNextSent: DIFFERENT NUMBERS: " << number << " "
- << n << '\n';
- return 0;
- }
- int nS, nP, nO;
- nS=nP=nO=0;
- while (is >> x) {
- if (x=="SENT:")
- return 1;
- int n1, n2;
- is >> n1 >> n2;
- map< pair<int,int>,char >::const_iterator i=s.find(pair<int, int>(n1,
- n2));
- if (i==s.end()||i->second=='P')
- s[pair<int,int>(n1,n2)]=x[0];
- massert(x[0]=='S'||x[0]=='P');
- nS+= (x[0]=='S');
- nP+= (x[0]=='P');
- nO+= (!(x[0]=='S'||x[0]=='P'));
- }
- return 1;
+bool readNextSent(istream&is, map< pair<int,int>,char >&s, int&number)
+{
+ string x;
+ if ( !(is >> x))
+ return 0;
+ if (x=="SENT:")
+ is >> x;
+ int n=atoi(x.c_str());
+ if (number==-1)
+ number=n;
+ else if (number!=n) {
+ cerr << "ERROR: readNextSent: DIFFERENT NUMBERS: " << number << " "
+ << n << '\n';
+ return 0;
+ }
+ int nS, nP, nO;
+ nS=nP=nO=0;
+ while (is >> x) {
+ if (x=="SENT:")
+ return 1;
+ int n1, n2;
+ is >> n1 >> n2;
+ map< pair<int,int>,char >::const_iterator i=s.find(pair<int, int>(n1,
+ n2));
+ if (i==s.end()||i->second=='P')
+ s[pair<int,int>(n1,n2)]=x[0];
+ massert(x[0]=='S'||x[0]=='P');
+ nS+= (x[0]=='S');
+ nP+= (x[0]=='P');
+ nO+= (!(x[0]=='S'||x[0]=='P'));
+ }
+ return 1;
}
-bool emptySent(map< pair<int,int>,char >&x) {
- x = map<pair<int,int>, char>();
- return 1;
+bool emptySent(map< pair<int,int>,char >&x)
+{
+ x = map<pair<int,int>, char>();
+ return 1;
}
-void ReadAlignment(const string&x, Vector<map< pair<int,int>,char > >&a) {
- ifstream infile(x.c_str());
- a.clear();
- map< pair<int,int>,char > sent;
- int number=0;
- while (emptySent(sent) && (readNextSent(infile, sent, number))) {
- if (int(a.size())!=number)
- cerr << "ERROR: ReadAlignment: " << a.size() << " " << number
- << '\n';
- a.push_back(sent);
- number++;
- }
- cout << "Read: " << a.size() << " sentences in reference alignment."
- << '\n';
+void ReadAlignment(const string&x, Vector<map< pair<int,int>,char > >&a)
+{
+ ifstream infile(x.c_str());
+ a.clear();
+ map< pair<int,int>,char > sent;
+ int number=0;
+ while (emptySent(sent) && (readNextSent(infile, sent, number))) {
+ if (int(a.size())!=number)
+ cerr << "ERROR: ReadAlignment: " << a.size() << " " << number
+ << '\n';
+ a.push_back(sent);
+ number++;
+ }
+ cout << "Read: " << a.size() << " sentences in reference alignment."
+ << '\n';
}
-void initGlobals(void) {
- cerr << "DEBUG: Enter";
- NODUMPS = false;
- Prefix = Get_File_Spec();
- cerr << "DEBUG: Prefix";
- LogFilename= Prefix + ".log";
- cerr << "DEBUG: Log";
- MAX_SENTENCE_LENGTH = MAX_SENTENCE_LENGTH_ALLOWED;
+void initGlobals(void)
+{
+ cerr << "DEBUG: Enter";
+ NODUMPS = false;
+ Prefix = Get_File_Spec();
+ cerr << "DEBUG: Prefix";
+ LogFilename= Prefix + ".log";
+ cerr << "DEBUG: Log";
+ MAX_SENTENCE_LENGTH = MAX_SENTENCE_LENGTH_ALLOWED;
}
-void convert(const map< pair<int,int>,char >&reference, alignment&x) {
- int l=x.get_l();
- int m=x.get_m();
- for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
- !=reference.end(); ++i) {
- if (i->first.first+1>int(m)) {
- cerr << "ERROR m to big: " << i->first.first << " "
- << i->first.second+1 << " " << l << " " << m
- << " is wrong.\n";
- continue;
- }
- if (i->first.second+1>int(l)) {
- cerr << "ERROR l to big: " << i->first.first << " "
- << i->first.second+1 << " " << l << " " << m
- << " is wrong.\n";
- continue;
- }
- if (x(i->first.first+1)!=0)
- cerr << "ERROR: position " << i->first.first+1 << " already set\n";
- x.set(i->first.first+1, i->first.second+1);
- }
+void convert(const map< pair<int,int>,char >&reference, alignment&x)
+{
+ int l=x.get_l();
+ int m=x.get_m();
+ for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
+ !=reference.end(); ++i) {
+ if (i->first.first+1>int(m)) {
+ cerr << "ERROR m to big: " << i->first.first << " "
+ << i->first.second+1 << " " << l << " " << m
+ << " is wrong.\n";
+ continue;
+ }
+ if (i->first.second+1>int(l)) {
+ cerr << "ERROR l to big: " << i->first.first << " "
+ << i->first.second+1 << " " << l << " " << m
+ << " is wrong.\n";
+ continue;
+ }
+ if (x(i->first.first+1)!=0)
+ cerr << "ERROR: position " << i->first.first+1 << " already set\n";
+ x.set(i->first.first+1, i->first.second+1);
+ }
}
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,
- const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
- int&eventsMissing, int&eventsToomuch, int pair_no) {
- int err=0;
- for (unsigned int j=1; j<test.size(); j++) {
- if (test[j]>0) {
- map< pair<int,int>,char >::const_iterator i=
- reference.find(make_pair(test[j]-1, j-1));
- if (i==reference.end() ) {
- toomuch++;
- err++;
- } else {
- if ( !(i->second=='S' || i->second=='P')) {
- cerr << "ERROR: wrong symbol in reference alignment '"
- << i->second << ' ' << int(i->second) << " no:" << pair_no<< "'\n";
- }
- }
- eventsToomuch++;
- }
- }
- for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
- !=reference.end(); ++i) {
- if (i->second=='S') {
- unsigned int J=i->first.second+1;
- unsigned int I=i->first.first+1;
- if (int(J)>=int(test.size())||int(I)>int(l)||int(J)<1||int(I)<1)
- cerr
- << "ERROR: alignment outside of range in reference alignment"
- << J << " " << test.size() << " (" << I << " " << l
- << ") no:" << pair_no << '\n';
- else {
- if (test[J]!=I) {
- missing++;
- err++;
- }
- }
- eventsMissing++;
- }
- }
- if (Verbose)
- cout << err << " errors in sentence\n";
- if (eventsToomuch+eventsMissing)
- return (toomuch+missing)/(eventsToomuch+eventsMissing);
- else
- return 1.0;
+ const Vector<WordIndex>&test, int l, int&missing, int&toomuch,
+ int&eventsMissing, int&eventsToomuch, int pair_no)
+{
+ int err=0;
+ for (unsigned int j=1; j<test.size(); j++) {
+ if (test[j]>0) {
+ map< pair<int,int>,char >::const_iterator i=
+ reference.find(make_pair(test[j]-1, j-1));
+ if (i==reference.end() ) {
+ toomuch++;
+ err++;
+ } else {
+ if ( !(i->second=='S' || i->second=='P')) {
+ cerr << "ERROR: wrong symbol in reference alignment '"
+ << i->second << ' ' << int(i->second) << " no:" << pair_no<< "'\n";
+ }
+ }
+ eventsToomuch++;
+ }
+ }
+ for (map< pair<int,int>,char >::const_iterator i=reference.begin(); i
+ !=reference.end(); ++i) {
+ if (i->second=='S') {
+ unsigned int J=i->first.second+1;
+ unsigned int I=i->first.first+1;
+ if (int(J)>=int(test.size())||int(I)>int(l)||int(J)<1||int(I)<1)
+ cerr
+ << "ERROR: alignment outside of range in reference alignment"
+ << J << " " << test.size() << " (" << I << " " << l
+ << ") no:" << pair_no << '\n';
+ else {
+ if (test[J]!=I) {
+ missing++;
+ err++;
+ }
+ }
+ eventsMissing++;
+ }
+ }
+ if (Verbose)
+ cout << err << " errors in sentence\n";
+ if (eventsToomuch+eventsMissing)
+ return (toomuch+missing)/(eventsToomuch+eventsMissing);
+ else
+ return 1.0;
}
vcbList *globeTrainVcbList, *globfTrainVcbList;
-double StartTraining(int&result) {
- double errors=0.0;
- Vector<WordEntry> evlist,fvlist;
- vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
- globeTrainVcbList=&eTrainVcbList;
- globfTrainVcbList=&fTrainVcbList;
-
- // What is being done here?
- string repFilename = Prefix + ".gizacfg";
- ofstream of2(repFilename.c_str());
- writeParameters(of2, getGlobalParSet(), -1) ;
- // Write another copy of configure file
-
- cout << "reading vocabulary files \n";
- eTrainVcbList.setName(SourceVocabFilename.c_str());
- fTrainVcbList.setName(TargetVocabFilename.c_str());
- eTrainVcbList.readVocabList();
- fTrainVcbList.readVocabList();
-
- // Vocabulary can be optional ?!
-
- cout << "Source vocabulary list has " << eTrainVcbList.uniqTokens()
- << " unique tokens \n";
-
- cout << "Target vocabulary list has " << fTrainVcbList.uniqTokens()
- << " unique tokens \n";
-
-
-
- corpus = new sentenceHandler(CorpusFilename.c_str(), &eTrainVcbList, &fTrainVcbList);
- vcbList eTestVcbList(eTrainVcbList); // Copied directly
- vcbList fTestVcbList(fTrainVcbList);
- // This portion of code should not be copied to model one
- // training
- if (TestCorpusFilename == "NONE")
- TestCorpusFilename = "";
- /////////////////////////// MODULE_TEST_START //////////////////
- if (TestCorpusFilename != "") {
- cout << "Test corpus will be read from: " << TestCorpusFilename << '\n';
-
- testCorpus= new sentenceHandler(
- TestCorpusFilename.c_str(),
- &eTestVcbList, &fTestVcbList);
-
- cout << " Test total # sentence pairs : " <<(*testCorpus).getTotalNoPairs1() <<" weighted:" <<(*testCorpus).getTotalNoPairs2() <<'\n';
-
- cout << "Size of the source portion of test corpus: "
- << eTestVcbList.totalVocab() << " tokens\n";
- cout << "Size of the target portion of test corpus: "
- << fTestVcbList.totalVocab() << " tokens \n";
- cout << "In source portion of the test corpus, only "
- << eTestVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "In target portion of the test corpus, only "
- << fTestVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "ratio (target/source) : " << double(fTestVcbList.totalVocab()) / eTestVcbList.totalVocab() << '\n';
- }
- cout << " Train total # sentence pairs (weighted): "
- << corpus->getTotalNoPairs2() << '\n';
- cout << "Size of source portion of the training corpus: "
- << eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2()
- << " tokens\n";
- cout << "Size of the target portion of the training corpus: "
- << fTrainVcbList.totalVocab() << " tokens \n";
- cout << "In source portion of the training corpus, only "
- << eTrainVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "In target portion of the training corpus, only "
- << fTrainVcbList.uniqTokensInCorpus()
- << " unique tokens appeared\n";
- cout << "lambda for PP calculation in IBM-1,IBM-2,HMM:= " << double(fTrainVcbList.totalVocab()) << "/(" << eTrainVcbList.totalVocab() << "-"
- << corpus->getTotalNoPairs2() << ")=";
- LAMBDA = double(fTrainVcbList.totalVocab())
- / (eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2());
- cout << "= " << LAMBDA << '\n';
- /////////////////////////// MODULE_TEST_FINISH /////////////////
- // load dictionary
- Dictionary *dictionary;
- if (useDict)
- dictionary = new Dictionary(dictionary_Filename.c_str());
- else
- dictionary = new Dictionary("");
-
- int minIter=0;
- cerr << "Dictionary Loading complete" << endl;
-
- if (CoocurrenceFile.length()==0) {
- cerr << "ERROR: NO COOCURRENCE FILE GIVEN!\n";
- abort();
- }
-
- //ifstream coocs(CoocurrenceFile.c_str());
- tmodel<COUNT, PROB> tTable(CoocurrenceFile);
- cerr << "cooc file loading completed" << endl;
-
-
- // Need to rule out some bad logic
-
- if(restart == 1 && Model1_Iterations == 0) { // Restart on model 1 but not train on model one
- cerr << "You specified to load model 1 and train model 1 (restart == 1) but you specified zero Model 1 iteration, please revise your parameters";
- exit(1);
- }
- if(restart == 2 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 1 and train model 2 (restart == 2) but you specified zero Model 2 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 3 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 2 and train model 2 (restart == 3) but you specified zero Model 2 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 4 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 1 and train hmm (restart == 4) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 5 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load model 2 and train hmm (restart == 5) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 6 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
- cerr << "You specified to load HMM and train hmm (restart == 6) but you specified zero HMM iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 7 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load HMM and train model 3 (restart == 7) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 8 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 2 and train model 3 (restart == 8) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 9 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 3 and train model 3 (restart == 9) but you specified zero Model 3 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 10 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 3 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
- exit(1);
- }
-
- if(restart == 11 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
- cerr << "You specified to load model 4 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
- exit(1);
- }
-
- //QIN: If restart level is larger than 0, then we need to load
- if (restart > 0){
- cerr << "We are going to load previous model " << prev_t << endl;
- if(!tTable.readProbTable(prev_t.c_str())){
- cerr << "Failed reading " << prev_t << endl;
- exit(1);
- }
- }
-
-
-
- cerr << "TTable initialization OK" << endl;
- // TModel is important!
- model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
- trainPerp, *corpus, &testPerp, testCorpus, trainViterbiPerp,
- &testViterbiPerp);
- cerr << "Model one initalization OK" << endl;
- amodel<PROB> aTable(false);
-
- if (restart >2 && restart != 4 ){ // 1 is model 1, 2 is model 2 init, both just need t-table, 4 is directly train HMM from model one
- // and we do not need a model
- cerr << "We are going to load previous model from " << prev_a << endl;
- if(!aTable.readTable(prev_a.c_str())){
- cerr << "Failed reading " << prev_a << endl;
- exit(1);
- }
- }
-
- amodel<COUNT> aCountTable(false);
- model2 m2(m1, aTable, aCountTable);
- WordClasses french,english;
- hmm h(m2,english,french);
-
- bool hmmvalid = false;
-
- if (restart == 6 || restart ==7){ // If we want to initialize model 3 or continue train hmm, need to read jumps
- string al = prev_hmm + ".alpha";
- string be = prev_hmm + ".beta";
- cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
- if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())){
- cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl;
- exit(1);
- }
- hmmvalid = true;
- }else if (restart > 7){
- if (prev_hmm.length() > 0){
- string al = prev_hmm + ".alpha";
- string be = prev_hmm + ".beta";
- cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
- if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())){
- cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl ;
- cerr << "Continue without hmm" << endl;
- hmmvalid = false;
- }else
- hmmvalid = true;
- }
- }
- nmodel<PROB> nTable(m2.getNoEnglishWords()+1, MAX_FERTILITY);
- amodel<PROB> dTable(true);
-
- if(restart > 8){ // 9, 10, 11 requires ntable and d table,
- cerr << "We are going to load previous N model from " << prev_n << endl;
- if(!nTable.readNTable(prev_n.c_str())){
- cerr << "Failed reading " << prev_n << endl;
- exit(1);
- }
- cerr << "We are going to load previous D model from " << prev_d << endl;
- if(!dTable.readTable(prev_d.c_str())){
- cerr << "Failed reading " << prev_d << endl;
- exit(1);
- }
-
- }
-
-
-
- model3 m3(m2, dTable, nTable);
- if(restart > 8){
- double p0,p1;
- if (P0!=-1.0||prev_p0.length()==0) {
- p0 = P0;
- p1 = 1-P0;
- }else{
- cerr << "We are going to load previous P0 Value model from " << prev_p0 << endl;
- ifstream ifs(prev_p0.c_str());
- ifs >> p0;
- p1 = 1-p0;
- }
- m3.p0 = p0;
- m3.p1 = p1;
- }
-
- // For loading d4 table, we postpone it to model 4 iterations in the line marked with #LOADM4#
-
- if (ReadTablePrefix.length() ) {
- string number = "final";
- string tfile, afilennfile, dfile, d4file, p0file, afile, nfile; //d5file
- tfile = ReadTablePrefix + ".t3." + number;
- afile = ReadTablePrefix + ".a3." + number;
- nfile = ReadTablePrefix + ".n3." + number;
- dfile = ReadTablePrefix + ".d3." + number;
- d4file = ReadTablePrefix + ".d4." + number;
- //d5file = ReadTablePrefix + ".d5." + number ;
- p0file = ReadTablePrefix + ".p0_3." + number;
- tTable.readProbTable(tfile.c_str());
- aTable.readTable(afile.c_str());
- m3.dTable.readTable(dfile.c_str());
- m3.nTable.readNTable(nfile.c_str());
- sentPair sent;
- double p0;
- ifstream p0f(p0file.c_str());
- p0f >> p0;
- d4model d4m(MAX_SENTENCE_LENGTH,*(new WordClasses()), *(new WordClasses()));
-
- //d4m.readProbTable(d4file.c_str());
- //d5model d5m(d4m);
- //d5m.makeWordClasses(m1.Elist,m1.Flist,SourceVocabFilename+".classes",TargetVocabFilename+".classes");
- //d5m.readProbTable(d5file.c_str());
- makeSetCommand("model4smoothfactor", "0.0", getGlobalParSet(), 2);
- //makeSetCommand("model5smoothfactor","0.0",getGlobalParSet(),2);
- if (corpus||testCorpus) {
- sentenceHandler *x=corpus;
- if (x==0)
- x=testCorpus;
- cout << "Text corpus exists.\n";
- x->rewind();
- while (x&&x->getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- int l=es.size()-1;
- int m=fs.size()-1;
- transpair_model4 tm4(es, fs, m1.tTable, m2.aTable, m3.dTable,
- m3.nTable, 1-p0, p0, &d4m);
- alignment al(l, m);
- cout << "I use the alignment " << sent.sentenceNo-1 << '\n';
- //convert(ReferenceAlignment[sent.sentenceNo-1],al);
- transpair_model3 tm3(es, fs, m1.tTable, m2.aTable, m3.dTable,
- m3.nTable, 1-p0, p0, 0);
- double p=tm3.prob_of_target_and_alignment_given_source(al, 1);
- cout << "Sentence " << sent.sentenceNo << " has IBM-3 prob "
- << p << '\n';
- p=tm4.prob_of_target_and_alignment_given_source(al, 3, 1);
- cout << "Sentence " << sent.sentenceNo << " has IBM-4 prob "
- << p << '\n';
- //transpair_model5 tm5(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,&d5m);
- //p=tm5.prob_of_target_and_alignment_given_source(al,3,1);
- //cout << "Sentence " << sent.sentenceNo << " has IBM-5 prob " << p << '\n';
- }
- } else {
- cout << "No corpus exists.\n";
- }
- } else {
- // initialize model1
- bool seedModel1 = false;
- if (Model1_Iterations > 0 && restart < 2) {
- if (t_Filename != "NONE" && t_Filename != "") {
- seedModel1 = true;
- m1.load_table(t_Filename.c_str());
- }
-
- if(restart ==1) seedModel1 = true;
- if(Model2_Iterations == 0 && HMM_Iterations == 0 &&
- Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){ // OK we need to output!
- minIter=m1.em_with_tricks(Model1_Iterations, seedModel1,
- *dictionary, useDict,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString
- );
- }else{
- minIter=m1.em_with_tricks(Model1_Iterations, true,
- *dictionary, useDict);
- }
-
-
- errors=m1.errorsAL();
- }
- {
- if (Model2_Iterations > 0 && (restart < 2 || restart ==2 || restart == 3)) {
- if(restart == 2) m2.initialize_table_uniformly(*corpus);
- if(HMM_Iterations == 0 &&
- Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){
- minIter=m2.em_with_tricks(Model2_Iterations,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString);
- }else{
- minIter=m2.em_with_tricks(Model2_Iterations);
- }
- errors=m2.errorsAL();
- }
- //cout << tTable.getProb(2, 2) << endl;
-
-
- if (HMM_Iterations > 0 && (restart < 2 || restart == 4 || restart == 5 || restart == 6)) {
- cout << "NOTE: I am doing iterations with the HMM model!\n";
-
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
- , TargetVocabClassesFilename);
- if(restart != 6) h.initialize_table_uniformly(*corpus);
-
- if(Model3_Iterations == 0 && Model4_Iterations == 0 &&
- Model5_Iterations == 0 && dumpCount){
- minIter=h.em_with_tricks(HMM_Iterations,true,
- countPrefix.length() == 0 ? NULL : countPrefix.c_str(),
- dumpCountUsingWordString, restart == 6);
- }else{
- minIter=h.em_with_tricks(HMM_Iterations,false,NULL,false,restart==6);
- }
- //multi_thread_em(HMM_Iterations, NCPUS, &h);
- errors=h.errorsAL();
- }
- if ( ((Transfer2to3 && Model2_Iterations>0)||(HMM_Iterations==0&&Model2_Iterations>0)||restart==8) && (restart!=7 && restart < 9)) {
- if (HMM_Iterations>0)
- cout << "WARNING: transfor is not needed, as results "
- "are overwritten bei transfer from HMM.\n";
- string test_alignfile = Prefix +".tst.A2to3";
- if (testCorpus)
- m2.em_loop(testPerp, *testCorpus, Transfer_Dump_Freq==1
- &&!NODUMPS, test_alignfile.c_str(),
- testViterbiPerp, true);
- if (testCorpus)
- cout << "\nTransfer: TEST CROSS-ENTROPY "
- << testPerp.cross_entropy() << " PERPLEXITY "
- << testPerp.perplexity() << "\n\n";
- if (Transfer == TRANSFER_SIMPLE)
- m3.transferSimple(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
- trainPerp, trainViterbiPerp);
- else
- m3.transfer(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
- trainPerp, trainViterbiPerp);
- errors=m3.errorsAL();
- }
- if(restart >= 7 && hmmvalid){
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
- , TargetVocabClassesFilename);
- }
- if (HMM_Iterations>0 || restart == 7)
- m3.setHMM(&h);
- else if (restart > 7 && hmmvalid){
- m3.setHMM(&h);
- }
-
- if (Model3_Iterations > 0 || Model4_Iterations > 0
- || Model5_Iterations || Model6_Iterations) {
-
- if(restart == 11){ // Need to load model 4
- if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount){
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str()
- ,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString); // #LOADM4#
- }else{
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str());
- }
- }else{
- if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount){
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL
- ,true,
- countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
- dumpCountUsingWordString); // #LOADM4#
- }else{
- minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL);
- }
- }
- /*multi_thread_m34_em(m3, NCPUS, Model3_Iterations,
- Model4_Iterations);*/
- errors=m3.errorsAL();
- }
- if (FEWDUMPS||!NODUMPS) {
- printAllTables(eTrainVcbList, eTestVcbList, fTrainVcbList,
- fTestVcbList, m1);
- }
- }
- }
- result=minIter;
- return errors;
+double StartTraining(int&result)
+{
+ double errors=0.0;
+ Vector<WordEntry> evlist,fvlist;
+ vcbList eTrainVcbList(evlist), fTrainVcbList(fvlist);
+ globeTrainVcbList=&eTrainVcbList;
+ globfTrainVcbList=&fTrainVcbList;
+
+ // What is being done here?
+ string repFilename = Prefix + ".gizacfg";
+ ofstream of2(repFilename.c_str());
+ writeParameters(of2, getGlobalParSet(), -1) ;
+ // Write another copy of configure file
+
+ cout << "reading vocabulary files \n";
+ eTrainVcbList.setName(SourceVocabFilename.c_str());
+ fTrainVcbList.setName(TargetVocabFilename.c_str());
+ eTrainVcbList.readVocabList();
+ fTrainVcbList.readVocabList();
+
+ // Vocabulary can be optional ?!
+
+ cout << "Source vocabulary list has " << eTrainVcbList.uniqTokens()
+ << " unique tokens \n";
+
+ cout << "Target vocabulary list has " << fTrainVcbList.uniqTokens()
+ << " unique tokens \n";
+
+
+
+ corpus = new sentenceHandler(CorpusFilename.c_str(), &eTrainVcbList, &fTrainVcbList);
+ vcbList eTestVcbList(eTrainVcbList); // Copied directly
+ vcbList fTestVcbList(fTrainVcbList);
+ // This portion of code should not be copied to model one
+ // training
+ if (TestCorpusFilename == "NONE")
+ TestCorpusFilename = "";
+ /////////////////////////// MODULE_TEST_START //////////////////
+ if (TestCorpusFilename != "") {
+ cout << "Test corpus will be read from: " << TestCorpusFilename << '\n';
+
+ testCorpus= new sentenceHandler(
+ TestCorpusFilename.c_str(),
+ &eTestVcbList, &fTestVcbList);
+
+ cout << " Test total # sentence pairs : " <<(*testCorpus).getTotalNoPairs1() <<" weighted:" <<(*testCorpus).getTotalNoPairs2() <<'\n';
+
+ cout << "Size of the source portion of test corpus: "
+ << eTestVcbList.totalVocab() << " tokens\n";
+ cout << "Size of the target portion of test corpus: "
+ << fTestVcbList.totalVocab() << " tokens \n";
+ cout << "In source portion of the test corpus, only "
+ << eTestVcbList.uniqTokensInCorpus()
+ << " unique tokens appeared\n";
+ cout << "In target portion of the test corpus, only "
+ << fTestVcbList.uniqTokensInCorpus()
+ << " unique tokens appeared\n";
+ cout << "ratio (target/source) : " << double(fTestVcbList.totalVocab()) / eTestVcbList.totalVocab() << '\n';
+ }
+ cout << " Train total # sentence pairs (weighted): "
+ << corpus->getTotalNoPairs2() << '\n';
+ cout << "Size of source portion of the training corpus: "
+ << eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2()
+ << " tokens\n";
+ cout << "Size of the target portion of the training corpus: "
+ << fTrainVcbList.totalVocab() << " tokens \n";
+ cout << "In source portion of the training corpus, only "
+ << eTrainVcbList.uniqTokensInCorpus()
+ << " unique tokens appeared\n";
+ cout << "In target portion of the training corpus, only "
+ << fTrainVcbList.uniqTokensInCorpus()
+ << " unique tokens appeared\n";
+ cout << "lambda for PP calculation in IBM-1,IBM-2,HMM:= " << double(fTrainVcbList.totalVocab()) << "/(" << eTrainVcbList.totalVocab() << "-"
+ << corpus->getTotalNoPairs2() << ")=";
+ LAMBDA = double(fTrainVcbList.totalVocab())
+ / (eTrainVcbList.totalVocab()-corpus->getTotalNoPairs2());
+ cout << "= " << LAMBDA << '\n';
+ /////////////////////////// MODULE_TEST_FINISH /////////////////
+ // load dictionary
+ Dictionary *dictionary;
+ if (useDict)
+ dictionary = new Dictionary(dictionary_Filename.c_str());
+ else
+ dictionary = new Dictionary("");
+
+ int minIter=0;
+ cerr << "Dictionary Loading complete" << endl;
+
+ if (CoocurrenceFile.length()==0) {
+ cerr << "ERROR: NO COOCURRENCE FILE GIVEN!\n";
+ abort();
+ }
+
+ //ifstream coocs(CoocurrenceFile.c_str());
+ tmodel<COUNT, PROB> tTable(CoocurrenceFile);
+ cerr << "cooc file loading completed" << endl;
+
+
+ // Need to rule out some bad logic
+
+ if(restart == 1 && Model1_Iterations == 0) { // Restart on model 1 but not train on model one
+ cerr << "You specified to load model 1 and train model 1 (restart == 1) but you specified zero Model 1 iteration, please revise your parameters";
+ exit(1);
+ }
+ if(restart == 2 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
+ cerr << "You specified to load model 1 and train model 2 (restart == 2) but you specified zero Model 2 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 3 && Model2_Iterations == 0) { // Restart on model 2 but not train on model 2
+ cerr << "You specified to load model 2 and train model 2 (restart == 3) but you specified zero Model 2 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 4 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
+ cerr << "You specified to load model 1 and train hmm (restart == 4) but you specified zero HMM iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 5 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
+ cerr << "You specified to load model 2 and train hmm (restart == 5) but you specified zero HMM iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 6 && HMM_Iterations == 0) { // Restart on model 2 but not train on model 2
+ cerr << "You specified to load HMM and train hmm (restart == 6) but you specified zero HMM iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 7 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
+ cerr << "You specified to load HMM and train model 3 (restart == 7) but you specified zero Model 3 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 8 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
+ cerr << "You specified to load model 2 and train model 3 (restart == 8) but you specified zero Model 3 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 9 && Model3_Iterations == 0) { // Restart on model 3 but not train on model 3
+ cerr << "You specified to load model 3 and train model 3 (restart == 9) but you specified zero Model 3 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 10 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
+ cerr << "You specified to load model 3 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ if(restart == 11 && Model4_Iterations == 0) { // Restart on model 3 but not train on model 3
+ cerr << "You specified to load model 4 and train model 4 (restart == 10) but you specified zero Model 4 iteration, please revise your parameters";
+ exit(1);
+ }
+
+ //QIN: If restart level is larger than 0, then we need to load
+ if (restart > 0) {
+ cerr << "We are going to load previous model " << prev_t << endl;
+ if(!tTable.readProbTable(prev_t.c_str())) {
+ cerr << "Failed reading " << prev_t << endl;
+ exit(1);
+ }
+ }
+
+
+
+ cerr << "TTable initialization OK" << endl;
+ // TModel is important!
+ model1 m1(CorpusFilename.c_str(), eTrainVcbList, fTrainVcbList, tTable,
+ trainPerp, *corpus, &testPerp, testCorpus, trainViterbiPerp,
+ &testViterbiPerp);
+ cerr << "Model one initalization OK" << endl;
+ amodel<PROB> aTable(false);
+
+ if (restart >2 && restart != 4 ) { // 1 is model 1, 2 is model 2 init, both just need t-table, 4 is directly train HMM from model one
+ // and we do not need a model
+ cerr << "We are going to load previous model from " << prev_a << endl;
+ if(!aTable.readTable(prev_a.c_str())) {
+ cerr << "Failed reading " << prev_a << endl;
+ exit(1);
+ }
+ }
+
+ amodel<COUNT> aCountTable(false);
+ model2 m2(m1, aTable, aCountTable);
+ WordClasses french,english;
+ hmm h(m2,english,french);
+
+ bool hmmvalid = false;
+
+ if (restart == 6 || restart ==7) { // If we want to initialize model 3 or continue train hmm, need to read jumps
+ string al = prev_hmm + ".alpha";
+ string be = prev_hmm + ".beta";
+ cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
+ if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())) {
+ cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl;
+ exit(1);
+ }
+ hmmvalid = true;
+ } else if (restart > 7) {
+ if (prev_hmm.length() > 0) {
+ string al = prev_hmm + ".alpha";
+ string be = prev_hmm + ".beta";
+ cerr << "We are going to load previous (HMM) model from " << prev_hmm <<"," << al << "," << be << endl;
+ if(!h.probs.readJumps(prev_hmm.c_str(),NULL,al.c_str(),be.c_str())) {
+ cerr << "Failed reading" << prev_hmm <<"," << al << "," << be << endl ;
+ cerr << "Continue without hmm" << endl;
+ hmmvalid = false;
+ } else
+ hmmvalid = true;
+ }
+ }
+ nmodel<PROB> nTable(m2.getNoEnglishWords()+1, MAX_FERTILITY);
+ amodel<PROB> dTable(true);
+
+ if(restart > 8) { // 9, 10, 11 requires ntable and d table,
+ cerr << "We are going to load previous N model from " << prev_n << endl;
+ if(!nTable.readNTable(prev_n.c_str())) {
+ cerr << "Failed reading " << prev_n << endl;
+ exit(1);
+ }
+ cerr << "We are going to load previous D model from " << prev_d << endl;
+ if(!dTable.readTable(prev_d.c_str())) {
+ cerr << "Failed reading " << prev_d << endl;
+ exit(1);
+ }
+
+ }
+
+
+
+ model3 m3(m2, dTable, nTable);
+ if(restart > 8) {
+ double p0,p1;
+ if (P0!=-1.0||prev_p0.length()==0) {
+ p0 = P0;
+ p1 = 1-P0;
+ } else {
+ cerr << "We are going to load previous P0 Value model from " << prev_p0 << endl;
+ ifstream ifs(prev_p0.c_str());
+ ifs >> p0;
+ p1 = 1-p0;
+ }
+ m3.p0 = p0;
+ m3.p1 = p1;
+ }
+
+ // For loading d4 table, we postpone it to model 4 iterations in the line marked with #LOADM4#
+
+ if (ReadTablePrefix.length() ) {
+ string number = "final";
+ string tfile, afilennfile, dfile, d4file, p0file, afile, nfile; //d5file
+ tfile = ReadTablePrefix + ".t3." + number;
+ afile = ReadTablePrefix + ".a3." + number;
+ nfile = ReadTablePrefix + ".n3." + number;
+ dfile = ReadTablePrefix + ".d3." + number;
+ d4file = ReadTablePrefix + ".d4." + number;
+ //d5file = ReadTablePrefix + ".d5." + number ;
+ p0file = ReadTablePrefix + ".p0_3." + number;
+ tTable.readProbTable(tfile.c_str());
+ aTable.readTable(afile.c_str());
+ m3.dTable.readTable(dfile.c_str());
+ m3.nTable.readNTable(nfile.c_str());
+ sentPair sent;
+ double p0;
+ ifstream p0f(p0file.c_str());
+ p0f >> p0;
+ d4model d4m(MAX_SENTENCE_LENGTH,*(new WordClasses()), *(new WordClasses()));
+
+ //d4m.readProbTable(d4file.c_str());
+ //d5model d5m(d4m);
+ //d5m.makeWordClasses(m1.Elist,m1.Flist,SourceVocabFilename+".classes",TargetVocabFilename+".classes");
+ //d5m.readProbTable(d5file.c_str());
+ makeSetCommand("model4smoothfactor", "0.0", getGlobalParSet(), 2);
+ //makeSetCommand("model5smoothfactor","0.0",getGlobalParSet(),2);
+ if (corpus||testCorpus) {
+ sentenceHandler *x=corpus;
+ if (x==0)
+ x=testCorpus;
+ cout << "Text corpus exists.\n";
+ x->rewind();
+ while (x&&x->getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ int l=es.size()-1;
+ int m=fs.size()-1;
+ transpair_model4 tm4(es, fs, m1.tTable, m2.aTable, m3.dTable,
+ m3.nTable, 1-p0, p0, &d4m);
+ alignment al(l, m);
+ cout << "I use the alignment " << sent.sentenceNo-1 << '\n';
+ //convert(ReferenceAlignment[sent.sentenceNo-1],al);
+ transpair_model3 tm3(es, fs, m1.tTable, m2.aTable, m3.dTable,
+ m3.nTable, 1-p0, p0, 0);
+ double p=tm3.prob_of_target_and_alignment_given_source(al, 1);
+ cout << "Sentence " << sent.sentenceNo << " has IBM-3 prob "
+ << p << '\n';
+ p=tm4.prob_of_target_and_alignment_given_source(al, 3, 1);
+ cout << "Sentence " << sent.sentenceNo << " has IBM-4 prob "
+ << p << '\n';
+ //transpair_model5 tm5(es,fs,m1.tTable,m2.aTable,m3.dTable,m3.nTable,1-p0,p0,&d5m);
+ //p=tm5.prob_of_target_and_alignment_given_source(al,3,1);
+ //cout << "Sentence " << sent.sentenceNo << " has IBM-5 prob " << p << '\n';
+ }
+ } else {
+ cout << "No corpus exists.\n";
+ }
+ } else {
+ // initialize model1
+ bool seedModel1 = false;
+ if (Model1_Iterations > 0 && restart < 2) {
+ if (t_Filename != "NONE" && t_Filename != "") {
+ seedModel1 = true;
+ m1.load_table(t_Filename.c_str());
+ }
+
+ if(restart ==1) seedModel1 = true;
+ if(Model2_Iterations == 0 && HMM_Iterations == 0 &&
+ Model3_Iterations == 0 && Model4_Iterations == 0 &&
+ Model5_Iterations == 0 && dumpCount) { // OK we need to output!
+ minIter=m1.em_with_tricks(Model1_Iterations, seedModel1,
+ *dictionary, useDict,true,
+ countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
+ dumpCountUsingWordString
+ );
+ } else {
+ minIter=m1.em_with_tricks(Model1_Iterations, true,
+ *dictionary, useDict);
+ }
+
+
+ errors=m1.errorsAL();
+ }
+ {
+ if (Model2_Iterations > 0 && (restart < 2 || restart ==2 || restart == 3)) {
+ if(restart == 2) m2.initialize_table_uniformly(*corpus);
+ if(HMM_Iterations == 0 &&
+ Model3_Iterations == 0 && Model4_Iterations == 0 &&
+ Model5_Iterations == 0 && dumpCount) {
+ minIter=m2.em_with_tricks(Model2_Iterations,true,
+ countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
+ dumpCountUsingWordString);
+ } else {
+ minIter=m2.em_with_tricks(Model2_Iterations);
+ }
+ errors=m2.errorsAL();
+ }
+ //cout << tTable.getProb(2, 2) << endl;
+
+
+ if (HMM_Iterations > 0 && (restart < 2 || restart == 4 || restart == 5 || restart == 6)) {
+ cout << "NOTE: I am doing iterations with the HMM model!\n";
+
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
+ , TargetVocabClassesFilename);
+ if(restart != 6) h.initialize_table_uniformly(*corpus);
+
+ if(Model3_Iterations == 0 && Model4_Iterations == 0 &&
+ Model5_Iterations == 0 && dumpCount) {
+ minIter=h.em_with_tricks(HMM_Iterations,true,
+ countPrefix.length() == 0 ? NULL : countPrefix.c_str(),
+ dumpCountUsingWordString, restart == 6);
+ } else {
+ minIter=h.em_with_tricks(HMM_Iterations,false,NULL,false,restart==6);
+ }
+ //multi_thread_em(HMM_Iterations, NCPUS, &h);
+ errors=h.errorsAL();
+ }
+ if ( ((Transfer2to3 && Model2_Iterations>0)||(HMM_Iterations==0&&Model2_Iterations>0)||restart==8) && (restart!=7 && restart < 9)) {
+ if (HMM_Iterations>0)
+ cout << "WARNING: transfor is not needed, as results "
+ "are overwritten bei transfer from HMM.\n";
+ string test_alignfile = Prefix +".tst.A2to3";
+ if (testCorpus)
+ m2.em_loop(testPerp, *testCorpus, Transfer_Dump_Freq==1
+ &&!NODUMPS, test_alignfile.c_str(),
+ testViterbiPerp, true);
+ if (testCorpus)
+ cout << "\nTransfer: TEST CROSS-ENTROPY "
+ << testPerp.cross_entropy() << " PERPLEXITY "
+ << testPerp.perplexity() << "\n\n";
+ if (Transfer == TRANSFER_SIMPLE)
+ m3.transferSimple(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
+ trainPerp, trainViterbiPerp);
+ else
+ m3.transfer(*corpus, Transfer_Dump_Freq==1&&!NODUMPS,
+ trainPerp, trainViterbiPerp);
+ errors=m3.errorsAL();
+ }
+ if(restart >= 7 && hmmvalid) {
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
+ , TargetVocabClassesFilename);
+ }
+ if (HMM_Iterations>0 || restart == 7)
+ m3.setHMM(&h);
+ else if (restart > 7 && hmmvalid) {
+ m3.setHMM(&h);
+ }
+
+ if (Model3_Iterations > 0 || Model4_Iterations > 0
+ || Model5_Iterations || Model6_Iterations) {
+
+ if(restart == 11) { // Need to load model 4
+ if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount) {
+ minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str()
+ ,true,
+ countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
+ dumpCountUsingWordString); // #LOADM4#
+ } else {
+ minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,prev_d4.c_str(),prev_d4_2.c_str());
+ }
+ } else {
+ if (Model5_Iterations==0 && Model6_Iterations==0 && dumpCount) {
+ minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL
+ ,true,
+ countPrefix.length() == 0 ? "./" : countPrefix.c_str(),
+ dumpCountUsingWordString); // #LOADM4#
+ } else {
+ minIter=m3.viterbi(Model3_Iterations,Model4_Iterations,Model5_Iterations,Model6_Iterations,NULL,NULL);
+ }
+ }
+ /*multi_thread_m34_em(m3, NCPUS, Model3_Iterations,
+ Model4_Iterations);*/
+ errors=m3.errorsAL();
+ }
+ if (FEWDUMPS||!NODUMPS) {
+ printAllTables(eTrainVcbList, eTestVcbList, fTrainVcbList,
+ fTestVcbList, m1);
+ }
+ }
+ }
+ result=minIter;
+ return errors;
}
/*!
Starts here
*/
-int main(int argc, char* argv[]) {
- ////////////////////////////////////////////////////////
- // Setup parameters
- ///////////////////////////////////////////////////////
- cerr << "Starting MGIZA " << endl;
- getGlobalParSet().insert(new Parameter<string>(
- "CoocurrenceFile",
- ParameterChangedFlag,
- "",
- CoocurrenceFile,
- PARLEV_SPECIAL));
- getGlobalParSet().insert(new Parameter<string>(
- "ReadTablePrefix",
- ParameterChangedFlag,
- "optimized",
- ReadTablePrefix,-1));
-
- getGlobalParSet().insert(new Parameter<string>("S",
- ParameterChangedFlag,
- "source vocabulary file name",
- SourceVocabFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "SOURCE VOCABULARY FILE",
- ParameterChangedFlag,
- "source vocabulary file name",
- SourceVocabFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("T",
- ParameterChangedFlag,
- "target vocabulary file name",
- TargetVocabFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "TARGET VOCABULARY FILE",
- ParameterChangedFlag,
- "target vocabulary file name",
- TargetVocabFilename,-1));
- getGlobalParSet().insert(new Parameter<string>(
- "Source Vocabulary Classes",
- ParameterChangedFlag,
- "source vocabulary classes file name",
- SourceVocabClassesFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "Target Vocabulary Classes",
- ParameterChangedFlag,
- "target vocabulary classes file name",
- TargetVocabClassesFilename,
- PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "C",
- ParameterChangedFlag,
- "training corpus file name",
- CorpusFilename,PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "CORPUS FILE",
- ParameterChangedFlag,
- "training corpus file name",
- CorpusFilename,-1));
- getGlobalParSet().insert(new Parameter<string>("TC",
- ParameterChangedFlag,
- "test corpus file name",
- TestCorpusFilename,
- PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "TEST CORPUS FILE",
- ParameterChangedFlag,
- "test corpus file name",
- TestCorpusFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("d",
- ParameterChangedFlag,
- "dictionary file name",
- dictionary_Filename,
- PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "DICTIONARY",
- ParameterChangedFlag,
- "dictionary file name",
- dictionary_Filename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("l",
- ParameterChangedFlag,
- "log file name",
- LogFilename,PARLEV_OUTPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "LOG FILE",
- ParameterChangedFlag,
- "log file name",
- LogFilename,-1));
-
- getGlobalParSet().insert(new Parameter<string>("o",
- ParameterChangedFlag,
- "output file prefix",
- Prefix,PARLEV_OUTPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "OUTPUT FILE PREFIX",
- ParameterChangedFlag,
- "output file prefix",Prefix,-1));
-
- getGlobalParSet().insert(new Parameter<string>(
- "OUTPUT PATH",
- ParameterChangedFlag,
- "output path",
- OPath,PARLEV_OUTPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous T",
- ParameterChangedFlag,
- "The t-table of previous step",
- prev_t,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous A",
- ParameterChangedFlag,
- "The a-table of previous step",
- prev_a,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D",
- ParameterChangedFlag,
- "The d-table of previous step",
- prev_d,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous N",
- ParameterChangedFlag,
- "The n-table of previous step",
- prev_n,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D4",
- ParameterChangedFlag,
- "The d4-table of previous step",
- prev_d4,PARLEV_INPUT));
- getGlobalParSet().insert(new Parameter<string>(
- "Previous D42",
- ParameterChangedFlag,
- "The d4-table (2) of previous step",
- prev_d4_2,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous P0",
- ParameterChangedFlag,
- "The P0 previous step",
- prev_p0,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Previous HMM",
- ParameterChangedFlag,
- "The hmm-table of previous step",
- prev_hmm,PARLEV_INPUT));
-
- getGlobalParSet().insert(new Parameter<string>(
- "Count Output Prefix",
- ParameterChangedFlag,
- "The prefix for output counts",
- countPrefix,PARLEV_OUTPUT));
- // Timers
- time_t st1, fn;
- st1 = time(NULL); // starting time
-
- // Program Name
-
- string temp(argv[0]);
- Usage = temp + " <config_file> [options]\n";
-
- // At least, config file should be provided.
- if (argc < 2) {
- printHelp();
- exit(1);
- }
- cerr << "Initializing Global Paras " << endl;
- //
- initGlobals() ;
-
- cerr << "Parsing Arguments " << endl;
- //
- parseArguments(argc, argv);
-
- if (SourceVocabClassesFilename=="") {
- makeSetCommand("sourcevocabularyclasses",SourceVocabFilename+".classes",getGlobalParSet(),2);
- }
-
- if (TargetVocabClassesFilename=="") {
- makeSetCommand("targetvocabularyclasses",TargetVocabFilename+".classes",getGlobalParSet(),2);
- }
-
- // Determine number of threads
-
- if(NCPUS == 0){
- cerr << "Trying to detect number of CPUS...";
- NCPUS = boost::thread::hardware_concurrency();
- if(NCPUS==0){
- cerr << "failed, default to 2 threads" << std::endl;
- NCPUS = 2;
- }
- else{
- cerr << NCPUS << std::endl;
- }
+int main(int argc, char* argv[])
+{
+ ////////////////////////////////////////////////////////
+ // Setup parameters
+ ///////////////////////////////////////////////////////
+ cerr << "Starting MGIZA " << endl;
+ getGlobalParSet().insert(new Parameter<string>(
+ "CoocurrenceFile",
+ ParameterChangedFlag,
+ "",
+ CoocurrenceFile,
+ PARLEV_SPECIAL));
+ getGlobalParSet().insert(new Parameter<string>(
+ "ReadTablePrefix",
+ ParameterChangedFlag,
+ "optimized",
+ ReadTablePrefix,-1));
+
+ getGlobalParSet().insert(new Parameter<string>("S",
+ ParameterChangedFlag,
+ "source vocabulary file name",
+ SourceVocabFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "SOURCE VOCABULARY FILE",
+ ParameterChangedFlag,
+ "source vocabulary file name",
+ SourceVocabFilename,-1));
+
+ getGlobalParSet().insert(new Parameter<string>("T",
+ ParameterChangedFlag,
+ "target vocabulary file name",
+ TargetVocabFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "TARGET VOCABULARY FILE",
+ ParameterChangedFlag,
+ "target vocabulary file name",
+ TargetVocabFilename,-1));
+ getGlobalParSet().insert(new Parameter<string>(
+ "Source Vocabulary Classes",
+ ParameterChangedFlag,
+ "source vocabulary classes file name",
+ SourceVocabClassesFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "Target Vocabulary Classes",
+ ParameterChangedFlag,
+ "target vocabulary classes file name",
+ TargetVocabClassesFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "C",
+ ParameterChangedFlag,
+ "training corpus file name",
+ CorpusFilename,PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "CORPUS FILE",
+ ParameterChangedFlag,
+ "training corpus file name",
+ CorpusFilename,-1));
+ getGlobalParSet().insert(new Parameter<string>("TC",
+ ParameterChangedFlag,
+ "test corpus file name",
+ TestCorpusFilename,
+ PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "TEST CORPUS FILE",
+ ParameterChangedFlag,
+ "test corpus file name",
+ TestCorpusFilename,-1));
+
+ getGlobalParSet().insert(new Parameter<string>("d",
+ ParameterChangedFlag,
+ "dictionary file name",
+ dictionary_Filename,
+ PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "DICTIONARY",
+ ParameterChangedFlag,
+ "dictionary file name",
+ dictionary_Filename,-1));
+
+ getGlobalParSet().insert(new Parameter<string>("l",
+ ParameterChangedFlag,
+ "log file name",
+ LogFilename,PARLEV_OUTPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "LOG FILE",
+ ParameterChangedFlag,
+ "log file name",
+ LogFilename,-1));
+
+ getGlobalParSet().insert(new Parameter<string>("o",
+ ParameterChangedFlag,
+ "output file prefix",
+ Prefix,PARLEV_OUTPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "OUTPUT FILE PREFIX",
+ ParameterChangedFlag,
+ "output file prefix",Prefix,-1));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "OUTPUT PATH",
+ ParameterChangedFlag,
+ "output path",
+ OPath,PARLEV_OUTPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous T",
+ ParameterChangedFlag,
+ "The t-table of previous step",
+ prev_t,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous A",
+ ParameterChangedFlag,
+ "The a-table of previous step",
+ prev_a,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous D",
+ ParameterChangedFlag,
+ "The d-table of previous step",
+ prev_d,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous N",
+ ParameterChangedFlag,
+ "The n-table of previous step",
+ prev_n,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous D4",
+ ParameterChangedFlag,
+ "The d4-table of previous step",
+ prev_d4,PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous D42",
+ ParameterChangedFlag,
+ "The d4-table (2) of previous step",
+ prev_d4_2,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous P0",
+ ParameterChangedFlag,
+ "The P0 previous step",
+ prev_p0,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Previous HMM",
+ ParameterChangedFlag,
+ "The hmm-table of previous step",
+ prev_hmm,PARLEV_INPUT));
+
+ getGlobalParSet().insert(new Parameter<string>(
+ "Count Output Prefix",
+ ParameterChangedFlag,
+ "The prefix for output counts",
+ countPrefix,PARLEV_OUTPUT));
+ // Timers
+ time_t st1, fn;
+ st1 = time(NULL); // starting time
+
+ // Program Name
+
+ string temp(argv[0]);
+ Usage = temp + " <config_file> [options]\n";
+
+ // At least, config file should be provided.
+ if (argc < 2) {
+ printHelp();
+ exit(1);
+ }
+ cerr << "Initializing Global Paras " << endl;
+ //
+ initGlobals() ;
+
+ cerr << "Parsing Arguments " << endl;
+ //
+ parseArguments(argc, argv);
+
+ if (SourceVocabClassesFilename=="") {
+ makeSetCommand("sourcevocabularyclasses",SourceVocabFilename+".classes",getGlobalParSet(),2);
+ }
+
+ if (TargetVocabClassesFilename=="") {
+ makeSetCommand("targetvocabularyclasses",TargetVocabFilename+".classes",getGlobalParSet(),2);
+ }
+
+ // Determine number of threads
+
+ if(NCPUS == 0) {
+ cerr << "Trying to detect number of CPUS...";
+ NCPUS = boost::thread::hardware_concurrency();
+ if(NCPUS==0) {
+ cerr << "failed, default to 2 threads" << std::endl;
+ NCPUS = 2;
+ } else {
+ cerr << NCPUS << std::endl;
}
-
- cerr << "Opening Log File " << endl;
- if (Log) {
- logmsg.open(LogFilename.c_str(), ios::out);
- }
+ }
+
+ cerr << "Opening Log File " << endl;
+ if (Log) {
+ logmsg.open(LogFilename.c_str(), ios::out);
+ }
- cerr << "Printing parameters " << endl;
+ cerr << "Printing parameters " << endl;
- printGIZAPars(cout);
+ printGIZAPars(cout);
- int a=-1;
+ int a=-1;
- if (OldADBACKOFF!=0)
- cerr
- << "WARNING: Parameter -adBackOff does not exist further; use CompactADTable instead.\n";
+ if (OldADBACKOFF!=0)
+ cerr
+ << "WARNING: Parameter -adBackOff does not exist further; use CompactADTable instead.\n";
- if (MAX_SENTENCE_LENGTH > MAX_SENTENCE_LENGTH_ALLOWED)
- cerr << "ERROR: MAX_SENTENCE_LENGTH is too big " << MAX_SENTENCE_LENGTH
- << " > " << MAX_SENTENCE_LENGTH_ALLOWED << '\n';
+ if (MAX_SENTENCE_LENGTH > MAX_SENTENCE_LENGTH_ALLOWED)
+ cerr << "ERROR: MAX_SENTENCE_LENGTH is too big " << MAX_SENTENCE_LENGTH
+ << " > " << MAX_SENTENCE_LENGTH_ALLOWED << '\n';
- // Actually word is done here
- StartTraining(a);
+ // Actually word is done here
+ StartTraining(a);
- fn = time(NULL); // finish time
+ fn = time(NULL); // finish time
- cout << '\n' << "Entire Training took: " << difftime(fn, st1)
- << " seconds\n";
- cout << "Program Finished at: "<< my_ctime(&fn) << '\n';
- cout << "==========================================================\n";
- return 0;
+ cout << '\n' << "Entire Training took: " << difftime(fn, st1)
+ << " seconds\n";
+ cout << "Program Finished at: "<< my_ctime(&fn) << '\n';
+ cout << "==========================================================\n";
+ return 0;
}
diff --git a/mgizapp/src/mkcls/Array.h b/mgizapp/src/mkcls/Array.h
index 5647fd0..c444993 100644
--- a/mgizapp/src/mkcls/Array.h
+++ b/mgizapp/src/mkcls/Array.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -38,8 +38,8 @@ using namespace std;
template<class T> class Array
{
- private:
- T *p;
+private:
+ T *p;
int realSize;
int maxWritten;
char a;
@@ -47,247 +47,229 @@ template<class T> class Array
void copy(T *a,const T *b,int n);
void copy(T *a,T *b,int n);
void _expand();
-
- public:
- Array()
- : p(0),realSize(0),maxWritten(-1) ,a(1)
- {
+
+public:
+ Array()
+ : p(0),realSize(0),maxWritten(-1) ,a(1) {
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
+ cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
#endif
- }
+ }
Array(const Array<T> &x)
- : p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
- {
- copy(p,x.p,realSize);
+ : p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a) {
+ copy(p,x.p,realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
+ cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
#endif
- }
+ }
explicit Array(int n)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(0)
- {
+ : p(new T[n]),realSize(n),maxWritten(n-1),a(0) {
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
+ cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ }
Array(int n,const T&_init,int _a=0)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
- {
- for(int iii=0;iii<n;iii++)p[iii]=_init;
+ : p(new T[n]),realSize(n),maxWritten(n-1),a(_a) {
+ for(int iii=0; iii<n; iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
-
- ~Array()
- {
+ cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ }
+
+ ~Array() {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- }
-
- Array<T>& operator=(const Array<T>&x)
- {
- if( this!= &x )
- {
+ cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ delete [] p;
+ }
+
+ Array<T>& operator=(const Array<T>&x) {
+ if( this!= &x ) {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
-
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
+ cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+
+ delete [] p;
+ realSize = x.maxWritten+1;
+ maxWritten = x.maxWritten;
+ a = x.a;
+ p = new T[realSize];
+ copy(p,x.p,realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
+ cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
}
-
- Array<T>& operator=(Array<T>&x)
- {
- if( this!= &x )
- {
+ return *this;
+ }
+
+ Array<T>& operator=(Array<T>&x) {
+ if( this!= &x ) {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
+ cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ delete [] p;
+ realSize = x.maxWritten+1;
+ maxWritten = x.maxWritten;
+ a = x.a;
+ p = new T[realSize];
+ copy(p,x.p,realSize);
#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
- }
-
- void allowAccess(int n)
- {
- while( realSize<=n )
- _expand();
- maxWritten=max(maxWritten,n);
- massert( maxWritten<realSize );
- }
- void resize(int n)
- {
- while( realSize<n )
- _expand();
- maxWritten=n-1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
+ cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
}
- void init(int n,const T&_init,bool _a=0)
- {
+ return *this;
+ }
+
+ void allowAccess(int n) {
+ while( realSize<=n )
+ _expand();
+ maxWritten=max(maxWritten,n);
+ massert( maxWritten<realSize );
+ }
+ void resize(int n) {
+ while( realSize<n )
+ _expand();
+ maxWritten=n-1;
+ }
+ void sort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p,p+until);
+ }
+ void invsort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p,p+until,greater<T>());
+ }
+ void init(int n,const T&_init,bool _a=0) {
#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete []p;
- p=new T[n];
- realSize=n;
- a=_a;
- maxWritten=n-1;
- for(int iii=0;iii<n;iii++)p[iii]=_init;
+ cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ delete []p;
+ p=new T[n];
+ realSize=n;
+ a=_a;
+ maxWritten=n-1;
+ for(int iii=0; iii<n; iii++)p[iii]=_init;
#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- inline int size() const
- {massert( maxWritten<realSize );
- return maxWritten+1;}
- inline int low() const
- { return 0; }
- inline int high() const
- { return maxWritten; }
- inline bool autoexpand() const
- {return a;}
- inline void autoexpand(bool autoExp)
- {a=autoExp;}
+ cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
+#endif
+ }
+ inline int size() const {
+ massert( maxWritten<realSize );
+ return maxWritten+1;
+ }
+ inline int low() const {
+ return 0;
+ }
+ inline int high() const {
+ return maxWritten;
+ }
+ inline bool autoexpand() const {
+ return a;
+ }
+ inline void autoexpand(bool autoExp) {
+ a=autoExp;
+ }
int findMax() const;
int findMin() const;
const void errorAccess(int n) const;
- inline T*getPointerToData(){return p;}
-
- inline T& operator[](int n)
- {
- if( a && n==maxWritten+1 )
- allowAccess(n);
- if( n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- if(n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
+ inline T*getPointerToData() {
+ return p;
+ }
+
+ inline T& operator[](int n) {
+ if( a && n==maxWritten+1 )
+ allowAccess(n);
+ if( n<0 || n>maxWritten )
+ errorAccess(n);
+ return p[n];
+ }
+ inline const T& operator[](int n) const {
+ if(n<0 || n>maxWritten )
+ errorAccess(n);
+ return p[n];
+ }
+ const T&top(int n=0) const {
+ return (*this)[maxWritten-n];
+ }
+ T&top(int n=0) {
+ return (*this)[maxWritten-n];
+ }
+ T&push(const T&x) {
+ (*this)[maxWritten+1]=x;
+ return top();
+ }
+ bool writeTo(ostream&out) const {
+ out << "Array ";
+ out << size() << " ";
+ out << a << endl;
+ for(int iv=0; iv<=maxWritten; iv++) {
+ writeOb(out,(*this)[iv]);
+ out << endl;
}
- const T&top(int n=0) const
- {return (*this)[maxWritten-n];}
- T&top(int n=0)
- {return (*this)[maxWritten-n];}
- T&push(const T&x)
- {
- (*this)[maxWritten+1]=x;
- return top();
+ return 1;
+ }
+ bool readFrom(istream&in) {
+ string s;
+ if( !in ) {
+ cerr << "ERROR(Array): file cannot be opened.\n";
+ return 0;
}
- bool writeTo(ostream&out) const
- {
- out << "Array ";
- out << size() << " ";
- out << a << endl;
- for(int iv=0;iv<=maxWritten;iv++)
- {
- writeOb(out,(*this)[iv]);
- out << endl;
- }
- return 1;
+ in >> s;
+ if( !(s=="Array") ) {
+ cerr << "ERROR(Array): Array!='"<<s<<"'\n";
+ return 0;
}
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(Array): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="Array") )
- {
- cerr << "ERROR(Array): Array!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- in >> a;
- resize(biggest);
- for(int iv=0;iv<size();iv++)
- {
- readOb(in,(*this)[iv]);
- }
- return 1;
+ int biggest;
+ in >> biggest;
+ in >> a;
+ resize(biggest);
+ for(int iv=0; iv<size(); iv++) {
+ readOb(in,(*this)[iv]);
}
+ return 1;
+ }
};
template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
{
if( &x == &y )
return 1;
- else
- {
- if( y.size()!=x.size() )
- return 0;
- else
- {
- for(int iii=0;iii<x.size();iii++)
- if( !(x[iii]==y[iii]) )
- return 0;
- return 1;
- }
+ else {
+ if( y.size()!=x.size() )
+ return 0;
+ else {
+ for(int iii=0; iii<x.size(); iii++)
+ if( !(x[iii]==y[iii]) )
+ return 0;
+ return 1;
}
+ }
}
template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
{
if( &x == &y )
return 0;
- else
- {
- if( y.size()<x.size() )
- return !(y<x);
- for(int iii=0;iii<x.size();iii++)
- {
- massert( iii!=y.size() );
- if( x[iii]<y[iii] )
- return 1;
- else if( y[iii]<x[iii] )
- return 0;
- }
- return x.size()!=y.size();
+ else {
+ if( y.size()<x.size() )
+ return !(y<x);
+ for(int iii=0; iii<x.size(); iii++) {
+ massert( iii!=y.size() );
+ if( x[iii]<y[iii] )
+ return 1;
+ else if( y[iii]<x[iii] )
+ return 0;
}
+ return x.size()!=y.size();
+ }
}
template<class T> const void Array<T>:: errorAccess(int n) const
{
- cerr << "ERROR: Access to array element " << n
- << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
- cout << "ERROR: Access to array element " << n
+ cerr << "ERROR: Access to array element " << n
+ << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
+ cout << "ERROR: Access to array element " << n
<< " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
massert(0);
#ifndef DEBUG
@@ -298,29 +280,31 @@ template<class T> const void Array<T>:: errorAccess(int n) const
template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
{
o << "Array(" << a.size() << "," << a.autoexpand() << "){ ";
- for(int iii=0;iii<a.size();iii++)
+ for(int iii=0; iii<a.size(); iii++)
o << " " << iii<< ":" << a[iii]<<";";
return o << "}\n";
}
template<class T> istream& operator>>(istream&in, Array<T>&)
-{return in;}
+{
+ return in;
+}
template<class T> int Hash(const Array<T>&a)
{
int n=0;
- for(int iii=0;iii<a.size();iii++)
+ for(int iii=0; iii<a.size(); iii++)
n+=Hash(a[iii])*(iii+1);
return n+a.size()*47;
}
template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
{
- for(int iii=0;iii<n;iii++)
+ for(int iii=0; iii<n; iii++)
aa[iii]=bb[iii];
}
template<class T> void Array<T>::copy(T *aa,T *bb,int n)
{
- for(int iii=0;iii<n;iii++)
+ for(int iii=0; iii<n; iii++)
aa[iii]=bb[iii];
}
@@ -328,7 +312,7 @@ template<class T> void Array<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
+#endif
T *oldp=p;
int oldsize=realSize;
realSize=realSize*2+1;
@@ -337,34 +321,32 @@ template<class T> void Array<T>::_expand()
delete [] oldp;
#ifdef VERY_ARRAY_DEBUG
cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
+#endif
}
template<class T> int Array<T>::findMax() const
{
if( size()==0 )
return -1;
- else
- {
- int maxPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[maxPos]<(*this)[iii] )
- maxPos=iii;
- return maxPos;
- }
+ else {
+ int maxPos=0;
+ for(int iii=1; iii<size(); iii++)
+ if( (*this)[maxPos]<(*this)[iii] )
+ maxPos=iii;
+ return maxPos;
+ }
}
template<class T> int Array<T>::findMin() const
{
if( size()==0 )
return -1;
- else
- {
- int minPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[iii]<(*this)[minPos] )
- minPos=iii;
- return minPos;
- }
+ else {
+ int minPos=0;
+ for(int iii=1; iii<size(); iii++)
+ if( (*this)[iii]<(*this)[minPos] )
+ minPos=iii;
+ return minPos;
+ }
}
#endif
diff --git a/mgizapp/src/mkcls/FixedArray.h b/mgizapp/src/mkcls/FixedArray.h
index 39da0b1..8b08b2b 100644
--- a/mgizapp/src/mkcls/FixedArray.h
+++ b/mgizapp/src/mkcls/FixedArray.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,14 +32,14 @@ USA.
template<class T>
bool writeOb(ostream&out,const T&f)
-{
- out << f << " ";
+{
+ out << f << " ";
return 1;
}
template<class T>
bool readOb(istream&in,T&f)
-{
+{
in >> f;
char c;
in.get(c);
@@ -49,21 +49,20 @@ bool readOb(istream&in,T&f)
template<class T>
bool writeOb(ostream&out,const string &s,const T&f)
-{
- out << s << " " << f << " ";
+{
+ out << s << " " << f << " ";
return 1;
}
template<class T>
bool readOb(istream&in,const string&s,T&f)
-{
+{
string ss;
in >> ss;
- if( s!=ss )
- {
- cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
- return 0;
- }
- in >> f;
+ if( s!=ss ) {
+ cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
+ return 0;
+ }
+ in >> f;
char c;
in.get(c);
massert(c==' ');
@@ -72,148 +71,154 @@ bool readOb(istream&in,const string&s,T&f)
template<class T> class FixedArray
{
- private:
- void copy(T *aa,const T *bb,int nnn)
- {for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
-
- public:
- T *p;
+private:
+ void copy(T *aa,const T *bb,int nnn) {
+ for(int iii=0; iii<nnn; iii++)aa[iii]=bb[iii];
+ }
+
+public:
+ T *p;
int realSize;
- FixedArray()
- : p(0),realSize(0){}
+ FixedArray()
+ : p(0),realSize(0) {}
FixedArray(const FixedArray<T> &x)
- : p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
+ : p(new T[x.realSize]),realSize(x.realSize) {
+ copy(p,x.p,realSize);
+ }
explicit FixedArray(int n)
- : p(new T[n]),realSize(n){}
+ : p(new T[n]),realSize(n) {}
FixedArray(int n,const T&_init)
- : p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
+ : p(new T[n]),realSize(n) {
+ for(int z=0; z<n; z++)p[z]=_init;
+ }
FixedArray(const FixedArray&f,const T&t)
- : p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
- ~FixedArray()
- { delete [] p;p=0;realSize=-1;}
-
- FixedArray<T>& operator=(const FixedArray<T>&x)
- {
- if( this!= &x )
- {
- delete [] p;
- realSize = x.realSize;
- p = new T[x.realSize];
- copy(p,x.p,realSize);
- }
- return *this;
- }
- void resize(int n)
- {
- if( n<=realSize )
- shrink(n);
- else
- {
- T*np=new T[n];
- copy(np,p,realSize);
- delete []p;
- p=np;
- realSize=n;
- }
- }
- void shrink(int n)
- {
- assert(n<=realSize);
- realSize=n;
+ : p(new T[f.size()+1]),realSize(f.size()+1) {
+ for(int z=0; z<f.size(); z++)p[z]=f[z];
+ p[f.size()]=t;
+ }
+ ~FixedArray() {
+ delete [] p;
+ p=0;
+ realSize=-1;
+ }
+
+ FixedArray<T>& operator=(const FixedArray<T>&x) {
+ if( this!= &x ) {
+ delete [] p;
+ realSize = x.realSize;
+ p = new T[x.realSize];
+ copy(p,x.p,realSize);
}
- void init(int n,const T&_init)
- {
+ return *this;
+ }
+ void resize(int n) {
+ if( n<=realSize )
+ shrink(n);
+ else {
+ T*np=new T[n];
+ copy(np,p,realSize);
delete []p;
- p=new T[n];
+ p=np;
realSize=n;
- for(int l=0;l<n;l++)p[l]=_init;
}
- inline const T&top(int n=0) const
- {return (*this)[realSize-1-n];}
- inline int size() const
- {return realSize;}
+ }
+ void shrink(int n) {
+ assert(n<=realSize);
+ realSize=n;
+ }
+ void init(int n,const T&_init) {
+ delete []p;
+ p=new T[n];
+ realSize=n;
+ for(int l=0; l<n; l++)p[l]=_init;
+ }
+ inline const T&top(int n=0) const {
+ return (*this)[realSize-1-n];
+ }
+ inline int size() const {
+ return realSize;
+ }
- inline T*begin(){ return p; }
- inline T*end(){ return p+realSize; }
+ inline T*begin() {
+ return p;
+ }
+ inline T*end() {
+ return p+realSize;
+ }
- inline const T*begin()const{ return p; }
- inline const T*end()const{return p+realSize;}
+ inline const T*begin()const {
+ return p;
+ }
+ inline const T*end()const {
+ return p+realSize;
+ }
- inline int low() const
- {return 0;}
- inline int high() const
- {return realSize-1;}
+ inline int low() const {
+ return 0;
+ }
+ inline int high() const {
+ return realSize-1;
+ }
const void errorAccess(int n) const;
-
- inline T& operator[](int n)
- {
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- return p[n];
- }
- bool writeTo(ostream&out) const
- {
- out << "FixedArray ";
- out << size() << " ";
- for(int a=0;a<size();a++)
- {
- writeOb(out,(*this)[a]);
- out << " ";
- }
- out << endl;
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(FixedArray): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="FixedArray") )
- {
- cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- resize(biggest);
- for(int a=0;a<size();a++)
- readOb(in,(*this)[a]);
- return 1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
+
+ inline T& operator[](int n) {
+ return p[n];
+ }
+ inline const T& operator[](int n) const {
+ return p[n];
+ }
+ bool writeTo(ostream&out) const {
+ out << "FixedArray ";
+ out << size() << " ";
+ for(int a=0; a<size(); a++) {
+ writeOb(out,(*this)[a]);
+ out << " ";
}
- int binary_locate(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=-1&&pos<size() )
- return pos;
- else
- return -1;
+ out << endl;
+ return 1;
+ }
+ bool readFrom(istream&in) {
+ string s;
+ if( !in ) {
+ cerr << "ERROR(FixedArray): file cannot be opened.\n";
+ return 0;
}
- int binary_search(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=0&&pos<size()&& *ppos==t )
- return pos;
- else
- return -1;
+ in >> s;
+ if( !(s=="FixedArray") ) {
+ cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
+ return 0;
}
+ int biggest;
+ in >> biggest;
+ resize(biggest);
+ for(int a=0; a<size(); a++)
+ readOb(in,(*this)[a]);
+ return 1;
+ }
+ void sort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p,p+until);
+ }
+ void invsort(int until=-1) {
+ if( until== -1 ) until=size();
+ std::sort(p,p+until,greater<T>());
+ }
+ int binary_locate(const T&t) {
+ T*ppos=std::lower_bound(p,p+size(),t);
+ int pos=ppos-p;
+ if( pos>=-1&&pos<size() )
+ return pos;
+ else
+ return -1;
+ }
+ int binary_search(const T&t) {
+ T*ppos=std::lower_bound(p,p+size(),t);
+ int pos=ppos-p;
+ if( pos>=0&&pos<size()&& *ppos==t )
+ return pos;
+ else
+ return -1;
+ }
typedef T* iterator;
typedef const T* const_iterator;
};
@@ -221,7 +226,7 @@ template<class T> class FixedArray
template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
{
return lexicographical_compare(x.begin(),x.end(),y.begin(),y.end());
-
+
}
@@ -230,7 +235,7 @@ template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &
if( &x == &y )return 1;
const int s = x.size();
if( s !=y.size() )return 0;
- for(int iii=0;iii<s;iii++)
+ for(int iii=0; iii<s; iii++)
if( !(x.p[iii]==y.p[iii]) )
return 0;
return 1;
@@ -240,7 +245,7 @@ template<class T> int Hash(const FixedArray<T>&a)
{
int n=0;
const int s=a.size();
- for(int iii=0;iii<s;iii++)
+ for(int iii=0; iii<s; iii++)
n=13*n+Hash(a.p[iii]);
return n;
}
@@ -248,26 +253,28 @@ template<class T> int Hash(const FixedArray<T>&a)
template<class T> const void FixedArray<T>:: errorAccess(int n) const
{
massert(0);
- cerr << "ERROR: Access to array element " << n
- << " (" << realSize << "," << (void*)p << ")\n";
+ cerr << "ERROR: Access to array element " << n
+ << " (" << realSize << "," << (void*)p << ")\n";
}
-
+
template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
{
o << "FixedArray(" << a.size() << "){ ";
- for(int iii=0;iii<a.size();iii++)
+ for(int iii=0; iii<a.size(); iii++)
o << " " << iii<< ":" << a[iii]<<";";
return o << "}\n";
}
template<class T> istream& operator>>(istream&in, FixedArray<T>&)
-{ return in;}
+{
+ return in;
+}
template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
{
massert(a.size()==b.size());
FixedArray<T> x(a.size());
- for(int iii=0;iii<a.size();iii++)
+ for(int iii=0; iii<a.size(); iii++)
x[iii]=a[iii]+b[iii];
return x;
}
@@ -276,7 +283,7 @@ template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArr
iassert(aaa.size()==bbb.size());
FixedArray<T> xxx(aaa.size());
- for(int iii=0;iii<aaa.size();iii++)
+ for(int iii=0; iii<aaa.size(); iii++)
xxx.p[iii]=aaa.p[iii]||bbb.p[iii];
return xxx;
}
diff --git a/mgizapp/src/mkcls/FlexArray.h b/mgizapp/src/mkcls/FlexArray.h
index ede3e9e..6b7b2d7 100644
--- a/mgizapp/src/mkcls/FlexArray.h
+++ b/mgizapp/src/mkcls/FlexArray.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,12 +36,18 @@ private:
public:
FlexArray(int _start=0,int _end=-1)
: p(_end-_start+1),start(_start),end(_end) {}
- T&operator[](int i)
- {return p[i-start];}
- const T&operator[](int i)const
- {returnp[i-start];}
- int low()const{return start;}
- int high()const{return end;}
+ T&operator[](int i) {
+ return p[i-start];
+ }
+ const T&operator[](int i)const {
+ returnp[i-start];
+ }
+ int low()const {
+ return start;
+ }
+ int high()const {
+ return end;
+ }
};
diff --git a/mgizapp/src/mkcls/GDAOptimization.cpp b/mgizapp/src/mkcls/GDAOptimization.cpp
index a9e2fa7..08b6a08 100644
--- a/mgizapp/src/mkcls/GDAOptimization.cpp
+++ b/mgizapp/src/mkcls/GDAOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,49 +31,48 @@ USA.
#define GDAOptimization GDAOptimization
#define IterOptimization IterOptimization
-
-
+
+
double GDAOptimization::defaultTemperatur=1e100;
double GDAOptimization::defaultAlpha=0.001;
-
-GDAOptimization::GDAOptimization(Problem &p,int m)
-: IterOptimization(p,m) ,temperatur(defaultTemperatur),alpha(defaultAlpha)
+
+GDAOptimization::GDAOptimization(Problem &p,int m)
+ : IterOptimization(p,m) ,temperatur(defaultTemperatur),alpha(defaultAlpha)
{
}
-
-GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
-: IterOptimization(p,m) ,temperatur(t) ,alpha(a)
+
+GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
+ : IterOptimization(p,m) ,temperatur(t) ,alpha(a)
{
}
-
+
GDAOptimization::GDAOptimization(GDAOptimization &o)
-: IterOptimization(o)
+ : IterOptimization(o)
{
temperatur = o.temperatur;
alpha = o.alpha;
gdaEndFlag = o.gdaEndFlag;
}
-
+
void GDAOptimization::zInitialize()
{
IterOptimization::zInitialize();
- if(temperatur==1e100)
- {
- double v=problem.value();
-
-
-
-
-
- temperatur=v;
- }
+ if(temperatur==1e100) {
+ double v=problem.value();
+
+
+
+
+
+ temperatur=v;
+ }
assert(alpha>=0);
}
@@ -88,7 +87,7 @@ short GDAOptimization::accept(double delta)
void GDAOptimization::abkuehlen()
{
double newTemperatur = temperatur - alpha*(temperatur - curValue);
- if( fabs(temperatur - newTemperatur)<1e-30 )
+ if( fabs(temperatur - newTemperatur)<1e-30 )
gdaEndFlag=1;
else
gdaEndFlag=0;
@@ -105,55 +104,49 @@ void GDAOptimization::makeGraphOutput()
IterOptimization::makeGraphOutput();
*GraphOutput << temperatur-curValue;
}
-
-
+
+
double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
+ int optimierungsschritte,int print)
{
- if(typ!=1)
- {
- cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- else
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#GDA-optimizeValues: " << numParameter<<endl;
-
-
- defaultTemperatur=1e100;
-
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAlpha = pow(pow(200,1.0/numParameter),i)*0.002;
- solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAlpha;
- }
- if( print )
- {
- cout << defaultAlpha <<" ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller()<< " "<< end.getSigmaBigger()<< endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
- " Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAlpha=0.03;
- return bestPar;
+ if(typ!=1) {
+ cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
+ << typ << ")\n";
+ exit(1);
+ } else {
+ double bestPar=-1,best=1e100;
+ double now;
+ if( print )
+ cout << "#GDA-optimizeValues: " << numParameter<<endl;
+
+
+ defaultTemperatur=1e100;
+
+ for(int i=0; i<=numParameter; i++) {
+ StatVar end,laufzeit,init;
+ defaultAlpha = pow(pow(200,1.0/numParameter),i)*0.002;
+ solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
+ laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultAlpha;
+ }
+ if( print ) {
+ cout << defaultAlpha <<" ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller()<< " "<< end.getSigmaBigger()<< endl;
+ }
}
- return 1e100;
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
+ " Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultAlpha=0.03;
+ return bestPar;
+ }
+ return 1e100;
}
diff --git a/mgizapp/src/mkcls/GDAOptimization.h b/mgizapp/src/mkcls/GDAOptimization.h
index 33bcec3..b1dad41 100644
--- a/mgizapp/src/mkcls/GDAOptimization.h
+++ b/mgizapp/src/mkcls/GDAOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,47 +32,47 @@ USA.
class GDAOptimization : public IterOptimization
{
-
- private:
- double temperatur;
- double alpha;
- short gdaEndFlag;
-
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- GDAOptimization(Problem &p,double temperatur,double alpha,
- int maxIter=-1);
-
-
- GDAOptimization(Problem &p,int maxIter=-1);
-
-
- GDAOptimization(GDAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
-
- static double defaultTemperatur;
- static double defaultAlpha;
+
+private:
+ double temperatur;
+ double alpha;
+ short gdaEndFlag;
+
+
+protected:
+ virtual void zInitialize();
+
+
+ virtual short accept(double delta);
+
+
+ virtual void abkuehlen();
+
+
+ virtual short end();
+
+
+ virtual void makeGraphOutput();
+
+
+public:
+ GDAOptimization(Problem &p,double temperatur,double alpha,
+ int maxIter=-1);
+
+
+ GDAOptimization(Problem &p,int maxIter=-1);
+
+
+ GDAOptimization(GDAOptimization &o);
+
+
+ static double optimizeValue(Problem &p,int proParameter,
+ int numParameter,int typ,int schritte= -1,int verbose=1);
+
+
+
+ static double defaultTemperatur;
+ static double defaultAlpha;
};
#endif
diff --git a/mgizapp/src/mkcls/HCOptimization.cpp b/mgizapp/src/mkcls/HCOptimization.cpp
index 0c6a729..8f8b943 100644
--- a/mgizapp/src/mkcls/HCOptimization.cpp
+++ b/mgizapp/src/mkcls/HCOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -26,14 +26,14 @@ USA.
#include "HCOptimization.h"
-HCOptimization::HCOptimization(Problem &p,int m)
-: IterOptimization(p,m)
+HCOptimization::HCOptimization(Problem &p,int m)
+ : IterOptimization(p,m)
{
if( maxStep<=0 )
maxStep=(int)(problem.expectedNumberOfIterations());
}
HCOptimization::HCOptimization(HCOptimization &o)
-: IterOptimization(o)
+ : IterOptimization(o)
{
}
@@ -45,9 +45,9 @@ short HCOptimization::accept(double delta)
else
return 0;
}
-short HCOptimization::end()
-{
- return endFlag>0;
+short HCOptimization::end()
+{
+ return endFlag>0;
}
void HCOptimization::abkuehlen()
{
diff --git a/mgizapp/src/mkcls/HCOptimization.h b/mgizapp/src/mkcls/HCOptimization.h
index ec147b2..2eb3d02 100644
--- a/mgizapp/src/mkcls/HCOptimization.h
+++ b/mgizapp/src/mkcls/HCOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,23 +32,23 @@ USA.
class HCOptimization : public IterOptimization
{
-
- protected:
- virtual short accept(double delta);
-
- virtual void abkuehlen();
-
+protected:
+ virtual short accept(double delta);
- virtual short end();
-
- public:
- HCOptimization(Problem &p,int maxIter=-1);
-
+ virtual void abkuehlen();
+
+
+ virtual short end();
+
+
+public:
+ HCOptimization(Problem &p,int maxIter=-1);
+
+
+ HCOptimization(HCOptimization &o);
- HCOptimization(HCOptimization &o);
-
};
#endif
diff --git a/mgizapp/src/mkcls/IterOptimization.cpp b/mgizapp/src/mkcls/IterOptimization.cpp
index 258cb1f..06540df 100644
--- a/mgizapp/src/mkcls/IterOptimization.cpp
+++ b/mgizapp/src/mkcls/IterOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -26,11 +26,11 @@ USA.
#include "IterOptimization.h"
#include "ProblemTest.h"
-ostream *GraphOutput;
+ostream *GraphOutput;
-
-IterOptimization::IterOptimization(Problem& p,int m)
+
+IterOptimization::IterOptimization(Problem& p,int m)
: maxNonBetterIterations(0),problem(p),maxStep(m),initialisiert(0)
{
}
@@ -50,89 +50,83 @@ IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem
}
-
+
double IterOptimization::minimize(int steps)
{
if( !initialisiert )
zInitialize();
-
+
if( steps==0 )
return curValue;
-
+
int t=0;
int every=(steps<0)?10000:(steps/1000+1);
-
- do
- {
- curStep++;
- t++;
- if(verboseMode&&(curStep%1000==0))
- {
- if(steps>0)
- cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
- << curValue << " max:" << maxStep << " " << steps << " \r";
- else
- cout << "In step:" << curStep << " currentValue: " << curValue
- << " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
- cout.flush();
- }
-
-
- ProblemChange *change= &(problem.change());
-
-
- double delta=problem.valueChange(*change);
-
-
- abkuehlen();
-
-
- if( accept(delta) )
- {
-
- problem.doChange(*change);
-
-
- curValue+=delta;
-
-
- if( curValue<bestValue-1e-10 )
- {
- bestValue=curValue;
- bestStep=curStep;
- endFlag2=endFlag=0;
- }
-
- if( verboseMode>1 )
- cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
- }
-
- if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag=1;
- if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag2=1;
-
-
-
- if( GraphOutput&&((curStep%every)==0) )
- {
- makeGraphOutput();
- *GraphOutput<<" "<<delta<<endl;
- }
-
- delete change;
- } while( t!=steps && (!end()) && (!problem.endCriterion()) );
-
- if( GraphOutput)
- {
+
+ do {
+ curStep++;
+ t++;
+ if(verboseMode&&(curStep%1000==0)) {
+ if(steps>0)
+ cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
+ << curValue << " max:" << maxStep << " " << steps << " \r";
+ else
+ cout << "In step:" << curStep << " currentValue: " << curValue
+ << " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
+ cout.flush();
+ }
+
+
+ ProblemChange *change= &(problem.change());
+
+
+ double delta=problem.valueChange(*change);
+
+
+ abkuehlen();
+
+
+ if( accept(delta) ) {
+
+ problem.doChange(*change);
+
+
+ curValue+=delta;
+
+
+ if( curValue<bestValue-1e-10 ) {
+ bestValue=curValue;
+ bestStep=curStep;
+ endFlag2=endFlag=0;
+ }
+
+ if( verboseMode>1 )
+ cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
+ }
+
+ if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
+ endFlag=1;
+ if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
+ endFlag2=1;
+
+
+
+ if( GraphOutput&&((curStep%every)==0) ) {
makeGraphOutput();
- *GraphOutput<<endl;
+ *GraphOutput<<" "<<delta<<endl;
}
+
+ delete change;
+ } while( t!=steps && (!end()) && (!problem.endCriterion()) );
+
+ if( GraphOutput) {
+ makeGraphOutput();
+ *GraphOutput<<endl;
+ }
return curValue;
}
-void IterOptimization::zInitialize()
+void IterOptimization::zInitialize()
{
initialisiert=1;
bestValue=curValue=problem.value();
@@ -144,19 +138,19 @@ void IterOptimization::zInitialize()
void IterOptimization::makeGraphOutput()
{
-
+
*GraphOutput << curStep << " " <<curValue << " ";
}
double IterOptimizationOptimizeParameter(Problem &p,
- double &parameter,double min,double max,
- int nRun,int nPar,int verfahren,
- double &bv)
+ double &parameter,double min,double max,
+ int nRun,int nPar,int verfahren,
+ double &bv)
{
if( nPar<=0 )
return (max+min)/2;
-
+
StatVar end1,time1,init1;
StatVar end2,time2,init2;
double mean1,mean2;
@@ -169,31 +163,23 @@ double IterOptimizationOptimizeParameter(Problem &p,
parameter = par2 = min + 2*(max-min)/3;
solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;
-
+
double bestPar,bestVal;
- if(mean1<mean2)
- {
- bestVal = mean1;
- bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
- }
- else
- {
- bestVal = mean2;
- bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
- }
- if( mean1<bestVal&&mean1<=mean2 )
- {
- bv = mean1;
- return par1;
- }
- else if(mean2<bestVal && mean2<=mean1)
- {
- bv = mean2;
- return par2;
- }
- else
- {
- bv = bestVal;
- return bestPar;
- }
+ if(mean1<mean2) {
+ bestVal = mean1;
+ bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
+ } else {
+ bestVal = mean2;
+ bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
+ }
+ if( mean1<bestVal&&mean1<=mean2 ) {
+ bv = mean1;
+ return par1;
+ } else if(mean2<bestVal && mean2<=mean1) {
+ bv = mean2;
+ return par2;
+ } else {
+ bv = bestVal;
+ return bestPar;
+ }
}
diff --git a/mgizapp/src/mkcls/IterOptimization.h b/mgizapp/src/mkcls/IterOptimization.h
index ba39b55..4d70c14 100644
--- a/mgizapp/src/mkcls/IterOptimization.h
+++ b/mgizapp/src/mkcls/IterOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,85 +36,85 @@ USA.
-#define ANZ_VERSCHLECHTERUNGEN 500
+#define ANZ_VERSCHLECHTERUNGEN 500
+
+extern ostream *GraphOutput;
-extern ostream *GraphOutput;
-
class IterOptimization : public Optimization
- {
-
+{
+
+
+private:
+ int maxNonBetterIterations;
+
+
+protected:
+ Problem &problem;
+ int curStep;
+ double curValue;
+ int bestStep;
+ double bestValue;
+ int maxStep;
+ int initialisiert;
+ short endFlag;
+ short endFlag2;
+
+
+
+
+ virtual void makeGraphOutput();
- private:
- int maxNonBetterIterations;
-
- protected:
- Problem &problem;
- int curStep;
- double curValue;
- int bestStep;
- double bestValue;
- int maxStep;
- int initialisiert;
- short endFlag;
- short endFlag2;
-
-
+ virtual short end()=0;
- virtual void makeGraphOutput();
-
+ virtual void abkuehlen()=0;
- virtual short end()=0;
-
- virtual void abkuehlen()=0;
-
+ virtual short accept(double delta)=0;
- virtual short accept(double delta)=0;
-
- virtual void zInitialize();
-
+ virtual void zInitialize();
- public:
- IterOptimization(Problem &p,int maxIter=-1);
-
- IterOptimization(IterOptimization &o);
-
+public:
+ IterOptimization(Problem &p,int maxIter=-1);
- virtual double minimize(int steps=-1);
-
- inline int getCurStep();
-
+ IterOptimization(IterOptimization &o);
- inline double getCurrentValue();
-
- inline const Problem& getProblem();
-
+ virtual double minimize(int steps=-1);
+
+
+ inline int getCurStep();
+
+
+ inline double getCurrentValue();
+
+
+ inline const Problem& getProblem();
+
};
double IterOptimizationOptimizeParameter(Problem &p,
- double &parameter,double min,double max,
- int nRun,int nPar,int verfahren,double &bv);
+ double &parameter,double min,double max,
+ int nRun,int nPar,int verfahren,double &bv);
-inline int IterOptimization::getCurStep()
-{
+inline int IterOptimization::getCurStep()
+{
return curStep;
};
-inline double IterOptimization::getCurrentValue()
-{
- return curValue;
-};
-inline const Problem& IterOptimization::getProblem()
-{
- return problem;
-};
+inline double IterOptimization::getCurrentValue()
+{
+ return curValue;
+};
+inline const Problem& IterOptimization::getProblem()
+{
+ return problem;
+};
#endif
diff --git a/mgizapp/src/mkcls/KategProblem.cpp b/mgizapp/src/mkcls/KategProblem.cpp
index 9bc1d90..ede82e4 100644
--- a/mgizapp/src/mkcls/KategProblem.cpp
+++ b/mgizapp/src/mkcls/KategProblem.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,9 +32,9 @@ USA.
extern double SigmaVerfaelschung;
-double h_table[MAX_H_TABLE],l_table[MAX_H_TABLE],hmy_table[MAX_H_TABLE],hmy_sigma;
+double h_table[MAX_H_TABLE],l_table[MAX_H_TABLE],hmy_table[MAX_H_TABLE],hmy_sigma;
-double LWRW_Faktor=0.5;
+double LWRW_Faktor=0.5;
static int intcompare(const void *p,const void *j)
{
@@ -42,12 +42,12 @@ static int intcompare(const void *p,const void *j)
}
KategProblem::KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int mindestAnzahl)
-: Problem(mak,aw,_initialisierung,_auswertung,_nachbarschaft),
- sigmaVerfaelschung(SigmaVerfaelschung),katWasEmpty(0),nwg(mak+2),ngw(mak+2),_katOfWord(aw,-1),words(0),kats(0),
- wordFreq(aw,mindestAnzahl),katFreq(mak+2,(_auswertung==CRITERION_MY)?SigmaVerfaelschung:0.0),
- initLike(aw,-1)
-
+ int _nachbarschaft,int mindestAnzahl)
+ : Problem(mak,aw,_initialisierung,_auswertung,_nachbarschaft),
+ sigmaVerfaelschung(SigmaVerfaelschung),katWasEmpty(0),nwg(mak+2),ngw(mak+2),_katOfWord(aw,-1),words(0),kats(0),
+ wordFreq(aw,mindestAnzahl),katFreq(mak+2,(_auswertung==CRITERION_MY)?SigmaVerfaelschung:0.0),
+ initLike(aw,-1)
+
{
if( auswertung == CRITERION_MY )
cout << "Sigma-Verfaelschung: " << sigmaVerfaelschung << endl;
@@ -56,19 +56,18 @@ KategProblem::KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
massert(katFreq.nKats>0);
massert(mak<=aw);
-
- for(int i=1;i<MAX_H_TABLE;i++)
- {
- h_table[i]=i*log((double)(i));
- l_table[i]=log((double)(i));
- hmy_table[i]=i*log(verfaelsche(i,sigmaVerfaelschung));
- }
+
+ for(int i=1; i<MAX_H_TABLE; i++) {
+ h_table[i]=i*log((double)(i));
+ l_table[i]=log((double)(i));
+ hmy_table[i]=i*log(verfaelsche(i,sigmaVerfaelschung));
+ }
hmy_sigma=sigmaVerfaelschung;
l_table[0]=h_table[0]=0;
- if( katwahl()==K_BEST )
- _maxCompVal=1;
-
+ if( katwahl()==K_BEST )
+ _maxCompVal=1;
+
}
KategProblem::~KategProblem()
@@ -84,186 +83,170 @@ void KategProblem::_initialize(int initTyp)
}
void KategProblem::_initialize(int initTyp,int specialFixedWord)
-
+
{
massert(wordFreq.filled);
- initialisierung = initTyp;
- int i;
-
- for(i=0;i<katFreq.nKats;i++)
- for(int j=0;j<katFreq.nKats;j++)
+ initialisierung = initTyp;
+ int i;
+
+ for(i=0; i<katFreq.nKats; i++)
+ for(int j=0; j<katFreq.nKats; j++)
katFreq.setN(i,j,0);
-
-
-
- for(i=0;i<wordFreq.nWords;i++)
- {
- setKatOfWord(i,-1);
- if( strcmp(getString(i),"$")==0||strcmp(getString(i),"1$")==0||strcmp(getString(i),"2$")==0||strcmp(getString(i),"3$")==0||strcmp(getString(i),"4$")==0 )
- wordFreq.setDollar(i);
- }
- wordFreq.init(specialFixedWord);
-
-
-
-
+
+
+
+ for(i=0; i<wordFreq.nWords; i++) {
+ setKatOfWord(i,-1);
+ if( strcmp(getString(i),"$")==0||strcmp(getString(i),"1$")==0||strcmp(getString(i),"2$")==0||strcmp(getString(i),"3$")==0||strcmp(getString(i),"4$")==0 )
+ wordFreq.setDollar(i);
+ }
+ wordFreq.init(specialFixedWord);
+
+
+
+
_maxComp=wordFreq.nTranspWords;
-
- switch(initTyp)
- {
- case INIT_OTHER:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_OTHER)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,initLike[i]);
- break;
- case INIT_RAN:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_RAN)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- if( wordFreq.minIndex[i]>0 && wordFreq.maxIndex[i]>0 )
- fastPutWord(i,wordFreq.minIndex[i]+randomInt(wordFreq.maxIndex[i]-wordFreq.minIndex[i]+1));
- else
- fastPutWord(i,2+randomInt(katFreq.nKats-2));
- }
-
-
- break;
- case INIT_AIO:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_AIO)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,2);
- break;
- case INIT_FREQ:
-
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_FREQ)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- int to=i+2;
- if( to>=katFreq.nKats )
- to=katFreq.nKats-1;
- fastPutWord((*(wordFreq.absteigend))[i],to);
- }
- curComp=katFreq.nKats-2;
- break;
- case INIT_LWRW:
-
- {
- Array<int> markList(wordFreq.nWords,1);
- int to=2;
- int i=0;
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_LWRW)\n";
- for(to=2;to<katFreq.nKats*LWRW_Faktor;to++)
- {
- int w=(*(wordFreq.absteigend))[to-2];
- fastPutWord(w,to);
- markList[w]=0;
- }
- while(to<katFreq.nKats-1 && i<wordFreq.nWords)
- {
- int toFilled=0;
- int word=(*(wordFreq.absteigend))[i];
- if(i%2)
- {
- ManyFreq &after=wordFreq.after[word];
- for(int j=0;j<after.size();j++)
- {
- int w=after[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- else
- {
- ManyFreq &before=wordFreq.before[word];
- for(int j=0;j<before.size();j++)
- {
- int w=before[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- i++;
- if( toFilled>0 )
- to++;
- }
- for(i=0;i<wordFreq.nWords;i++)
- if(markList[i])
- fastPutWord(i,katFreq.nKats-1);
- }
- break;
- default:
- cerr << "Wrong _initialize in KategProblem: " << initTyp << endl;
- exit(1);
- }
-
-
- for(int word=0;word<wordFreq.nWords;word++)
- {
- Array<OneFreq>& aft=wordFreq.after[word];
-
- int nAft=aft.size();
-
- for(i=0;i<nAft;i++)
- katFreq.addN(katOfWord(word),katOfWord(aft[i].w),aft[i].n);
+ switch(initTyp) {
+ case INIT_OTHER:
+
+ if(verboseMode>2)cout << "KategProblem::_initialize(INIT_OTHER)\n";
+ for(i=0; i<wordFreq.nWords; i++)
+ fastPutWord(i,initLike[i]);
+ break;
+ case INIT_RAN:
+
+ if(verboseMode>2)cout << "KategProblem::_initialize(INIT_RAN)\n";
+ for(i=0; i<wordFreq.nWords; i++) {
+ if( wordFreq.minIndex[i]>0 && wordFreq.maxIndex[i]>0 )
+ fastPutWord(i,wordFreq.minIndex[i]+randomInt(wordFreq.maxIndex[i]-wordFreq.minIndex[i]+1));
+ else
+ fastPutWord(i,2+randomInt(katFreq.nKats-2));
}
- if(verboseMode>2)
- {
- cout << "\nInitialization of KategProblem:";
- dumpOn(cout);
+
+ break;
+ case INIT_AIO:
+
+ if(verboseMode>2)cout << "KategProblem::_initialize(INIT_AIO)\n";
+ for(i=0; i<wordFreq.nWords; i++)
+ fastPutWord(i,2);
+ break;
+ case INIT_FREQ:
+
+ if(verboseMode>2)cout << "KategProblem::_initialize(INIT_FREQ)\n";
+ for(i=0; i<wordFreq.nWords; i++) {
+ int to=i+2;
+ if( to>=katFreq.nKats )
+ to=katFreq.nKats-1;
+ fastPutWord((*(wordFreq.absteigend))[i],to);
+ }
+ curComp=katFreq.nKats-2;
+ break;
+ case INIT_LWRW:
+
+ {
+ Array<int> markList(wordFreq.nWords,1);
+ int to=2;
+ int i=0;
+ if(verboseMode>2)cout << "KategProblem::_initialize(INIT_LWRW)\n";
+ for(to=2; to<katFreq.nKats*LWRW_Faktor; to++) {
+ int w=(*(wordFreq.absteigend))[to-2];
+ fastPutWord(w,to);
+ markList[w]=0;
+ }
+ while(to<katFreq.nKats-1 && i<wordFreq.nWords) {
+ int toFilled=0;
+ int word=(*(wordFreq.absteigend))[i];
+ if(i%2) {
+ ManyFreq &after=wordFreq.after[word];
+ for(int j=0; j<after.size(); j++) {
+ int w=after[j].w;
+ if( markList[w] )
+ fastPutWord(w,to),toFilled++;
+ markList[w]=0;
+ }
+ } else {
+ ManyFreq &before=wordFreq.before[word];
+ for(int j=0; j<before.size(); j++) {
+ int w=before[j].w;
+ if( markList[w] )
+ fastPutWord(w,to),toFilled++;
+ markList[w]=0;
+ }
+ }
+ i++;
+ if( toFilled>0 )
+ to++;
}
+ for(i=0; i<wordFreq.nWords; i++)
+ if(markList[i])
+ fastPutWord(i,katFreq.nKats-1);
+ }
+ break;
+ default:
+ cerr << "Wrong _initialize in KategProblem: " << initTyp << endl;
+ exit(1);
+ }
+
+
+
+ for(int word=0; word<wordFreq.nWords; word++) {
+ Array<OneFreq>& aft=wordFreq.after[word];
+
+ int nAft=aft.size();
+
+ for(i=0; i<nAft; i++)
+ katFreq.addN(katOfWord(word),katOfWord(aft[i].w),aft[i].n);
+ }
+
+ if(verboseMode>2) {
+ cout << "\nInitialization of KategProblem:";
+ dumpOn(cout);
+ }
}
double KategProblem::valueChange(ProblemChange&c)
-
+
{
numberOfPartEvaluations++;
KategProblemChange &k=*(KategProblemChange *)&c;
- fillNWG(k.word);
-
- return _valueChange(k);
+ fillNWG(k.word);
+
+ return _valueChange(k);
}
Problem *KategProblem::makeEqualProblem()
-
+
{
KategProblem*p = new KategProblem(wordFreq.nWords,katFreq.nKats-2,initialisierung,
- auswertung,nachbarschaft);
+ auswertung,nachbarschaft);
KategProblemWBC &w=p->wordFreq;
- for(int x=0;x<wordFreq.nWords;x++)
- {
- w.setAfterWords(x,wordFreq.after[x].size());
- w.setBeforeWords(x,wordFreq.before[x].size());
- }
- int i;
- for(i=0;i<wordFreq.nWords;i++)
- {
- for(int j=0;j<wordFreq.after[i].size();j++)
- w.setFreq(i,wordFreq.after[i][j].w,wordFreq.after[i][j].n);
- }
+ for(int x=0; x<wordFreq.nWords; x++) {
+ w.setAfterWords(x,wordFreq.after[x].size());
+ w.setBeforeWords(x,wordFreq.before[x].size());
+ }
+ int i;
+ for(i=0; i<wordFreq.nWords; i++) {
+ for(int j=0; j<wordFreq.after[i].size(); j++)
+ w.setFreq(i,wordFreq.after[i][j].w,wordFreq.after[i][j].n);
+ }
w.testFull();
w.mindestAnzahl = wordFreq.mindestAnzahl;
if(words)
p->words = new leda_array<string>(*words);
- for(i=0;i<wordFreq.nWords;i++)
- {
- p->setKatOfWord(i,katOfWord(i));
- p->initLike[i]=initLike[i];
- }
+ for(i=0; i<wordFreq.nWords; i++) {
+ p->setKatOfWord(i,katOfWord(i));
+ p->initLike[i]=initLike[i];
+ }
p->setValuesFrom(this);
return p;
}
double KategProblem::nicevalue(double val)
-
+
{
double v;
if( val!=1e100)
@@ -279,79 +262,65 @@ double KategProblem::nicevalue(double val)
}
void KategProblem::makeKats()
-
+
{
if(kats)delete kats;
kats = new leda_array<intSet>(katFreq.nKats);
- for(int i=0;i<wordFreq.nWords;i++)
+ for(int i=0; i<wordFreq.nWords; i++)
(*kats)[katOfWord(i)].insert(i);
}
void KategProblem::dumpInfos(ostream &strm)
-
+
{
strm << ";KategProblem:";
- strm << "cats: " << katFreq.nKats-2 << " words: " << wordFreq.nWords
- << endl;
+ strm << "cats: " << katFreq.nKats-2 << " words: " << wordFreq.nWords
+ << endl;
}
void KategProblem::dumpOn(ostream &strm)
-
+
{
writeClasses(_katOfWord,*this,strm);
- if(PrintBestTo2)
- {
- dumpInfos(*PrintBestTo2);
- makeKats();
- if( kats==0 )
- {
- if( words==0 )
- {
- for(int i=0;i<wordFreq.nWords;i++)
- {
- *PrintBestTo2 << i << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- for(int i=0;i<wordFreq.nWords;i++)
- *PrintBestTo2 << (*words)[i] << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- int anzkat=0;
- for(int i=0;i<katFreq.nKats;i++)
- {
- int printed=0;
- *PrintBestTo2 << i << ":";
- leda_set<int>&theSet = (*kats)[i];
- if( words==0 )
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << nr << ", ";
- printed=1;
- }
- }
- else
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << (*words)[nr]<< ",";
- printed=1;
- }
- }
- if(printed==1)anzkat++;
- *PrintBestTo2 << endl;
- }
- *PrintBestTo2 << ";I have " << anzkat << " categories used.\n";
- }
- *PrintBestTo2 << endl;
- Problem::dumpOn(*PrintBestTo2);
+ if(PrintBestTo2) {
+ dumpInfos(*PrintBestTo2);
+ makeKats();
+ if( kats==0 ) {
+ if( words==0 ) {
+ for(int i=0; i<wordFreq.nWords; i++) {
+ *PrintBestTo2 << i << ":" << katOfWord(i) << " ";
+ }
+ } else {
+ for(int i=0; i<wordFreq.nWords; i++)
+ *PrintBestTo2 << (*words)[i] << ":" << katOfWord(i) << " ";
+ }
+ } else {
+ int anzkat=0;
+ for(int i=0; i<katFreq.nKats; i++) {
+ int printed=0;
+ *PrintBestTo2 << i << ":";
+ leda_set<int>&theSet = (*kats)[i];
+ if( words==0 ) {
+ int nr=0;
+ forall_set(leda_set<int>,nr,theSet) {
+ *PrintBestTo2 << nr << ", ";
+ printed=1;
+ }
+ } else {
+ int nr=0;
+ forall_set(leda_set<int>,nr,theSet) {
+ *PrintBestTo2 << (*words)[nr]<< ",";
+ printed=1;
+ }
+ }
+ if(printed==1)anzkat++;
+ *PrintBestTo2 << endl;
+ }
+ *PrintBestTo2 << ";I have " << anzkat << " categories used.\n";
}
+ *PrintBestTo2 << endl;
+ Problem::dumpOn(*PrintBestTo2);
+ }
}
@@ -360,7 +329,7 @@ void KategProblem::dumpOn(ostream &strm)
const char *KategProblem::getString(int i)
-
+
{
if(words==0)
return "<>";
@@ -374,7 +343,7 @@ string KategProblem::getTheString(int i)
}
int KategProblem::maxNonBetterIterations()
-
+
{
if(katwahl()==K_BEST)
return wordFreq.nTranspWords;
@@ -385,7 +354,7 @@ int KategProblem::maxNonBetterIterations()
int KategProblem::expectedNumberOfIterations()
{
-
+
if(katwahl()==K_BEST)
return 10*wordFreq.nTranspWords;
else
@@ -393,215 +362,202 @@ int KategProblem::expectedNumberOfIterations()
}
void KategProblem::makeTitle(char x[512])
-
+
{
const char *ww;
const char *kw;
const char *in;
- switch(wortwahl())
- {
- case W_RAN:
- ww="zufaellig";
- break;
- case W_DET_DECR:
- ww="absteigend";
- break;
- case W_DET_INCR:
- ww="aufsteigend";
- break;
- default:
- cerr << "Error: unknown word selection\n";
- exit(1);
- }
- switch(katwahl())
- {
- case K_DET:
- kw="rotierend";
- break;
- case K_RAN:
- kw="zufaellig";
- break;
- case K_BEST:
- kw="best ";
- break;
- default:
- cout << "Error: unknown cagegory selection\n";
- exit(1);
- }
- switch(initialisierung)
- {
- case INIT_RAN:
- in="zufaellig ";
- break;
- case INIT_AIO:
- in="all-in-one";
- break;
- case INIT_LWRW:
- in="lwrw ";
- break;
- case INIT_FREQ:
- in="freq ";
- break;
- case INIT_OTHER:
- in="other ";
- break;
- default:
- cout << "Error: unknown initialization\n";
- exit(1);
- }
+ switch(wortwahl()) {
+ case W_RAN:
+ ww="zufaellig";
+ break;
+ case W_DET_DECR:
+ ww="absteigend";
+ break;
+ case W_DET_INCR:
+ ww="aufsteigend";
+ break;
+ default:
+ cerr << "Error: unknown word selection\n";
+ exit(1);
+ }
+ switch(katwahl()) {
+ case K_DET:
+ kw="rotierend";
+ break;
+ case K_RAN:
+ kw="zufaellig";
+ break;
+ case K_BEST:
+ kw="best ";
+ break;
+ default:
+ cout << "Error: unknown cagegory selection\n";
+ exit(1);
+ }
+ switch(initialisierung) {
+ case INIT_RAN:
+ in="zufaellig ";
+ break;
+ case INIT_AIO:
+ in="all-in-one";
+ break;
+ case INIT_LWRW:
+ in="lwrw ";
+ break;
+ case INIT_FREQ:
+ in="freq ";
+ break;
+ case INIT_OTHER:
+ in="other ";
+ break;
+ default:
+ cout << "Error: unknown initialization\n";
+ exit(1);
+ }
sprintf(x,"(c:%d,w:%d(%d),ww:%s,kw:%s,in:%s)",katFreq.nKats,wordFreq.nWords,
- wordFreq.nTranspWords,ww,kw,in);
+ wordFreq.nTranspWords,ww,kw,in);
}
-
-
-
+
+
+
int KategProblem::_change(ProblemChange **p)
-
+
{
*p=0;
int word=curDimension();
- switch( wortwahl() )
- {
- case W_RAN:
- word=(*(wordFreq.absteigend))[randomInt(wordFreq.nTranspWords)];
- break;
- case W_DET_DECR:
- word=(*(wordFreq.absteigend))[word];
- break;
- case W_DET_INCR:
- word=(*(wordFreq.absteigend))[wordFreq.nTranspWords-word-1];
- break;
- default:
- cerr << "Error: Unknown word selection\n";
- exit(1);
- }
+ switch( wortwahl() ) {
+ case W_RAN:
+ word=(*(wordFreq.absteigend))[randomInt(wordFreq.nTranspWords)];
+ break;
+ case W_DET_DECR:
+ word=(*(wordFreq.absteigend))[word];
+ break;
+ case W_DET_INCR:
+ word=(*(wordFreq.absteigend))[wordFreq.nTranspWords-word-1];
+ break;
+ default:
+ cerr << "Error: Unknown word selection\n";
+ exit(1);
+ }
+
+ int kat=curDimensionVal()+2;
+ switch( katwahl() ) {
+ case K_RAN:
+ kat=randomInt(katFreq.nKats-2)+2;
+
+ case K_DET:
+
+
+ if( kat==katOfWord(word)||(katWasEmpty&&katFreq.n1(kat)==0) )
+ return 0;
+ else if( wordFreq.minIndex[word]>0 && wordFreq.maxIndex[word]>0 && (kat<wordFreq.minIndex[word]||kat>wordFreq.maxIndex[word])) {
- int kat=curDimensionVal()+2;
- switch( katwahl() )
- {
- case K_RAN:
- kat=randomInt(katFreq.nKats-2)+2;
-
- case K_DET:
-
-
- if( kat==katOfWord(word)||(katWasEmpty&&katFreq.n1(kat)==0) )
- return 0;
- else if( wordFreq.minIndex[word]>0 && wordFreq.maxIndex[word]>0 && (kat<wordFreq.minIndex[word]||kat>wordFreq.maxIndex[word]))
- {
-
- return 0;
- }
- else
- {
- KategProblemChange *c = new KategProblemChange;
- c->toKat=kat;
- c->word=word;
- c->fromKat=katOfWord(c->word);
- massert( c->toKat < katFreq.nKats );
- massert( c->fromKat < katFreq.nKats );
- massert( c->word < wordFreq.nWords );
- massert( c->toKat!=0 && c->toKat!=1 );
- massert( c->fromKat!=0 && c->fromKat!=1 );
- if(katFreq.n1(kat)==0)
- katWasEmpty=1;
- *p=c;
- return 1;
- }
- break;
- case K_BEST:
- {
- fillNWG(word);
- double smallest=1e100;
- KategProblemChange &smallestChange = *new KategProblemChange;
- short withEmpty=0;
-
-
- int startKat=2;
- int endKat=katFreq.nKats;
- if( wordFreq.minIndex[word]>0&&wordFreq.maxIndex[word]>0 )
- {
- startKat = max(2,wordFreq.minIndex[word]);
- endKat = min(katFreq.nKats,wordFreq.maxIndex[word]+1);
- }
- for(kat=startKat;kat<endKat;kat++)
- {
- if( kat!=katOfWord(word) && (withEmpty==0 || katFreq.n1(kat)
- || katFreq.n2(kat)) )
- {
- KategProblemChange c;
- c.toKat=kat;
- c.word=word;
- c.fromKat=katOfWord(word);
- double n=_valueChange(c);
- if(n<smallest)
- {
- smallest=n;
- smallestChange=c;
- }
- }
- if( katFreq.n1(kat)==0 && katFreq.n2(kat)==0 )
- withEmpty=1;
- }
- massert(smallest!=1e100);
- *p= &smallestChange;
- return 1;
- }
- break;
- default:
- cerr << "Error: Unknown category selection\n";
- exit(1);
return 0;
+ } else {
+ KategProblemChange *c = new KategProblemChange;
+ c->toKat=kat;
+ c->word=word;
+ c->fromKat=katOfWord(c->word);
+ massert( c->toKat < katFreq.nKats );
+ massert( c->fromKat < katFreq.nKats );
+ massert( c->word < wordFreq.nWords );
+ massert( c->toKat!=0 && c->toKat!=1 );
+ massert( c->fromKat!=0 && c->fromKat!=1 );
+ if(katFreq.n1(kat)==0)
+ katWasEmpty=1;
+ *p=c;
+ return 1;
+ }
+ break;
+ case K_BEST: {
+ fillNWG(word);
+ double smallest=1e100;
+ KategProblemChange &smallestChange = *new KategProblemChange;
+ short withEmpty=0;
+
+
+ int startKat=2;
+ int endKat=katFreq.nKats;
+ if( wordFreq.minIndex[word]>0&&wordFreq.maxIndex[word]>0 ) {
+ startKat = max(2,wordFreq.minIndex[word]);
+ endKat = min(katFreq.nKats,wordFreq.maxIndex[word]+1);
}
+ for(kat=startKat; kat<endKat; kat++) {
+ if( kat!=katOfWord(word) && (withEmpty==0 || katFreq.n1(kat)
+ || katFreq.n2(kat)) ) {
+ KategProblemChange c;
+ c.toKat=kat;
+ c.word=word;
+ c.fromKat=katOfWord(word);
+ double n=_valueChange(c);
+ if(n<smallest) {
+ smallest=n;
+ smallestChange=c;
+ }
+ }
+ if( katFreq.n1(kat)==0 && katFreq.n2(kat)==0 )
+ withEmpty=1;
+ }
+ massert(smallest!=1e100);
+ *p= &smallestChange;
+ return 1;
+ }
+ break;
+ default:
+ cerr << "Error: Unknown category selection\n";
+ exit(1);
+ return 0;
+ }
}
void KategProblem::_doChange(ProblemChange &c)
-
+
{
KategProblemChange &k=*(KategProblemChange *)&c;
putWord(k.word,k.toKat);
-
+
}
void KategProblem::_undoChange(ProblemChange &c)
-
+
{
KategProblemChange &k=*(KategProblemChange *)&c;
putWord(k.word,k.fromKat);
-
+
}
void KategProblem::incrementDirection()
-
+
{
Problem::incrementDirection();
- katWasEmpty=0;
+ katWasEmpty=0;
massert( _maxComp==wordFreq.nTranspWords );
}
double KategProblem::_value()
-
+
{
-
+
return katFreq.fullBewertung(auswertung);
}
double mkat_h_full(int n,double tf)
{
-
-
+
+
if( tf>0 )
return n*log(tf);
- else
+ else
return 0.0;
}
double mkat_h_part(int n,double cf)
{
-
-
+
+
if( cf>0.0 )
return n*log(cf);
else
@@ -628,210 +584,194 @@ double KategProblem::kat_h_part(double n)
return mkat_h_part((int)n,verfaelsche(n,sigmaVerfaelschung));
}
-
-
-
+
+
+
double KategProblem::nmo_my(int i,int j)
-
+
{
FreqType n=nstrich(i,j),k=katFreq.n(i,j);
return kat_h_full(n+k)-kat_h_full(k);
}
double KategProblem::nmo(int i,int j)
-
+
{
FreqType n=nstrich(i,j),k=katFreq.n(i,j);
return kat_h(n+k)-kat_h(k);
}
double KategProblem::nmo_lo(int i,int j,int &e0,int &e1)
-
+
{
FreqType kij=katFreq.n(i,j);
FreqType nij=nstrich(i,j)+kij;
- if( kij!=nij)
- {
- if( nij==0 )
- e0++;
- else if(nij==1)
- e1++;
- if( kij==0 )
- e0--;
- else if(kij==1)
- e1--;
- }
+ if( kij!=nij) {
+ if( nij==0 )
+ e0++;
+ else if(nij==1)
+ e1++;
+ if( kij==0 )
+ e0--;
+ else if(kij==1)
+ e1--;
+ }
return nij*kat_mlog(nij-1-rhoLo)-kij*kat_mlog(kij-1-rhoLo);
}
double KategProblem::_valueChange(KategProblemChange &k)
-
+
{
double v=0;
int i=0;
-
+
ursprung=k.fromKat;
ziel=k.toKat;
- if( auswertung==CRITERION_LO )
- {
- int e0a=katFreq.eta0,e1a=katFreq.eta1;
- v-=nmo_lo(ursprung,ursprung,e0a,e1a)+nmo_lo(ziel,ziel,e0a,e1a)
- +nmo_lo(ursprung,ziel,e0a,e1a)+nmo_lo(ziel,ursprung,e0a,e1a);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(ursprung,cl,e0a,e1a)+nmo_lo(ziel,cl,e0a,e1a);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(cl,ursprung,e0a,e1a)+nmo_lo(cl,ziel,e0a,e1a);
- i++;
- }
-
- v+=kat_hlo(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ursprung))
- +kat_hlo(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ursprung))
- +kat_hlo(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ziel))
- +kat_hlo(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ziel));
-
- int old0=katFreq.c1_0*katFreq.nKats+katFreq.c2_0*katFreq.nKats
- -katFreq.c1_0*katFreq.c2_0;
- int nc1_0=katFreq.c1_0,nc2_0=katFreq.c2_0;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ursprung)==wordFreq.n1(k.word) )
- nc1_0++;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ursprung)==wordFreq.n2(k.word) )
- nc2_0++;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ziel)==0 ) nc1_0--;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ziel)==0 ) nc2_0--;
- int new0=nc1_0*katFreq.nKats+nc2_0*katFreq.nKats-nc1_0*nc2_0;
- v-=kat_etaFkt(e0a,e1a,new0,katFreq.nKats)
- -kat_etaFkt(katFreq.eta0,katFreq.eta1,old0,katFreq.nKats);
- vassert(NULLFLOAT(Problem::valueChange(k)-v));
+ if( auswertung==CRITERION_LO ) {
+ int e0a=katFreq.eta0,e1a=katFreq.eta1;
+ v-=nmo_lo(ursprung,ursprung,e0a,e1a)+nmo_lo(ziel,ziel,e0a,e1a)
+ +nmo_lo(ursprung,ziel,e0a,e1a)+nmo_lo(ziel,ursprung,e0a,e1a);
+ i=0;
+ while(i<nwg.anzNot0) {
+ int cl=nwg.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo_lo(ursprung,cl,e0a,e1a)+nmo_lo(ziel,cl,e0a,e1a);
+ i++;
}
- else if(auswertung==CRITERION_ML)
- {
- v-=nmo(ursprung,ursprung)+nmo(ziel,ziel)
- +nmo(ursprung,ziel)+nmo(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(ursprung,cl)+nmo(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(cl,ursprung)+nmo(cl,ziel);
- i++;
- }
- v+=kat_h(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ursprung))
- +kat_h(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ursprung))
- +kat_h(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ziel))
- +kat_h(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ziel));
+ i=0;
+ while(i<ngw.anzNot0) {
+ int cl=ngw.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo_lo(cl,ursprung,e0a,e1a)+nmo_lo(cl,ziel,e0a,e1a);
+ i++;
}
- else if( auswertung==CRITERION_MY )
- {
- v-=nmo_my(ursprung,ursprung)+nmo_my(ziel,ziel)
- +nmo_my(ursprung,ziel)+nmo_my(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(ursprung,cl)+nmo_my(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(cl,ursprung)+nmo_my(cl,ziel);
- i++;
- }
- v+=kat_h_part(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ursprung))
- +kat_h_part(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ursprung))
- +kat_h_part(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ziel))
- +kat_h_part(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ziel));
- double bishZusatz = katFreq.myCriterionTerm();
- _doChange(k);
- double neuZusatz = katFreq.myCriterionTerm();
- _undoChange(k);
- if(verboseMode>2)
- cout << "ZUSATZ: " << bishZusatz << " " << neuZusatz << " " <<neuZusatz-bishZusatz<<" " << v << endl;
- v+=neuZusatz-bishZusatz;
+
+ v+=kat_hlo(katFreq.n1(ursprung)-wordFreq.n1(k.word))
+ -kat_hlo(katFreq.n1(ursprung))
+ +kat_hlo(katFreq.n2(ursprung)-wordFreq.n2(k.word))
+ -kat_hlo(katFreq.n2(ursprung))
+ +kat_hlo(katFreq.n1(ziel)+wordFreq.n1(k.word))
+ -kat_hlo(katFreq.n1(ziel))
+ +kat_hlo(katFreq.n2(ziel)+wordFreq.n2(k.word))
+ -kat_hlo(katFreq.n2(ziel));
+
+ int old0=katFreq.c1_0*katFreq.nKats+katFreq.c2_0*katFreq.nKats
+ -katFreq.c1_0*katFreq.c2_0;
+ int nc1_0=katFreq.c1_0,nc2_0=katFreq.c2_0;
+ if( wordFreq.n1(k.word)>0 && katFreq.n1(ursprung)==wordFreq.n1(k.word) )
+ nc1_0++;
+ if( wordFreq.n2(k.word)>0 && katFreq.n2(ursprung)==wordFreq.n2(k.word) )
+ nc2_0++;
+ if( wordFreq.n1(k.word)>0 && katFreq.n1(ziel)==0 ) nc1_0--;
+ if( wordFreq.n2(k.word)>0 && katFreq.n2(ziel)==0 ) nc2_0--;
+ int new0=nc1_0*katFreq.nKats+nc2_0*katFreq.nKats-nc1_0*nc2_0;
+ v-=kat_etaFkt(e0a,e1a,new0,katFreq.nKats)
+ -kat_etaFkt(katFreq.eta0,katFreq.eta1,old0,katFreq.nKats);
+ vassert(NULLFLOAT(Problem::valueChange(k)-v));
+ } else if(auswertung==CRITERION_ML) {
+ v-=nmo(ursprung,ursprung)+nmo(ziel,ziel)
+ +nmo(ursprung,ziel)+nmo(ziel,ursprung);
+ i=0;
+ while(i<nwg.anzNot0) {
+ int cl=nwg.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo(ursprung,cl)+nmo(ziel,cl);
+ i++;
}
- else
- {
- cerr << "Fatal error: Unknown criterion: '"<<auswertung<<"'\n";
+ i=0;
+ while(i<ngw.anzNot0) {
+ int cl=ngw.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo(cl,ursprung)+nmo(cl,ziel);
+ i++;
}
+ v+=kat_h(katFreq.n1(ursprung)-wordFreq.n1(k.word))
+ -kat_h(katFreq.n1(ursprung))
+ +kat_h(katFreq.n2(ursprung)-wordFreq.n2(k.word))
+ -kat_h(katFreq.n2(ursprung))
+ +kat_h(katFreq.n1(ziel)+wordFreq.n1(k.word))
+ -kat_h(katFreq.n1(ziel))
+ +kat_h(katFreq.n2(ziel)+wordFreq.n2(k.word))
+ -kat_h(katFreq.n2(ziel));
+ } else if( auswertung==CRITERION_MY ) {
+ v-=nmo_my(ursprung,ursprung)+nmo_my(ziel,ziel)
+ +nmo_my(ursprung,ziel)+nmo_my(ziel,ursprung);
+ i=0;
+ while(i<nwg.anzNot0) {
+ int cl=nwg.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo_my(ursprung,cl)+nmo_my(ziel,cl);
+ i++;
+ }
+ i=0;
+ while(i<ngw.anzNot0) {
+ int cl=ngw.not0[i];
+ if( cl!= ursprung && cl!=ziel )
+ v -= nmo_my(cl,ursprung)+nmo_my(cl,ziel);
+ i++;
+ }
+ v+=kat_h_part(katFreq.n1(ursprung)-wordFreq.n1(k.word))
+ -kat_h_part(katFreq.n1(ursprung))
+ +kat_h_part(katFreq.n2(ursprung)-wordFreq.n2(k.word))
+ -kat_h_part(katFreq.n2(ursprung))
+ +kat_h_part(katFreq.n1(ziel)+wordFreq.n1(k.word))
+ -kat_h_part(katFreq.n1(ziel))
+ +kat_h_part(katFreq.n2(ziel)+wordFreq.n2(k.word))
+ -kat_h_part(katFreq.n2(ziel));
+ double bishZusatz = katFreq.myCriterionTerm();
+ _doChange(k);
+ double neuZusatz = katFreq.myCriterionTerm();
+ _undoChange(k);
+ if(verboseMode>2)
+ cout << "ZUSATZ: " << bishZusatz << " " << neuZusatz << " " <<neuZusatz-bishZusatz<<" " << v << endl;
+ v+=neuZusatz-bishZusatz;
+ } else {
+ cerr << "Fatal error: Unknown criterion: '"<<auswertung<<"'\n";
+ }
vassert( NULLFLOAT(Problem::valueChange(k)-v) );
return v;
}
void KategProblem::fillNWG(int w)
-
+
{
if(nwgWord==w)
- return;
- else
- {
- Array<OneFreq> &after=wordFreq.after[w];
- int size=after.size(),i;
- nww=0;
- nwg.init();
- for(i=0;i<size;i++)
- {
- nwg.addFreq(katOfWord(after[i].w),after[i].n);
- if(after[i].w==w)
- nww=after[i].n;
- }
-
- Array<OneFreq> &before=wordFreq.before[w];
- size=before.size();
- ngw.init();
- for(i=0;i<size;i++)
- ngw.addFreq(katOfWord(before[i].w),before[i].n);
- nwgWord=w;
+ return;
+ else {
+ Array<OneFreq> &after=wordFreq.after[w];
+ int size=after.size(),i;
+ nww=0;
+ nwg.init();
+ for(i=0; i<size; i++) {
+ nwg.addFreq(katOfWord(after[i].w),after[i].n);
+ if(after[i].w==w)
+ nww=after[i].n;
}
+
+ Array<OneFreq> &before=wordFreq.before[w];
+ size=before.size();
+ ngw.init();
+ for(i=0; i<size; i++)
+ ngw.addFreq(katOfWord(before[i].w),before[i].n);
+ nwgWord=w;
+ }
}
void KategProblem::vnstrich(int i,int j)
-
+
{
cout << ".) " << katFreq.n(i,j) << " ";
if( i==ursprung )
cout << "a) "<<-nwg.getFreq(j) << " ";
if( i==ziel )
cout << "b) " <<nwg.getFreq(j) << " ";
-
+
if( j==ursprung )
cout << "c) " <<-ngw.getFreq(i) << " ";
if( j==ziel )
cout << "d) " <<+ngw.getFreq(i) << " " ;
-
+
if( i==ursprung && j==ursprung )
cout << "e) " <<+nww << " ";
if( i==ziel && j==ziel )
@@ -847,14 +787,14 @@ void KategProblem::vnstrich(int i,int j)
void KategProblem::fastPutWord(int word,int toKat)
-
+
{
massert(toKat>=0 && toKat<katFreq.nKats);
-
-
-
+
+
+
if( wordFreq.fixedWord[word]>=0 )
- toKat=wordFreq.fixedWord[word];
+ toKat=wordFreq.fixedWord[word];
massert(katOfWord(word)==-1);
setKatOfWord(word,toKat);
}
@@ -863,25 +803,24 @@ void KategProblem::fixInitLike()
{
int fixed=0,fixed2=0;
over_arr(initLike,i)
- if(initLike[i]>=0 )
- {
- fixed++;
- if( initLike[i]>=wordFreq.minIndex[i] || initLike[i]==1 )
- wordFreq.fixedWord[i]=initLike[i];
- else
- {
- wordFreq.fixedWord[i]=wordFreq.minIndex[i]+initLike[i]-2;
- fixed2++;
- }
- initLike[i]=-1;
- }
+ if(initLike[i]>=0 ) {
+ fixed++;
+ if( initLike[i]>=wordFreq.minIndex[i] || initLike[i]==1 )
+ wordFreq.fixedWord[i]=initLike[i];
+ else {
+ wordFreq.fixedWord[i]=wordFreq.minIndex[i]+initLike[i]-2;
+ fixed2++;
+ }
+ initLike[i]=-1;
+ }
cout << "Fixed from file are: " << fixed << " " << fixed2 << " words.\n";
}
void KategProblem::putWord(int word,int toKat)
-
+
{
- massert(toKat!=0);massert(toKat!=1);
+ massert(toKat!=0);
+ massert(toKat!=1);
massert(word<wordFreq.nWords);
massert(toKat<katFreq.nKats);
massert(wordFreq.fixedWord[word]<0);
@@ -893,66 +832,64 @@ void KategProblem::putWord(int word,int toKat)
int nBef=bef.size();
int i;
if(verboseMode>4)
- cout << "putWord(" << word << "," << toKat << ")" << k << " nAft"
- << nAft << " nBef" << nBef << " k" << k << "\n";
+ cout << "putWord(" << word << "," << toKat << ")" << k << " nAft"
+ << nAft << " nBef" << nBef << " k" << k << "\n";
- massert( k!=-1 );
+ massert( k!=-1 );
massert( k!=toKat );
- for(i=0;i<nAft;i++)
- {
- katFreq.addN(k,katOfWord(aft[i].w),-aft[i].n);
+ for(i=0; i<nAft; i++) {
+ katFreq.addN(k,katOfWord(aft[i].w),-aft[i].n);
+ if(verboseMode>4)
+ cout << k << " " << katOfWord(aft[i].w) << " " << -aft[i].n << endl;
+ }
+ for(i=0; i<nBef; i++)
+ if( bef[i].w!=word ) {
+ katFreq.addN(katOfWord(bef[i].w),k,-bef[i].n);
if(verboseMode>4)
- cout << k << " " << katOfWord(aft[i].w) << " " << -aft[i].n << endl;
+ cout << katOfWord(bef[i].w) << " " << k << " " << -bef[i].n << endl;
}
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word )
- {
- katFreq.addN(katOfWord(bef[i].w),k,-bef[i].n);
- if(verboseMode>4)
- cout << katOfWord(bef[i].w) << " " << k << " " << -bef[i].n << endl;
- }
-
+
setKatOfWord(word,toKat);
- for(i=0;i<nAft;i++)
+ for(i=0; i<nAft; i++)
katFreq.addN(toKat,katOfWord(aft[i].w),aft[i].n);
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word )
+ for(i=0; i<nBef; i++)
+ if( bef[i].w!=word )
katFreq.addN(katOfWord(bef[i].w),toKat,bef[i].n);
}
-
-
-
-static KategProblemChange theOneKategProblemChange;
+
+
+
+static KategProblemChange theOneKategProblemChange;
static int anzKategProblemChange=0;
void *KategProblemChange::operator new(size_t size)
-{
+{
anzKategProblemChange++;
massert(anzKategProblemChange>0);
massert(anzKategProblemChange<2);
if( anzKategProblemChange==1 )
return &theOneKategProblemChange;
- else
- {
- if( verboseMode>1 )
- cout << "generate instance of KategProblemChange: " << size
- << " " << anzKategProblemChange<< endl;
- return malloc(size);
- }
+ else {
+ if( verboseMode>1 )
+ cout << "generate instance of KategProblemChange: " << size
+ << " " << anzKategProblemChange<< endl;
+ return malloc(size);
+ }
}
-void KategProblemChange::operator delete(void *ptr,size_t
-)
-{ massert(size==sizeof(KategProblemChange));
+void KategProblemChange::operator delete(void *ptr,size_t
+ )
+{
+ massert(size==sizeof(KategProblemChange));
anzKategProblemChange--;
if( ptr!= &theOneKategProblemChange)
free(ptr);
@@ -960,9 +897,9 @@ void KategProblemChange::operator delete(void *ptr,size_t
-
-
-
+
+
+
@@ -998,4 +935,4 @@ int KategProblem::maxDimensionVal()
{
return _maxCompVal;
}
-
+
diff --git a/mgizapp/src/mkcls/KategProblem.h b/mgizapp/src/mkcls/KategProblem.h
index f040b85..4854816 100644
--- a/mgizapp/src/mkcls/KategProblem.h
+++ b/mgizapp/src/mkcls/KategProblem.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,9 +33,9 @@ USA.
#include <cstdlib>
#include "Problem.h"
-extern double rhoLo;
+extern double rhoLo;
-typedef int Kategory;
+typedef int Kategory;
typedef int Word;
@@ -47,22 +47,22 @@ typedef int FreqType;
#endif
-#include "KategProblemWBC.h"
+#include "KategProblemWBC.h"
-#include "KategProblemKBC.h"
+#include "KategProblemKBC.h"
-enum {
+enum {
INIT_RAN=1,
INIT_AIO=2,
INIT_LWRW=3,
INIT_FREQ=4,
INIT_OTHER=5
- };
+};
-enum {
+enum {
W_RAN=(8|16),
W_DET_DECR=(16),
W_DET_INCR =(32)
@@ -88,241 +88,245 @@ enum {
class NWG
{
- private:
- Array<FreqType> freq;
-
- Array<int> timeOfFreq;
-
-
-
-
- int curTime;
- public:
- NWG(int n);
- void init();
-
- int anzNot0;
-
-
- Array<int> not0;
-
- int word;
-
- inline void addFreq(int C,FreqType n);
-
- void sort();
-
- FreqType getFreq(int i)
- {
- if( timeOfFreq[i]==curTime )
- return freq[i];
- else
- return 0;
- };
+private:
+ Array<FreqType> freq;
+
+ Array<int> timeOfFreq;
+
+
+
+
+ int curTime;
+public:
+ NWG(int n);
+ void init();
+
+ int anzNot0;
+
+
+ Array<int> not0;
+
+ int word;
+
+ inline void addFreq(int C,FreqType n);
+
+ void sort();
+
+ FreqType getFreq(int i) {
+ if( timeOfFreq[i]==curTime )
+ return freq[i];
+ else
+ return 0;
+ };
};
inline void NWG::addFreq(int g,FreqType n)
{
if(timeOfFreq[g]==curTime)
freq[g]+=n;
- else
- {
- timeOfFreq[g]=curTime;
- freq[g]=n;
- not0[anzNot0++]=g;
- }
+ else {
+ timeOfFreq[g]=curTime;
+ freq[g]=n;
+ not0[anzNot0++]=g;
+ }
}
-struct KategProblemChange : public ProblemChange
-{
+struct KategProblemChange : public ProblemChange {
void *operator new(size_t size);
void operator delete(void *ptr,size_t size);
- int word;
- int toKat;
- int fromKat;
+ int word;
+ int toKat;
+ int fromKat;
};
class KategProblem : public Problem
{
- private:
+private:
double kat_h_full(int n);
double kat_h_full(double n);
double kat_h_part(int n);
double kat_h_part(double n);
double sigmaVerfaelschung;
short katWasEmpty;
-
-
+
+
int nwgWord;
-
- NWG nwg;
- NWG ngw;
- FreqType nww;
- int ursprung,ziel;
-
- Array<int> _katOfWord;
+ NWG nwg;
+ NWG ngw;
+ FreqType nww;
+
+ int ursprung,ziel;
+
+ Array<int> _katOfWord;
int _maxComp,_maxCompVal;
- double nmo_my(int i,int j);
- double nmo(int i,int j);
-
+ double nmo_my(int i,int j);
+ double nmo(int i,int j);
- double nmo_lo(int i,int j,int &e0,int &e1);
-
- void putWord(int word,int to);
-
+ double nmo_lo(int i,int j,int &e0,int &e1);
- void fastPutWord(int word,int to);
-
- void setKatOfWord(int w,int k)
-{
- if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
- {
+ void putWord(int word,int to);
+
+
+ void fastPutWord(int word,int to);
+
+
+ void setKatOfWord(int w,int k) {
+ if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) ) {
cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
}
- _katOfWord[w]=k;
- nwgWord=-1;
-};
-
+ _katOfWord[w]=k;
+ nwgWord=-1;
+ };
+
+
+ void fillNWG(int w);
+
+
+ inline FreqType nstrich(int i,int j);
- void fillNWG(int w);
-
- inline FreqType nstrich(int i,int j);
-
+ void vnstrich(int i,int j);
- void vnstrich(int i,int j);
-
- protected:
- virtual int _change(ProblemChange **p);
-
+protected:
+ virtual int _change(ProblemChange **p);
- virtual void _doChange(ProblemChange &c);
-
- virtual void _undoChange(ProblemChange &c);
-
+ virtual void _doChange(ProblemChange &c);
- virtual double _value();
-
- double _valueChange(KategProblemChange &k);
-
+ virtual void _undoChange(ProblemChange &c);
- virtual void incrementDirection();
-
- virtual int maxDimensionVal(void) ;
-
+ virtual double _value();
+
+
+ double _valueChange(KategProblemChange &k);
+
+
+ virtual void incrementDirection();
+
+
+ virtual int maxDimensionVal(void) ;
+
+
+ virtual int maxDimension(void) ;
+
- virtual int maxDimension(void) ;
-
-
public:
leda_array<string> *words;
-typedef leda_set<int> intSet;
+ typedef leda_set<int> intSet;
+
+ leda_array<intSet> *kats;
+
+ KategProblemWBC wordFreq;
+ KategProblemKBC katFreq;
-leda_array<intSet> *kats;
-
- KategProblemWBC wordFreq;
- KategProblemKBC katFreq;
+ Array<int> initLike;
+
+ KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
+ int _nachbarschaft,int minw=0);
- Array<int> initLike;
-
- KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int minw=0);
-
virtual ~KategProblem();
-
- virtual void _initialize(int initTyp);
- virtual void _initialize(int initTyp,int specialFixedWord);
-
- virtual double valueChange(ProblemChange&c);
-
+ virtual void _initialize(int initTyp);
+ virtual void _initialize(int initTyp,int specialFixedWord);
+
+
+ virtual double valueChange(ProblemChange&c);
+
+
+ virtual Problem *makeEqualProblem();
+
+
+ virtual double nicevalue(double value=1e100);
+
+
+ void makeKats();
+
+
+ virtual void dumpOn(ostream &strm);
+
+
+ virtual void dumpInfos(ostream &strm);
+
+
+
+
+
+ inline void katwahl(int k);
+
+
+ inline void wortwahl(int w);
+
+
- virtual Problem *makeEqualProblem();
-
- virtual double nicevalue(double value=1e100);
-
- void makeKats();
-
+ inline int katOfWord(int w);
- virtual void dumpOn(ostream &strm);
-
- virtual void dumpInfos(ostream &strm);
-
+ inline short wortwahl();
-
-
-
- inline void katwahl(int k);
-
- inline void wortwahl(int w);
-
+ inline short katwahl() ;
-
-
-
- inline int katOfWord(int w);
-
- inline short wortwahl();
-
+ virtual int maxNonBetterIterations();
- inline short katwahl() ;
-
- virtual int maxNonBetterIterations();
-
+ virtual int expectedNumberOfIterations();
- virtual int expectedNumberOfIterations();
-
- const char *getString(int i);
- string getTheString(int i);
-
+ const char *getString(int i);
+ string getTheString(int i);
- void makeTitle(char x[512]);
+ void makeTitle(char x[512]);
- void fixInitLike();
+
+ void fixInitLike();
};
-inline int KategProblem::katOfWord(int w){return _katOfWord[w];};
-inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;};
-inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;};
+inline int KategProblem::katOfWord(int w)
+{
+ return _katOfWord[w];
+};
+inline short KategProblem::wortwahl()
+{
+ return nachbarschaft&CHOOSE_WORD;
+};
+inline short KategProblem::katwahl()
+{
+ return nachbarschaft&CHOOSE_KAT;
+};
-inline void KategProblem::katwahl(int k)
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
- if(k==K_BEST)
- _maxCompVal=1;
- else
- _maxCompVal=katFreq.nKats-2;
- };
+inline void KategProblem::katwahl(int k)
+{
+ nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
+ if(k==K_BEST)
+ _maxCompVal=1;
+ else
+ _maxCompVal=katFreq.nKats-2;
+};
-inline void KategProblem::wortwahl(int w)
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
- };
+inline void KategProblem::wortwahl(int w)
+{
+ nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
+};
@@ -331,7 +335,7 @@ inline FreqType KategProblem::nstrich(int i,int j)
FreqType n=0;
if( i==ursprung )
- n-=nwg.getFreq(j);
+ n-=nwg.getFreq(j);
if( i==ziel )
n+=nwg.getFreq(j);
@@ -349,7 +353,7 @@ inline FreqType KategProblem::nstrich(int i,int j)
n-=nww;
if( i==ziel && j==ursprung )
n-=nww;
-
+
return n;
}
@@ -357,10 +361,10 @@ inline FreqType KategProblem::nstrich(int i,int j)
-#define MAX_H_TABLE 4000
+#define MAX_H_TABLE 4000
extern double h_table[],l_table[],hmy_table[],hmy_sigma;
-
+
inline double kat_mlog(double x)
{
if(x<=1e-9)
@@ -369,46 +373,41 @@ inline double kat_mlog(double x)
return log(x);
}
-
+
inline double kat_mlog(int s)
{
if(s<=0)
return 0;
- else if( s<MAX_H_TABLE )
- {
- massert( s==0 || l_table[s]==log(s) );
- return l_table[s];
- }
- else
+ else if( s<MAX_H_TABLE ) {
+ massert( s==0 || l_table[s]==log(s) );
+ return l_table[s];
+ } else
return log((double)(s));
}
-
+
inline double kat_hlo(int n)
{
return n*kat_mlog(n-1);
}
-
+
inline double kat_hlo(double n)
{
return n*kat_mlog(n-1);
}
-
+
inline double kat_h(int n)
{
massert(n>=-1);
if(n<=0)
return 0;
- else
- if(n<MAX_H_TABLE)
- {
- massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
- return h_table[n];
- }
- else
- return n*log((double)(n));
+ else if(n<MAX_H_TABLE) {
+ massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8);
+ return h_table[n];
+ } else
+ return n*log((double)(n));
}
inline double kat_h(double n)
{
@@ -418,7 +417,7 @@ inline double kat_h(double n)
return n*log(n);
}
-
+
inline double kat_etaFkt(int _e0,int e1,int immer0,int cats)
{
int e0 = _e0 - immer0;
diff --git a/mgizapp/src/mkcls/KategProblemKBC.cpp b/mgizapp/src/mkcls/KategProblemKBC.cpp
index ced780e..a973214 100644
--- a/mgizapp/src/mkcls/KategProblemKBC.cpp
+++ b/mgizapp/src/mkcls/KategProblemKBC.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -34,23 +34,20 @@ using namespace boost::math;
#endif
-double rhoLo=0.75;
+double rhoLo=0.75;
#define MAX_VERFAELSCHUNG 5000
double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0;
double verfaelsche(int a,double b)
{
-
- if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG )
- {
-
- massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
- return verfTab[a];
- }
- else
- {
- double x = b*(erf(10000.0) - erf(a/b))/2+a;
- return x;
- }
+
+ if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG ) {
+
+ massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a);
+ return verfTab[a];
+ } else {
+ double x = b*(erf(10000.0) - erf(a/b))/2+a;
+ return x;
+ }
}
double verfaelsche(double,double b)
{
@@ -58,109 +55,98 @@ double verfaelsche(double,double b)
return b;
}
-KategProblemKBC::KategProblemKBC(int s,double sv) :
+KategProblemKBC::KategProblemKBC(int s,double sv) :
_n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
_nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
eta0(s*s),eta1(0),c1_0(s),c2_0(s),
_bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
-
-{
+
+{
verfInit0=0.0;
int i;
- if( withVerfaelschung )
- {
- verfInit0=verfaelsche(0,sv);
- cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
- }
- for(i=0;i<s;i++)
- {
- _n[i].init(s,0);
- _nverf[i].init(s,verfInit0);
- _n1verf[i]=_n2verf[i]=verfInit0;
- _bigramVerfSum+=verfInit0*s;
- _unigramVerfSum1+=verfInit0;
- _unigramVerfSum2+=verfInit0;
- }
- if( withVerfaelschung )
- {
- cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
- }
+ if( withVerfaelschung ) {
+ verfInit0=verfaelsche(0,sv);
+ cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
+ }
+ for(i=0; i<s; i++) {
+ _n[i].init(s,0);
+ _nverf[i].init(s,verfInit0);
+ _n1verf[i]=_n2verf[i]=verfInit0;
+ _bigramVerfSum+=verfInit0*s;
+ _unigramVerfSum1+=verfInit0;
+ _unigramVerfSum2+=verfInit0;
+ }
+ if( withVerfaelschung ) {
+ cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
+ }
verfTabSigma=sigmaVerfaelschung;
-
-
-
+
+
+
}
void KategProblemKBC::setN(int w1,int w2, FreqType n)
-
+
{
addN(w1,w2,-_n[w1][w2]);
addN(w1,w2,n);
}
-
+
double KategProblemKBC::fullBewertung(int auswertung)
{
-
+
double bewertung=0;
int c1,c2;
-
-
- switch( auswertung )
- {
- case CRITERION_ML:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=kat_h(_n[c1][c2]);
- bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
- }
- break;
- case CRITERION_MY:
- {
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
- bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
- }
- double u1=_unigramVerfSum1-verfInit0*c1_0;
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
- if( verboseMode>1 )
- {
- cout << "CRITERION_MY: " << bewertung << endl;
- cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
- << "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
- << "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
- }
- if(b>0.000001)
- {
-
-
- if(verboseMode>1 )
- cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
- bewertung -= _nWords*log( u1 * u2 / b );
- if(verboseMode>1)
- cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
- }
- else
- cout << "B zu klein " << b << endl;
- }
- break;
- case CRITERION_LO:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
- bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
- }
- bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
- break;
- default:
- cerr << "Error: wrong criterion " << auswertung << endl;
- exit(1);
+
+
+ switch( auswertung ) {
+ case CRITERION_ML:
+ for(c1=0; c1<nKats; c1++) {
+ for(c2=0; c2<nKats; c2++)
+ bewertung-=kat_h(_n[c1][c2]);
+ bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
+ }
+ break;
+ case CRITERION_MY: {
+ for(c1=0; c1<nKats; c1++) {
+ for(c2=0; c2<nKats; c2++)
+ bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
+ bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
+ }
+ double u1=_unigramVerfSum1-verfInit0*c1_0;
+ double u2=_unigramVerfSum2-verfInit0*c2_0;
+ double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
+ if( verboseMode>1 ) {
+ cout << "CRITERION_MY: " << bewertung << endl;
+ cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
+ << "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
+ << "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
}
+ if(b>0.000001) {
+
+
+ if(verboseMode>1 )
+ cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
+ bewertung -= _nWords*log( u1 * u2 / b );
+ if(verboseMode>1)
+ cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
+ } else
+ cout << "B zu klein " << b << endl;
+ }
+ break;
+ case CRITERION_LO:
+ for(c1=0; c1<nKats; c1++) {
+ for(c2=0; c2<nKats; c2++)
+ bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
+ bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
+ }
+ bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
+ break;
+ default:
+ cerr << "Error: wrong criterion " << auswertung << endl;
+ exit(1);
+ }
return bewertung;
}
@@ -168,19 +154,18 @@ double KategProblemKBC::myCriterionTerm()
{
iassert( withVerfaelschung );
double r;
- double u1=_unigramVerfSum1-verfInit0*c1_0;
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
-
-
- if( verboseMode>1 )
- {
- cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
- cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
- cout << "0: "<<c1_0 << endl;
- }
+ double u1=_unigramVerfSum1-verfInit0*c1_0;
+ double u2=_unigramVerfSum2-verfInit0*c2_0;
+ double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
+
+
+ if( verboseMode>1 ) {
+ cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
+ cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
+ cout << "0: "<<c1_0 << endl;
+ }
r = _nWords*log( u1 * u2 / b );
-
+
return -r;
}
@@ -190,8 +175,8 @@ double KategProblemKBC::myCriterionTerm()
double KategProblemKBC::bigramVerfSum()
{
double sum=0;
- for(int c1=0;c1<nKats;c1++)
- for(int c2=0;c2<nKats;c2++)
+ for(int c1=0; c1<nKats; c1++)
+ for(int c2=0; c2<nKats; c2++)
sum+=nverf(c1,c2);
cout << "BIGRAMVERFSUM: " << sum << endl;
return sum;
@@ -200,7 +185,7 @@ double KategProblemKBC::bigramVerfSum()
double KategProblemKBC::unigramVerfSum1()
{
double sum=0;
- for(int c1=0;c1<nKats;c1++)
+ for(int c1=0; c1<nKats; c1++)
sum+=n1verf(c1);
cout << "UNIGRAMVERFSUM1: " << sum << endl;
return sum;
@@ -209,10 +194,10 @@ double KategProblemKBC::unigramVerfSum1()
double KategProblemKBC::unigramVerfSum2()
{
double sum=0;
- for(int c1=0;c1<nKats;c1++)
+ for(int c1=0; c1<nKats; c1++)
sum+=n2verf(c1);
cout << "UNIGRAMVERFSUM2: " << sum << endl;
- return sum;
+ return sum;
}
diff --git a/mgizapp/src/mkcls/KategProblemKBC.h b/mgizapp/src/mkcls/KategProblemKBC.h
index 4bac62a..54a44a3 100644
--- a/mgizapp/src/mkcls/KategProblemKBC.h
+++ b/mgizapp/src/mkcls/KategProblemKBC.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -41,15 +41,15 @@ class KategProblemKBC
{
- friend class KategProblem;
-
- private:
- Array<FreqArray> _n;
- Array<FreqType> _n1;
-
- Array<FreqType> _n2;
-
-
+ friend class KategProblem;
+
+private:
+ Array<FreqArray> _n;
+ Array<FreqType> _n1;
+
+ Array<FreqType> _n2;
+
+
double sigmaVerfaelschung;
short withVerfaelschung;
@@ -58,100 +58,112 @@ class KategProblemKBC
Array<double> _n2verf;
FreqType _nWords;
- protected:
- int eta0;
- int eta1;
- int c1_0;
- int c2_0;
+protected:
+ int eta0;
+ int eta1;
+ int c1_0;
+ int c2_0;
double _bigramVerfSum;
double _unigramVerfSum1;
double _unigramVerfSum2;
double verfInit0;
- public:
- int nKats;
+public:
+ int nKats;
KategProblemKBC(int nKats,double sv);
-
+
double fullBewertung(int auswertung);
-
- FreqType n(int w1,int w2) { return _n[w1][w2]; };
-
- FreqType n1(int w) { return _n1[w];};
-
+ FreqType n(int w1,int w2) {
+ return _n[w1][w2];
+ };
+
+
+ FreqType n1(int w) {
+ return _n1[w];
+ };
+
+
+ FreqType n2(int w) {
+ return _n2[w];
+ };
+
- FreqType n2(int w) { return _n2[w];};
-
-
double bigramVerfSum();
double unigramVerfSum1();
double unigramVerfSum2();
- double nverf(int w1,int w2) { return _nverf[w1][w2]; }
-
- double n1verf(int w) { return _n1verf[w]; };
+ double nverf(int w1,int w2) {
+ return _nverf[w1][w2];
+ }
+
+ double n1verf(int w) {
+ return _n1verf[w];
+ };
- double n2verf(int w) { return _n2verf[w]; };
+ double n2verf(int w) {
+ return _n2verf[w];
+ };
inline void addN(int w1,int w2, FreqType n);
-
- void setN(int w1,int w2, FreqType n);
-
-
+
+ void setN(int w1,int w2, FreqType n);
+
+
double myCriterionTerm();
};
-inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
+inline void KategProblemKBC::addN(int w1,int w2, FreqType n)
{
- if(n!=0)
- {
- FreqType &s= _n[w1][w2];
- if(s==0)
- eta0--;
- else if(s==1)
- eta1--;
- if(_n1[w1]==0)
- c1_0--;
- if(_n2[w2]==0)
- c2_0--;
-
- if(withVerfaelschung)
- {
- double verfOld=verfaelsche(s,sigmaVerfaelschung);
- double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
- double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
- assert(verfOld1==_n1verf[w1]);
- double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
- double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
- assert(verfOld2==_n2verf[w2]);
- double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
- _n1verf[w1]=verfNew1;
- _unigramVerfSum1+=verfNew1-verfOld1;
- _n2verf[w2]=verfNew2;
- _unigramVerfSum2+=verfNew2-verfOld2;
- _nverf[w1][w2]=verfNew;
- _bigramVerfSum+=verfNew-verfOld;
- _nWords+=n;
- }
- s+=n;_n1[w1]+=n;_n2[w2]+=n;
-
- assert(_n[w1][w2]>=0);
- assert(_n1[w1]>=0);
- assert(_n2[w2]>=0);
-
- if(s==0)
- eta0++;
- else if(s==1)
- eta1++;
- if(_n1[w1]==0)
- c1_0++;
- if(_n2[w2]==0)
- c2_0++;
+ if(n!=0) {
+ FreqType &s= _n[w1][w2];
+ if(s==0)
+ eta0--;
+ else if(s==1)
+ eta1--;
+ if(_n1[w1]==0)
+ c1_0--;
+ if(_n2[w2]==0)
+ c2_0--;
+
+ if(withVerfaelschung) {
+ double verfOld=verfaelsche(s,sigmaVerfaelschung);
+ double verfNew=verfaelsche(s+n,sigmaVerfaelschung);
+ double verfOld1=verfaelsche(_n1[w1],sigmaVerfaelschung);
+ assert(verfOld1==_n1verf[w1]);
+ double verfNew1=verfaelsche(_n1[w1]+n,sigmaVerfaelschung);
+ double verfOld2=verfaelsche(_n2[w2],sigmaVerfaelschung);
+ assert(verfOld2==_n2verf[w2]);
+ double verfNew2=verfaelsche(_n2[w2]+n,sigmaVerfaelschung);
+ _n1verf[w1]=verfNew1;
+ _unigramVerfSum1+=verfNew1-verfOld1;
+ _n2verf[w2]=verfNew2;
+ _unigramVerfSum2+=verfNew2-verfOld2;
+ _nverf[w1][w2]=verfNew;
+ _bigramVerfSum+=verfNew-verfOld;
+ _nWords+=n;
}
+ s+=n;
+ _n1[w1]+=n;
+ _n2[w2]+=n;
+
+ assert(_n[w1][w2]>=0);
+ assert(_n1[w1]>=0);
+ assert(_n2[w2]>=0);
+
+ if(s==0)
+ eta0++;
+ else if(s==1)
+ eta1++;
+ if(_n1[w1]==0)
+ c1_0++;
+ if(_n2[w2]==0)
+ c2_0++;
+ }
};
#endif
diff --git a/mgizapp/src/mkcls/KategProblemTest.cpp b/mgizapp/src/mkcls/KategProblemTest.cpp
index 3084a0b..ed78e6f 100644
--- a/mgizapp/src/mkcls/KategProblemTest.cpp
+++ b/mgizapp/src/mkcls/KategProblemTest.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -53,26 +53,25 @@ char *strdup(char *a)
void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to)
-{
- for(int i=0;i<katOfWord.size();i++)
- {
- if( strcmp(problem.getString(i),"$") )
- if( strcmp(problem.getString(i),"mkcls-mapped-dollar-symbol-$")==0 )
- to << "$" << "\t" << katOfWord[i] << endl;
- else
- to << problem.getString(i) << "\t" << katOfWord[i] << endl;
- }
+{
+ for(int i=0; i<katOfWord.size(); i++) {
+ if( strcmp(problem.getString(i),"$") )
+ if( strcmp(problem.getString(i),"mkcls-mapped-dollar-symbol-$")==0 )
+ to << "$" << "\t" << katOfWord[i] << endl;
+ else
+ to << problem.getString(i) << "\t" << katOfWord[i] << endl;
+ }
}
void mysplit(const string &s,string &s1,string &s2)
{
unsigned int i=0;
- for(;i<s.length();i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
+ for(; i<s.length(); i++)if( s[i]==' ' || s[i]=='\t' || s[i]==' ')break;
s1=s.substr(0,i);
- for(;i<s.length();i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
+ for(; i<s.length(); i++)if( !(s[i]==' ' || s[i]=='\t' || s[i]==' ') )break;
s2=s.substr(i,s.length()-i);
-
+
iassert(s1.size());
iassert(s2.size());
}
@@ -84,164 +83,163 @@ int fromCatFile(KategProblem *p,const char *fname,bool verb)
leda_h_array<string,int> translation(-1);
int maxCat=2;
ifstream in(fname);
- if(!in)
- {
- cerr << "Error: File '" << fname << "' cannot be opened.\n";
- exit(1);
- }
- for(int i=0;i<p->wordFreq.nWords;i++)
+ if(!in) {
+ cerr << "Error: File '" << fname << "' cannot be opened.\n";
+ exit(1);
+ }
+ for(int i=0; i<p->wordFreq.nWords; i++)
(p->initLike)[i]= -1;
-
-
+
+
translation["1"]=1;
translation["0"]=0;
-
+
string s;
- while( getline(in,s) )
- {
- string str,categ;
- mysplit(s,str,categ);
- int i=p->words->binary_locate(str);
- if(i>=0 && (*(p->words))[i]==str )
- {
-
- if( translation[categ]==-1 )
- translation[categ]=maxCat++;
- int cat=translation[categ];
- if( (p->initLike)[i]!= -1 )
- cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
- (p->initLike)[i]=cat;
- }
- else
- cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
- }
-
+ while( getline(in,s) ) {
+ string str,categ;
+ mysplit(s,str,categ);
+ int i=p->words->binary_locate(str);
+ if(i>=0 && (*(p->words))[i]==str ) {
+
+ if( translation[categ]==-1 )
+ translation[categ]=maxCat++;
+ int cat=translation[categ];
+ if( (p->initLike)[i]!= -1 )
+ cerr << "Warning: Word '" << ((*(p->words))[i])<< "' is already in a category.\n";
+ (p->initLike)[i]=cat;
+ } else
+ cerr << "Warning: Word '" << str << "' " << i << " is not in training corpus.\n";
+ }
+
if( verboseMode )
- cout << "We have " << maxCat << " read non-empty categories"
- " (with words from the corpus).\n";
-
- if(maxCat>p->katFreq.nKats)
- {
- cerr << "Error: Not enough categories reserved (only "
- << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
- exit(1);
- }
-
-
+ cout << "We have " << maxCat << " read non-empty categories"
+ " (with words from the corpus).\n";
+
+ if(maxCat>p->katFreq.nKats) {
+ cerr << "Error: Not enough categories reserved (only "
+ << p->katFreq.nKats << ", but i need " << maxCat << ").\n";
+ exit(1);
+ }
+
+
int i=p->words->binary_locate("$");
if( i>=0 && (*(p->words))[i]=="$" )
(p->initLike)[i]=0;
- else
- if( verboseMode )
- cerr << "Warning: No '$' in vocabulary!\n";
-
-
+ else if( verboseMode )
+ cerr << "Warning: No '$' in vocabulary!\n";
+
+
int errors=0;
- for(i=0;i<p->wordFreq.nWords;i++)
- if((p->initLike)[i]== -1 )
- {
- if( verb ) cerr << "Error: I don't know the category of word " << i
- << " (" << (*(p->words))[i] << ") " << ".\n";
- errors=1;
- }
+ for(i=0; i<p->wordFreq.nWords; i++)
+ if((p->initLike)[i]== -1 ) {
+ if( verb ) cerr << "Error: I don't know the category of word " << i
+ << " (" << (*(p->words))[i] << ") " << ".\n";
+ errors=1;
+ }
return errors;
}
KategProblem *makeKategProblem(const leda_h_array<PSS,FreqType>&cTbl,const leda_set<string>&setVokabular, int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
+ int auswertung,int nachbarschaft,int minWordFrequency)
{
-
+
int nwrd=0;
leda_array<string>&sVok = *new leda_array<string>(setVokabular.size());
string s;
unsigned int ctr=0;
- forall_set(leda_set<string>,s,setVokabular)
- {
- if( verboseMode>2 )
- cout << "mkcls:Wort " << ctr << " " << s << endl;
- sVok[ctr++]=s;
- }
- for(unsigned int z=0;z<ctr-1;z++)
+ forall_set(leda_set<string>,s,setVokabular) {
+ if( verboseMode>2 )
+ cout << "mkcls:Wort " << ctr << " " << s << endl;
+ sVok[ctr++]=s;
+ }
+ for(unsigned int z=0; z<ctr-1; z++)
iassert( sVok[z]<sVok[z+1] );
sVok.sort();
if( verboseMode>2 )
cout << "*****Vocabulary: " << sVok;
-
+
unsigned int vokSize=sVok.size();
- massert(vokSize==ctr); massert(vokSize==setVokabular.size());
- if(verboseMode)
- {cout << "Size of vocabulary: " << vokSize << "\n";cout.flush();}
-
+ massert(vokSize==ctr);
+ massert(vokSize==setVokabular.size());
+ if(verboseMode) {
+ cout << "Size of vocabulary: " << vokSize << "\n";
+ cout.flush();
+ }
+
KategProblem *k = new KategProblem(vokSize,maxClass,initialisierung,
- auswertung,nachbarschaft,minWordFrequency);
+ auswertung,nachbarschaft,minWordFrequency);
KategProblemWBC &w=k->wordFreq;
k->words=&sVok;
-
+
Array<int> after(vokSize,0);
Array<int> before(vokSize,0);
-
-
+
+
nwrd=0;
{
PSS s;
- forall_defined_h2(PSS,FreqType,s,cTbl)
- {
- const string&ss1=s.first;
- const string&ss2=s.second;
- if( ss2.length()&&(ss1!="$" || ss2!="$") )
- {
- int i1=sVok.binary_search(ss1);
- int i2=sVok.binary_search(ss2);
- iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
- after[i1]++;
- before[i2]++;
- }
- if( verboseMode&&((nwrd++)%10000==0) )
- {cout<<"Statistiken-1 " << nwrd<< ". \r";cout.flush();}
+ forall_defined_h2(PSS,FreqType,s,cTbl) {
+ const string&ss1=s.first;
+ const string&ss2=s.second;
+ if( ss2.length()&&(ss1!="$" || ss2!="$") ) {
+ int i1=sVok.binary_search(ss1);
+ int i2=sVok.binary_search(ss2);
+ iassert( sVok[i1] == ss1 );
+ iassert( sVok[i2] == ss2 );
+ after[i1]++;
+ before[i2]++;
+ }
+ if( verboseMode&&((nwrd++)%10000==0) ) {
+ cout<<"Statistiken-1 " << nwrd<< ". \r";
+ cout.flush();
}
- }
-
- for(unsigned int i=0;i<vokSize;i++)
- {
- w.setAfterWords(i,after[i]);
- w.setBeforeWords(i,before[i]);
}
-
-
+ }
+
+ for(unsigned int i=0; i<vokSize; i++) {
+ w.setAfterWords(i,after[i]);
+ w.setBeforeWords(i,before[i]);
+ }
+
+
{
nwrd=0;
PSS s;
- forall_defined_h2(PSS,FreqType,s,cTbl)
- {
- const string&ss1=s.first;
- const string&ss2=s.second;
- FreqType p=cTbl[s];
- if( ss2.length()&&(ss1!="$" || ss2!="$") )
- {
- int i1=sVok.binary_search(ss1);
- int i2=sVok.binary_search(ss2);
- iassert( sVok[i1] == ss1 );iassert( sVok[i2] == ss2 );
- w.setFreq(i1,i2,p);
- if( verboseMode>2 )
- cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
- << ss2 << ":" << i2 << " " << p << endl;
- }
- if( verboseMode&&((nwrd++)%10000==0) )
- {cout<<"Statistiken-2 " <<nwrd<< ". \r";cout.flush();}
- }
+ forall_defined_h2(PSS,FreqType,s,cTbl) {
+ const string&ss1=s.first;
+ const string&ss2=s.second;
+ FreqType p=cTbl[s];
+ if( ss2.length()&&(ss1!="$" || ss2!="$") ) {
+ int i1=sVok.binary_search(ss1);
+ int i2=sVok.binary_search(ss2);
+ iassert( sVok[i1] == ss1 );
+ iassert( sVok[i2] == ss2 );
+ w.setFreq(i1,i2,p);
+ if( verboseMode>2 )
+ cout << "BIGRAMM-HAEUF: " << ss1 << ":" << i1 << " "
+ << ss2 << ":" << i2 << " " << p << endl;
+ }
+ if( verboseMode&&((nwrd++)%10000==0) ) {
+ cout<<"Statistiken-2 " <<nwrd<< ". \r";
+ cout.flush();
+ }
+ }
}
-
+
w.testFull();
- if(verboseMode){cout << "Datenintegritaet getestet.\n";cout.flush();}
+ if(verboseMode) {
+ cout << "Datenintegritaet getestet.\n";
+ cout.flush();
+ }
return k;
}
KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
+ int auswertung,int nachbarschaft,int minWordFrequency)
{
ifstream file(str);
if(!file)return 0;
@@ -250,118 +248,110 @@ KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
double c=0;
if( verboseMode )cout << "NGRFILE: " << str << endl;
string s1,s2;
- while(file >> c >> s1 >> s2)
- {
- if( s1.length()==0||s2.length()==0 )
- {
- cerr << "ERROR: strings are zero: " << s1.length() <<" " << s1 <<" " << s2.length()<<" " << s2 << endl;
- return 0;
- }
- if( c==0 )
- {
- cerr << "Count ist 0 " << s1 << " " << s2 << endl;
- return 0;
- }
- cTbl[pair<string,string>(s1,s2)]=(FreqType)c;
- setVokabular.insert(s1);
- setVokabular.insert(s2);
- if( verboseMode>1 )
- cout << "R: " << s1 << " " << s2 << " " << c << endl;
- c=0;
+ while(file >> c >> s1 >> s2) {
+ if( s1.length()==0||s2.length()==0 ) {
+ cerr << "ERROR: strings are zero: " << s1.length() <<" " << s1 <<" " << s2.length()<<" " << s2 << endl;
+ return 0;
+ }
+ if( c==0 ) {
+ cerr << "Count ist 0 " << s1 << " " << s2 << endl;
+ return 0;
}
-
+ cTbl[pair<string,string>(s1,s2)]=(FreqType)c;
+ setVokabular.insert(s1);
+ setVokabular.insert(s2);
+ if( verboseMode>1 )
+ cout << "R: " << s1 << " " << s2 << " " << c << endl;
+ c=0;
+ }
+
return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
-
-
+
+
KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency)
+ int auswertung,int nachbarschaft,int minWordFrequency)
{
string oldText,text,line;
ifstream f(str);
- if( !f )
- {
- cerr << "ERROR: can not open file " << str << ".\n";
- return 0;
- }
-
+ if( !f ) {
+ cerr << "ERROR: can not open file " << str << ".\n";
+ return 0;
+ }
+
leda_set<string> setVokabular;
leda_h_array<PSS,FreqType> cTbl(0);
oldText="$";
- while(1)
- {
- getline(f,line);
- if(f.fail() && !f.bad() && !f.eof())
- {
- cerr << "WARNING: strange characters in stream (getline) " << endl;f.clear();
- }
- if(!f)break;
-
- istrstream f2(line.c_str());
- while( 1 )
- {
- f2 >> text;
- if(f2.fail() && !f2.bad() && !f2.eof())
- {
- cerr << "WARNING: strange characters in stream (>>) !\n";
- f2.clear(ios::failbit);
- }
- if(!f2){break;}
-
-
-
-
-
-
- if( text == "$" )
- text = "mkcls-mapped-dollar-symbol-$";
- if( !setVokabular.member(text) )setVokabular.insert(text);
- cTbl[pair<string,string>(oldText,text)]++;
- oldText=text;
- }
- text="$";
+ while(1) {
+ getline(f,line);
+ if(f.fail() && !f.bad() && !f.eof()) {
+ cerr << "WARNING: strange characters in stream (getline) " << endl;
+ f.clear();
+ }
+ if(!f)break;
+
+ istrstream f2(line.c_str());
+ while( 1 ) {
+ f2 >> text;
+ if(f2.fail() && !f2.bad() && !f2.eof()) {
+ cerr << "WARNING: strange characters in stream (>>) !\n";
+ f2.clear(ios::failbit);
+ }
+ if(!f2) {
+ break;
+ }
+
+
+
+
+
+
+ if( text == "$" )
+ text = "mkcls-mapped-dollar-symbol-$";
if( !setVokabular.member(text) )setVokabular.insert(text);
cTbl[pair<string,string>(oldText,text)]++;
oldText=text;
}
+ text="$";
+ if( !setVokabular.member(text) )setVokabular.insert(text);
+ cTbl[pair<string,string>(oldText,text)]++;
+ oldText=text;
+ }
return makeKategProblem(cTbl,setVokabular,maxClass,initialisierung,auswertung,nachbarschaft,minWordFrequency);
}
-
+
void KategProblemSetParameters(KategProblem &p)
{
- if( p.katwahl()==K_BEST )
- {
- TAOptimization::defaultAnnRate=0.7;
- RRTOptimization::defaultAnnRate=0.95;
- GDAOptimization::defaultAlpha=0.05;
- if( verboseMode )
- cout << "Parameter-setting like W-DET-BEST\n";
- }
- else
- {
- TAOptimization::defaultAnnRate=0.4;
- RRTOptimization::defaultAnnRate=0.6;
- GDAOptimization::defaultAlpha=0.0125;
- if( verboseMode )
- cout << "Parameter-setting like W-DET-DET\n";
- }
+ if( p.katwahl()==K_BEST ) {
+ TAOptimization::defaultAnnRate=0.7;
+ RRTOptimization::defaultAnnRate=0.95;
+ GDAOptimization::defaultAlpha=0.05;
+ if( verboseMode )
+ cout << "Parameter-setting like W-DET-BEST\n";
+ } else {
+ TAOptimization::defaultAnnRate=0.4;
+ RRTOptimization::defaultAnnRate=0.6;
+ GDAOptimization::defaultAlpha=0.0125;
+ if( verboseMode )
+ cout << "Parameter-setting like W-DET-DET\n";
+ }
}
KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
- int auswertung,int nachbarschaft,float relInit)
+ int auswertung,int nachbarschaft,float relInit)
{
KategProblem &k=
*new KategProblem(ANZ_WORD,ANZ_CLS,initValue,auswertung,nachbarschaft);
@@ -369,41 +359,35 @@ KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
Array<int> after(ANZ_WORD,0);
Array<int> before(ANZ_WORD,0);
Array<FreqArray> twoD(ANZ_WORD);
- int i;
- for(i=0;i<ANZ_WORD;i++) twoD[i].init(ANZ_WORD,0);
-
- for(i=0;i<ANZ_WORD;i++)
- {
- massert(after[i]==0);
- massert(before[i]==0);
- for(int j=0;j<ANZ_WORD;j++)
- {
- massert(twoD[i][j]==0);
- }
- }
- for(i=0;i<ANZ_WORD*ANZ_WORD*relInit;i++)
- {
- int x=randomInt(ANZ_WORD);
- int y=randomInt(ANZ_WORD);
- if(twoD[x][y]==0)
- {
- after[x]++;
- before[y]++;
- }
- twoD[x][y]+=randomInt(10)+1;
- }
- for(i=0;i<ANZ_WORD;i++)
- {
- w.setAfterWords(i,after[i]);
- w.setBeforeWords(i,before[i]);
+ int i;
+ for(i=0; i<ANZ_WORD; i++) twoD[i].init(ANZ_WORD,0);
+
+ for(i=0; i<ANZ_WORD; i++) {
+ massert(after[i]==0);
+ massert(before[i]==0);
+ for(int j=0; j<ANZ_WORD; j++) {
+ massert(twoD[i][j]==0);
}
-
- for(i=0;i<ANZ_WORD;i++)
- {
- for(int j=0;j<ANZ_WORD;j++)
- if( twoD[i][j] )
- w.setFreq(i,j,twoD[i][j]);
+ }
+ for(i=0; i<ANZ_WORD*ANZ_WORD*relInit; i++) {
+ int x=randomInt(ANZ_WORD);
+ int y=randomInt(ANZ_WORD);
+ if(twoD[x][y]==0) {
+ after[x]++;
+ before[y]++;
}
+ twoD[x][y]+=randomInt(10)+1;
+ }
+ for(i=0; i<ANZ_WORD; i++) {
+ w.setAfterWords(i,after[i]);
+ w.setBeforeWords(i,before[i]);
+ }
+
+ for(i=0; i<ANZ_WORD; i++) {
+ for(int j=0; j<ANZ_WORD; j++)
+ if( twoD[i][j] )
+ w.setFreq(i,j,twoD[i][j]);
+ }
w.testFull();
return k;
}
@@ -414,24 +398,23 @@ KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initValue,
char *makeTitle(KategProblem &problem,int verfahren)
{
char x[1024];
- switch(verfahren)
- {
- case HC_OPT:
- strcpy(x,"HC ");
- break;
- case SA_OPT:
- strcpy(x,"SA ");
- break;
- case TA_OPT:
- strcpy(x,"TA ");
- break;
- case GDA_OPT:
- strcpy(x,"GDA ");
- break;
- case RRT_OPT:
- strcpy(x,"RRT ");
- break;
- }
+ switch(verfahren) {
+ case HC_OPT:
+ strcpy(x,"HC ");
+ break;
+ case SA_OPT:
+ strcpy(x,"SA ");
+ break;
+ case TA_OPT:
+ strcpy(x,"TA ");
+ break;
+ case GDA_OPT:
+ strcpy(x,"GDA ");
+ break;
+ case RRT_OPT:
+ strcpy(x,"RRT ");
+ break;
+ }
problem.makeTitle(x+strlen(x));
return strdup(x);
}
@@ -439,11 +422,11 @@ char *makeTitle(KategProblem &problem,int verfahren)
-#define MAX_MULTIPLE 10
+#define MAX_MULTIPLE 10
Array<KategProblem *> &_izrOptimization(Array<KategProblem *> &probs,
-int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
-int anzIter,int verfahren)
+ int anzprob,double timeForOneRed,double maxClock,Array<Kategory> &katOfWord,
+ int anzIter,int verfahren)
{
massert(anzprob>1);
massert(probs[0]->wordFreq.mindestAnzahl<=1);
@@ -456,184 +439,161 @@ int anzIter,int verfahren)
int indexOfDurchschnitt;
Array<int> newWords(nWords);
int useAnzprob=anzprob;
- do
- {
- int w,k;
- indexOfDurchschnitt=0;
- for(w=0;w<nWords;w++)
- newWords[w]=-1;
- for(k=0;k<useAnzprob;k++)
- {
- massert(probs[k]->wordFreq.nWords==nWords);
- probs[k]->makeKats();
- }
-
- for(w=0;w<nWords;w++)
- {
- if( newWords[w]==-1 )
- {
-
-
-
- leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
- for(k=1;k<useAnzprob;k++)
- durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
-
-
- int _anzInDurchschnitt=0;
- int nr=0;
- forall_set(leda_set<int>,nr,durchschnitt)
- {
- _anzInDurchschnitt++;
- newWords[nr]=indexOfDurchschnitt;
- }
- if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 )
- {
- cout << "- (";
- forall_set(leda_set<int>,nr,durchschnitt)
- {
- cout << p0->getString(nr);
- if( p0->wordFreq.n1(nr)==1 )
- cout << "* ";
- else
- cout << " ";
- }
- cout << ")\n";
- }
-
-
-
-
- for(k=0;k<useAnzprob;k++)
- {
- durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
- }
- indexOfDurchschnitt++;
- }
- }
-
- if(indexOfDurchschnitt>=minimumNumberOfWords)
- {
- if(useAnzprob==1)
- {
- cout << "useAnzProb==1 => mysterious.\n";
- break;
- }
- useAnzprob--;
- }
+ do {
+ int w,k;
+ indexOfDurchschnitt=0;
+ for(w=0; w<nWords; w++)
+ newWords[w]=-1;
+ for(k=0; k<useAnzprob; k++) {
+ massert(probs[k]->wordFreq.nWords==nWords);
+ probs[k]->makeKats();
+ }
+
+ for(w=0; w<nWords; w++) {
+ if( newWords[w]==-1 ) {
+
+
+
+ leda_set<int> durchschnitt=(*p0->kats)[p0->katOfWord(w)];
+ for(k=1; k<useAnzprob; k++)
+ durchschnitt = durchschnitt & (*probs[k]->kats)[probs[k]->katOfWord(w)];
+
+
+ int _anzInDurchschnitt=0;
+ int nr=0;
+ forall_set(leda_set<int>,nr,durchschnitt) {
+ _anzInDurchschnitt++;
+ newWords[nr]=indexOfDurchschnitt;
+ }
+ if( verboseMode && _anzInDurchschnitt>1 && anzIter==0 ) {
+ cout << "- (";
+ forall_set(leda_set<int>,nr,durchschnitt) {
+ cout << p0->getString(nr);
+ if( p0->wordFreq.n1(nr)==1 )
+ cout << "* ";
+ else
+ cout << " ";
+ }
+ cout << ")\n";
+ }
+
+
+
+
+ for(k=0; k<useAnzprob; k++) {
+ durchschnitt = durchschnitt - (*probs[k]->kats)[probs[k]->katOfWord(w)];
+ }
+ indexOfDurchschnitt++;
+ }
+ }
+
+ if(indexOfDurchschnitt>=minimumNumberOfWords) {
+ if(useAnzprob==1) {
+ cout << "useAnzProb==1 => mysterious.\n";
+ break;
+ }
+ useAnzprob--;
}
- while(indexOfDurchschnitt>=minimumNumberOfWords);
-
-
+ } while(indexOfDurchschnitt>=minimumNumberOfWords);
+
+
Array<KategProblem *> &neu=*new Array<KategProblem *>(MAX_MULTIPLE*anzprob,(KategProblem *)0);
qsort(probs.getPointerToData(),useAnzprob,sizeof(KategProblem *),compareProblem);
massert(useAnzprob<=probs.size());
double startTime=clockSec();
int i, numberOfNew;
- for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
- || (numberOfNew < anzprob) ; numberOfNew++)
- {
- int w;
- if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
- break;
- KategProblem *p
- = neu[numberOfNew]
- = new KategProblem(indexOfDurchschnitt,nKats-2,
- p0->initialisierung,p0->auswertung,p0->nachbarschaft);
-
- for(w=0;w<indexOfDurchschnitt;w++)
- {
- p->wordFreq.setAfterWords(w,5);
- p->wordFreq.setBeforeWords(w,5);
- }
- for(w=0;w<nWords;w++)
- {
- Array<OneFreq> &after=p0->wordFreq.after[w];
- int size=after.size();
- for(i=0;i<size;i++)
- p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
- }
- p->wordFreq.testFull(1);
-
-
-
-
-
-
- p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
- double w1=0.0,w2=0.0;
- if(numberOfNew<useAnzprob)
- {
-
- for(i=0;i<nWords;i++)
- (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
- p->_initialize(5);
- HCOptimization hc(*p,-1);
- if(verboseMode)
- {
- w1=p->nicevalue();
- cout << "from old category system:" << w1 << endl;
- }
- hc.minimize(-1);
- if(verboseMode)
- {
- w2=p->nicevalue();
- if(w2<w1)
- cout << "improvement: " << w1-w2 << endl;
- }
- }
- else
- {
- p->_initialize(1);
- double mean;
- StatVar end,laufzeit,start;
- solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
- w2=p->value();
- if(verboseMode)
- cout << "new category system: " << w2 << " (" << p->nicevalue()
- << ") Zeit: " << clockSec() << "\n";
- }
+ for(numberOfNew=0; (clockSec()-startTime<timeForOneRed)
+ || (numberOfNew < anzprob) ; numberOfNew++) {
+ int w;
+ if( numberOfNew==anzprob*MAX_MULTIPLE-1 )
+ break;
+ KategProblem *p
+ = neu[numberOfNew]
+ = new KategProblem(indexOfDurchschnitt,nKats-2,
+ p0->initialisierung,p0->auswertung,p0->nachbarschaft);
+
+ for(w=0; w<indexOfDurchschnitt; w++) {
+ p->wordFreq.setAfterWords(w,5);
+ p->wordFreq.setBeforeWords(w,5);
+ }
+ for(w=0; w<nWords; w++) {
+ Array<OneFreq> &after=p0->wordFreq.after[w];
+ int size=after.size();
+ for(i=0; i<size; i++)
+ p->wordFreq.addFreq(newWords[w],newWords[after[i].w],after[i].n);
}
- int p;
- for(p=0;p<probs.size();p++)
- {
- if( probs[p] )
- delete probs[p];
+ p->wordFreq.testFull(1);
+
+
+
+
+
+
+ p->wordFreq.set_h_of_words(p0->wordFreq.get_h_of_words());
+ double w1=0.0,w2=0.0;
+ if(numberOfNew<useAnzprob) {
+
+ for(i=0; i<nWords; i++)
+ (p->initLike)[newWords[i]]=probs[numberOfNew]->katOfWord(i);
+ p->_initialize(5);
+ HCOptimization hc(*p,-1);
+ if(verboseMode) {
+ w1=p->nicevalue();
+ cout << "from old category system:" << w1 << endl;
+ }
+ hc.minimize(-1);
+ if(verboseMode) {
+ w2=p->nicevalue();
+ if(w2<w1)
+ cout << "improvement: " << w1-w2 << endl;
+ }
+ } else {
+ p->_initialize(1);
+ double mean;
+ StatVar end,laufzeit,start;
+ solveProblem(0,*p,1,-1,verfahren,mean,end,laufzeit,start);
+ w2=p->value();
+ if(verboseMode)
+ cout << "new category system: " << w2 << " (" << p->nicevalue()
+ << ") Zeit: " << clockSec() << "\n";
}
+ }
+ int p;
+ for(p=0; p<probs.size(); p++) {
+ if( probs[p] )
+ delete probs[p];
+ }
qsort(neu.getPointerToData(),numberOfNew,sizeof(Problem *),compareProblem);
massert(numberOfNew<=neu.size());
if( verboseMode )
- cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
- << " words. costs: " << neu[0]->value() << " "
- << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
- << clockSec() << endl;
- if( indexOfDurchschnitt<=nKats
- || (clockSec()>maxClock&&maxClock) )
- {
- if( clockSec()>maxClock&&maxClock )
- cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
- for(i=0;i<nWords;i++)
- katOfWord[i]=neu[0]->katOfWord(newWords[i]);
- return neu;
- }
- else
- {
- Array<Kategory> &newKatOfWord=
- *(new Array<Kategory>(neu[0]->wordFreq.nWords,-1));
- Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
- maxClock,newKatOfWord,
- anzIter+1,verfahren);
- for(i=0;i<nWords;i++)
- katOfWord[i]=newKatOfWord[newWords[i]];
- return erg;
- }
+ cout << "Iterierte Zustandsraum-Reduktion: " << indexOfDurchschnitt
+ << " words. costs: " << neu[0]->value() << " "
+ << neu[0]->nicevalue() << " (" << numberOfNew-anzprob << ")" << "time: "
+ << clockSec() << endl;
+ if( indexOfDurchschnitt<=nKats
+ || (clockSec()>maxClock&&maxClock) ) {
+ if( clockSec()>maxClock&&maxClock )
+ cout << "STOP (time limit: " << (clockSec()-maxClock) << " s)\n";
+ for(i=0; i<nWords; i++)
+ katOfWord[i]=neu[0]->katOfWord(newWords[i]);
+ return neu;
+ } else {
+ Array<Kategory> &newKatOfWord=
+ *(new Array<Kategory>(neu[0]->wordFreq.nWords,-1));
+ Array<KategProblem *> &erg=_izrOptimization(neu,anzprob,timeForOneRed,
+ maxClock,newKatOfWord,
+ anzIter+1,verfahren);
+ for(i=0; i<nWords; i++)
+ katOfWord[i]=newKatOfWord[newWords[i]];
+ return erg;
+ }
}
KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
- double clockForOneRed,double maxClock,int verfahren)
+ double clockForOneRed,double maxClock,int verfahren)
{
Array<Kategory> katOfWord(p.wordFreq.nWords,-1);
int startN;
@@ -647,31 +607,29 @@ KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
double startTime=clockSec();
int i;
- for(i=0;i<startN;i++)
- {
- StatVar end,laufzeit,start;
- double mean;
- probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
- solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
- if( i==minN-1 )
- endTime = clockSec();
- if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
- break;
- }
+ for(i=0; i<startN; i++) {
+ StatVar end,laufzeit,start;
+ double mean;
+ probs[i] = (KategProblem *)((KategProblem *)p.makeEqualProblem());
+ solveProblem(0,*(probs[i]),1,-1,verfahren,mean,end,laufzeit,start);
+ if( i==minN-1 )
+ endTime = clockSec();
+ if( i>=firstN-1 && (startTime+clockForOneRed>clockSec() || i==999) )
+ break;
+ }
if( endTime<0 )
endTime=clockSec();
massert(i>=firstN);
qsort(probs.getPointerToData(),i,sizeof(KategProblem *),compareProblem);
massert(i<=probs.size());
- if( clockForOneRed<=0 )
- {
- clockForOneRed=endTime-startTime;
- if( verboseMode )
- cout << "time for one reduction: " << clockForOneRed << endl;
- }
+ if( clockForOneRed<=0 ) {
+ clockForOneRed=endTime-startTime;
+ if( verboseMode )
+ cout << "time for one reduction: " << clockForOneRed << endl;
+ }
_izrOptimization(probs,minN,clockForOneRed,maxClock,katOfWord,0,verfahren);
-
+
KategProblem *n=(KategProblem *)(p.makeEqualProblem());
n->initLike= katOfWord;
n->_initialize(5);
diff --git a/mgizapp/src/mkcls/KategProblemTest.h b/mgizapp/src/mkcls/KategProblemTest.h
index 7767b7d..987c1c7 100644
--- a/mgizapp/src/mkcls/KategProblemTest.h
+++ b/mgizapp/src/mkcls/KategProblemTest.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,16 +31,16 @@ USA.
KategProblem &makRandom(int ANZ_WORD,int ANZ_CLS,int initialisierung,
- int auswertung,int nachbarschaft,float relInit=0.1);
+ int auswertung,int nachbarschaft,float relInit=0.1);
KategProblem *fromKModel(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency);
+ int auswertung,int nachbarschaft,int minWordFrequency);
KategProblem *fromNgrFile(const char *str,int maxClass,int initialisierung,
- int auswertung,int nachbarschaft,int minWordFrequency);
+ int auswertung,int nachbarschaft,int minWordFrequency);
void writeClasses(Array<Kategory> &katOfWord,KategProblem &problem,ostream &to);
@@ -51,7 +51,7 @@ int fromCatFile(KategProblem *p,const char *s,bool verb=1);
KategProblem *izrOptimization(KategProblem &p,int minN,int firstN,
-double clockForOneRed,double maxClock,int verfahren);
+ double clockForOneRed,double maxClock,int verfahren);
diff --git a/mgizapp/src/mkcls/KategProblemWBC.cpp b/mgizapp/src/mkcls/KategProblemWBC.cpp
index 422b4a4..a3280d1 100644
--- a/mgizapp/src/mkcls/KategProblemWBC.cpp
+++ b/mgizapp/src/mkcls/KategProblemWBC.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -55,15 +55,15 @@ static int oneFreqCompareFallend(const void *p,const void *j)
}
-KategProblemWBC::KategProblemWBC(int n,int minw)
-: _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
- mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
-
+KategProblemWBC::KategProblemWBC(int n,int minw)
+ : _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
+ mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
+
{
}
KategProblemWBC::~KategProblemWBC()
-
+
{
massert( after.size()==nWords);
if( absteigend )
@@ -72,77 +72,73 @@ KategProblemWBC::~KategProblemWBC()
void KategProblemWBC::init(int specialFixedWord)
{
-
+
nTranspWords=0;
int i;
- for(i=0;i<_n1.size();i++)
- {
- if( (_n1[i]<mindestAnzahl && _n2[i]<mindestAnzahl && minIndex[i]<=1) ||i==specialFixedWord )
- {
-
- if(!( fixedWord[i]==1 || fixedWord[i]== -1))
- cerr << "mkcls:KategProblemWBC::init::ERROR: " << i << " " << fixedWord[i] << endl;
- fixedWord[i]=1;
- }
- else if(fixedWord[i]<0)
- nTranspWords++;
- }
+ for(i=0; i<_n1.size(); i++) {
+ if( (_n1[i]<mindestAnzahl && _n2[i]<mindestAnzahl && minIndex[i]<=1) ||i==specialFixedWord ) {
+
+ if(!( fixedWord[i]==1 || fixedWord[i]== -1))
+ cerr << "mkcls:KategProblemWBC::init::ERROR: " << i << " " << fixedWord[i] << endl;
+ fixedWord[i]=1;
+ } else if(fixedWord[i]<0)
+ nTranspWords++;
+ }
if( absteigend==0 )
absteigend= &(getSortedList(0));
-
-
-
-
-
+
+
+
+
+
if(verboseMode && nTranspWords!=_n1.size()-1 )
cout << "Es sind: " <<nTranspWords<<" transportierbar.\n";
}
void KategProblemWBC::set_h_of_words(double s)
-
+
{
with_h_of_words=1;
h_of_words = -s;
}
double KategProblemWBC::get_h_of_words()
-
+
{
if( with_h_of_words )
return -h_of_words;
- else
- {
- h_of_words=0;
- for(int i=0;i<nWords;i++)
- h_of_words+=0.5*(kat_h(_n2[i])+kat_h(_n1[i]));
- with_h_of_words=1;
- return -h_of_words;
- }
+ else {
+ h_of_words=0;
+ for(int i=0; i<nWords; i++)
+ h_of_words+=0.5*(kat_h(_n2[i])+kat_h(_n1[i]));
+ with_h_of_words=1;
+ return -h_of_words;
+ }
}
-void KategProblemWBC::setAfterWords(int w,int anzahl)
-
-{
+void KategProblemWBC::setAfterWords(int w,int anzahl)
+
+{
OneFreq o;
o.w=-1;
o.n=0;
- afterFilled[w]=0;
+ afterFilled[w]=0;
after[w].init(anzahl,o,1);
}
-void KategProblemWBC::setBeforeWords(int w,int anzahl)
-
-{
+void KategProblemWBC::setBeforeWords(int w,int anzahl)
+
+{
OneFreq o;
o.w=-1;
o.n=0;
beforeFilled[w]=0;
- before[w].init(anzahl,o,1);
+ before[w].init(anzahl,o,1);
}
-void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
-
+void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
+
{
OneFreq o;
o.n=anzahl;
@@ -155,134 +151,118 @@ void KategProblemWBC::setFreq(int w1,int w2,FreqType anzahl)
_n2[w2]+=anzahl;
}
-void KategProblemWBC::addFreq(int w1,int w2,FreqType anzahl)
-
+void KategProblemWBC::addFreq(int w1,int w2,FreqType anzahl)
+
{
OneFreq o;
o.n=anzahl;
int pos=-1,i;
- for(i=0;i<afterFilled[w1];i++)
+ for(i=0; i<afterFilled[w1]; i++)
if(after[w1][i].w==w2)
pos=i;
- if(pos==-1)
- {
- o.w=w2;
- after[w1][afterFilled[w1]++]=o;
- }
- else
- after[w1][pos].n+=anzahl;
+ if(pos==-1) {
+ o.w=w2;
+ after[w1][afterFilled[w1]++]=o;
+ } else
+ after[w1][pos].n+=anzahl;
_n1[w1]+=anzahl;
pos=-1;
- for(i=0;i<beforeFilled[w2];i++)
+ for(i=0; i<beforeFilled[w2]; i++)
if(before[w2][i].w==w1)
pos=i;
- if(pos==-1)
- {
- o.w=w1;
- before[w2][beforeFilled[w2]++]=o;
- }
- else
+ if(pos==-1) {
+ o.w=w1;
+ before[w2][beforeFilled[w2]++]=o;
+ } else
before[w2][pos].n+=anzahl;
_n2[w2]+=anzahl;
}
short KategProblemWBC::testFull(int doIt)
-
+
{
int enaNom=0;
int afterFilledSum=0,beforeFilledSum=0;
int ret=1,i;
- for(i=0;i<nWords;i++)
- {
- if( n1(i)==1 && n2(i)==1 )
- enaNom++;
- afterFilledSum+=afterFilled[i];
- beforeFilledSum+=beforeFilled[i];
- if(afterFilled[i]!=after[i].size())
- {
- ret=0;
- if( doIt )
- after[i].resize(afterFilled[i]);
- }
- if(beforeFilled[i]!=before[i].size())
- {
- ret=0;
- if( doIt )
- before[i].resize(beforeFilled[i]);
- }
-
+ for(i=0; i<nWords; i++) {
+ if( n1(i)==1 && n2(i)==1 )
+ enaNom++;
+ afterFilledSum+=afterFilled[i];
+ beforeFilledSum+=beforeFilled[i];
+ if(afterFilled[i]!=after[i].size()) {
+ ret=0;
+ if( doIt )
+ after[i].resize(afterFilled[i]);
}
- if( ret==0 && !doIt )
- {
- cerr << "Error: Unfilled word bigram statistics.\n";
- exit(1);
+ if(beforeFilled[i]!=before[i].size()) {
+ ret=0;
+ if( doIt )
+ before[i].resize(beforeFilled[i]);
}
- else
+
+ }
+ if( ret==0 && !doIt ) {
+ cerr << "Error: Unfilled word bigram statistics.\n";
+ exit(1);
+ } else
filled=1;
- if( verboseMode>1 )
- {
- cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords)
- +(afterFilledSum/(float)nWords) << endl;
- cout << "Hapaslegomena: " << enaNom << endl;
- }
+ if( verboseMode>1 ) {
+ cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords)
+ +(afterFilledSum/(float)nWords) << endl;
+ cout << "Hapaslegomena: " << enaNom << endl;
+ }
int symmetrisch=1;
- for(i=0;i<nWords;i++)
- {
- int j;
- massert(before[i].size()==beforeFilled[i]);
- massert( after[i].size()== afterFilled[i]);
- FreqType sum=0;
- for(j=0;j<after[i].size();j++)
- sum+=after[i][j].n;
- massert( sum==_n1[i] );
- sum=0;
- for(j=0;j<before[i].size();j++)
- sum+=before[i][j].n;
- massert(sum==_n2[i]);
- if(_n1[i]!=_n2[i])
- {
- symmetrisch=0;
- if( verboseMode>1 )
- cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
- }
-
+ for(i=0; i<nWords; i++) {
+ int j;
+ massert(before[i].size()==beforeFilled[i]);
+ massert( after[i].size()== afterFilled[i]);
+ FreqType sum=0;
+ for(j=0; j<after[i].size(); j++)
+ sum+=after[i][j].n;
+ massert( sum==_n1[i] );
+ sum=0;
+ for(j=0; j<before[i].size(); j++)
+ sum+=before[i][j].n;
+ massert(sum==_n2[i]);
+ if(_n1[i]!=_n2[i]) {
+ symmetrisch=0;
+ if( verboseMode>1 )
+ cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
}
+
+ }
if(verboseMode && symmetrisch==0)
- cout << "Warning: word bigram statistic is not symmetric "
- "(this is possibly an error)\n";
+ cout << "Warning: word bigram statistic is not symmetric "
+ "(this is possibly an error)\n";
return ret;
}
Array<Word> &KategProblemWBC::getSortedList(int steigend)
-
+
{
int siz=_n2.size(),i;
massert(filled);
Array<Word> &sortedList =*new Array<Word>(siz);
Array<OneFreq> list(siz);
int pos=0;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]<0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
+ for(i=0; i<siz; i++) {
+ if( fixedWord[i]<0 ) {
+ list[pos].w=i;
+ list[pos].n=_n1[i];
+ pos++;
}
+ }
int anzFree=pos;
- for(i=0;i<siz;i++)
- {
- if( fixedWord[i]>=0 )
- {
- list[pos].w=i;
- list[pos].n=_n1[i];
- pos++;
- }
+ for(i=0; i<siz; i++) {
+ if( fixedWord[i]>=0 ) {
+ list[pos].w=i;
+ list[pos].n=_n1[i];
+ pos++;
}
+ }
massert(pos==siz);
if(steigend )
qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareSteigend);
@@ -290,32 +270,30 @@ Array<Word> &KategProblemWBC::getSortedList(int steigend)
qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareFallend);
massert( anzFree<=list.size() );
- for(i=0;i<siz;i++)
- {
- sortedList[i]=list[i].w;
- massert(steigend || i==0 || i>=anzFree || list[i-1].n>=list[i].n );
- massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
- }
+ for(i=0; i<siz; i++) {
+ sortedList[i]=list[i].w;
+ massert(steigend || i==0 || i>=anzFree || list[i-1].n>=list[i].n );
+ massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
+ }
return sortedList;
}
FreqType KategProblemWBC::numberOfWords()
-
+
{
FreqType n1=0,n2=0;
- for(int i=0;i<_n1.size();i++)
- {
- n1+=_n1[i];
- n2+=_n2[i];
- }
- #ifndef FREQTYPE_DOUBLE
+ for(int i=0; i<_n1.size(); i++) {
+ n1+=_n1[i];
+ n2+=_n2[i];
+ }
+#ifndef FREQTYPE_DOUBLE
massert(n1==n2);
- #endif
+#endif
return n1;
}
void KategProblemWBC::setDollar(int n)
-
+
{
if( fixedWord[n]<0 )
nTranspWords--;
@@ -326,18 +304,17 @@ void KategProblemWBC::initializeIndex(const leda_array<string>&words,char firstC
{
int n=0;
int i;
- massert(-1<unten);massert(unten<oben);
+ massert(-1<unten);
+ massert(unten<oben);
if( verboseMode )
cout << "InitializeIndex: " << firstChar << " u:" << unten << " o:" << oben << " " << noHapas << endl;
- over_array(words,i)
- {
- if( words[i][0]==firstChar && (noHapas || ((short)(n1(i)+0.0001))>=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) )
- {
- minIndex[i]=unten;
- maxIndex[i]=oben;
- n++;
- }
+ over_array(words,i) {
+ if( words[i][0]==firstChar && (noHapas || ((short)(n1(i)+0.0001))>=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) ) {
+ minIndex[i]=unten;
+ maxIndex[i]=oben;
+ n++;
}
+ }
if( verboseMode )
cout << "InitializeIndex gefunden fuer " << n << " Woerter.\n";
}
diff --git a/mgizapp/src/mkcls/KategProblemWBC.h b/mgizapp/src/mkcls/KategProblemWBC.h
index 8a399e5..20353e9 100644
--- a/mgizapp/src/mkcls/KategProblemWBC.h
+++ b/mgizapp/src/mkcls/KategProblemWBC.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,10 +30,9 @@ USA.
#ifndef KATEGPROBLEMWBC_H
#define KATEGPROBLEMWBC_H
-struct OneFreq
-{
- int w;
- FreqType n;
+struct OneFreq {
+ int w;
+ FreqType n;
};
typedef Array<OneFreq> ManyFreq;
@@ -44,86 +43,89 @@ class KategProblemWBC
friend class KategProblem;
- private:
- Array<FreqType> _n1;
-
- Array<FreqType> _n2;
-
+private:
+ Array<FreqType> _n1;
+
+ Array<FreqType> _n2;
+
double h_of_words;
-
-
- short with_h_of_words;
- Array<int> afterFilled;
+
+ short with_h_of_words;
+
+ Array<int> afterFilled;
Array<int> beforeFilled;
- Array<int> &getSortedList(int steigend);
-
+ Array<int> &getSortedList(int steigend);
+
- protected:
+protected:
KategProblemWBC(int n,int minw);
-
+
~KategProblemWBC();
-
- short filled;
-
- Array<int> fixedWord;
- Array<int> *absteigend;
+
+ short filled;
+
+ Array<int> fixedWord;
+ Array<int> *absteigend;
void init(int specialFixedWord=-1);
-
-
- public:
- int nWords;
- int nTranspWords;
- short mindestAnzahl;
- Array<ManyFreq> after;
- Array<ManyFreq> before;
- Array<int> minIndex;
- Array<int> maxIndex;
-
-
-
+
+
+public:
+ int nWords;
+ int nTranspWords;
+ short mindestAnzahl;
+ Array<ManyFreq> after;
+ Array<ManyFreq> before;
+ Array<int> minIndex;
+ Array<int> maxIndex;
+
+
+
void setAfterWords(int w,int anzahl);
-
+
void setBeforeWords(int w,int anzahl);
-
+
void setFreq(int w1,int w2, FreqType anzahl);
-
+
void addFreq(int w1,int w2,FreqType anzahl);
-
+
void setDollar(int n);
-
- int fixed(int w)
- {
- return fixedWord[w];
- }
- FreqType n1(int w) { return _n1[w];};
-
+ int fixed(int w) {
+ return fixedWord[w];
+ }
+
+ FreqType n1(int w) {
+ return _n1[w];
+ };
+
+
+ FreqType n2(int w) {
+ return _n2[w];
+ };
- FreqType n2(int w) { return _n2[w];};
-
FreqType numberOfWords();
-
+
short testFull(int doIt=0);
-
+
double get_h_of_words();
-
+
void set_h_of_words(double s);
-
+
void initializeIndex(const leda_array<string>&words,char firstChar,int min,int max,bool noHapas);
};
diff --git a/mgizapp/src/mkcls/MSBOptimization.cpp b/mgizapp/src/mkcls/MSBOptimization.cpp
index 0cd89bb..3bf18d3 100644
--- a/mgizapp/src/mkcls/MSBOptimization.cpp
+++ b/mgizapp/src/mkcls/MSBOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,7 +33,10 @@ template class Array<double>;
template class Array<ProbAndOpt>;
#endif
-struct doubleInt { double a; int i; };
+struct doubleInt {
+ double a;
+ int i;
+};
static int doubleintcompare(const void *p,const void *j)
{
if(((struct doubleInt *)p)->a < ((doubleInt *)j)->a)
@@ -44,35 +47,35 @@ static int doubleintcompare(const void *p,const void *j)
return 1;
}
-
-MSBOptimization::MSBOptimization(Problem &p,int verf,int anz,Array<double> &pos,Array<double> &por)
-: PopOptimization(p,verf,anz),
-percentOfSteps(pos),percentOfRun(por),nachMinimierung(0)
+
+MSBOptimization::MSBOptimization(Problem &p,int verf,int anz,Array<double> &pos,Array<double> &por)
+ : PopOptimization(p,verf,anz),
+ percentOfSteps(pos),percentOfRun(por),nachMinimierung(0)
{
}
-
+
void MSBOptimization::zInitialize()
{
PopOptimization::zInitialize();
int iterationsschritte;
double mean;
- StatVar end,laufzeit,start;
- zufallSeed();
+ StatVar end,laufzeit,start;
+ zufallSeed();
+
+
+
-
-
-
solveProblem(ProblemTestVerboseMode,*originalProblem,2,-1,verfahren,mean,
- end,laufzeit,start,0,&iterationsschritte);
+ end,laufzeit,start,0,&iterationsschritte);
expectedSteps=(int)(iterationsschritte);
if(verboseMode)
cout << "MSB:mean number of steps for one run: " << expectedSteps << endl;
}
-
-
+
+
double MSBOptimization::minimize(int)
{
if( initialisiert==0 )
@@ -82,146 +85,144 @@ double MSBOptimization::minimize(int)
int anz=size();
int numproblems=anz;
- if( verboseMode )
- {
- double usedSteps=0;
- for(i=0;i<percentOfSteps.size();i++)
- {
- usedSteps+=expectedSteps*(percentOfSteps[i]-
- (i==0?0:percentOfSteps[i-1]))*numproblems;
- numproblems=(int)(ceil(anz*(1.0-percentOfRun[i])));
- if( numproblems<1 )numproblems=1;
- }
- usedSteps+=expectedSteps*
- (1.0-percentOfSteps[percentOfSteps.size()-1])*numproblems;
- cout << "MSB: speed factor: "
- << (double)usedSteps/(expectedSteps*size()) << endl;
- numproblems=anz=size();
+ if( verboseMode ) {
+ double usedSteps=0;
+ for(i=0; i<percentOfSteps.size(); i++) {
+ usedSteps+=expectedSteps*(percentOfSteps[i]-
+ (i==0?0:percentOfSteps[i-1]))*numproblems;
+ numproblems=(int)(ceil(anz*(1.0-percentOfRun[i])));
+ if( numproblems<1 )numproblems=1;
}
+ usedSteps+=expectedSteps*
+ (1.0-percentOfSteps[percentOfSteps.size()-1])*numproblems;
+ cout << "MSB: speed factor: "
+ << (double)usedSteps/(expectedSteps*size()) << endl;
+ numproblems=anz=size();
+ }
+
+ for(i=0; i<percentOfSteps.size(); i++) {
+
+ int steps=(int)(expectedSteps*(percentOfSteps[i]-
+ (i==0?0:percentOfSteps[i-1])));
- for(i=0;i<percentOfSteps.size();i++)
- {
-
- int steps=(int)(expectedSteps*(percentOfSteps[i]-
- (i==0?0:percentOfSteps[i-1])));
-
-
- for(int a=0;a<numproblems;a++)
- {
-
- double v;
- v= optimization(a)->minimize(steps);
- if(verboseMode)cout << "MSB:" << i << " " << a << ":" << v << endl;
- }
-
- sort();
-
- if(verboseMode)
- cout << "MSB: best:" << problem(0)->value()
- << " worst:" << problem(numproblems-1)->value() << endl;
-
-
- numproblems=(int)(anz*(1.0-percentOfRun[i]));
- if( numproblems<1 )
- numproblems=1;
- if(verboseMode)
- cout << "MSB: now i have : " << numproblems << " Problem's." << endl;
- if(numproblems==1)
- break;
+
+ for(int a=0; a<numproblems; a++) {
+
+ double v;
+ v= optimization(a)->minimize(steps);
+ if(verboseMode)cout << "MSB:" << i << " " << a << ":" << v << endl;
}
+
+ sort();
+
+ if(verboseMode)
+ cout << "MSB: best:" << problem(0)->value()
+ << " worst:" << problem(numproblems-1)->value() << endl;
+
+
+ numproblems=(int)(anz*(1.0-percentOfRun[i]));
+ if( numproblems<1 )
+ numproblems=1;
+ if(verboseMode)
+ cout << "MSB: now i have : " << numproblems << " Problem's." << endl;
+ if(numproblems==1)
+ break;
+ }
assert( numproblems>0 );
-
- for(int a=0;a<numproblems;a++)
+
+ for(int a=0; a<numproblems; a++)
optimization(a)->minimize(-1);
sort();
double ergebnisWert = problem(0)->value();
- cout << "MSB: value:" << ergebnisWert << " (nicevalue:"
- << problem(0)->nicevalue() << ")\n";
+ cout << "MSB: value:" << ergebnisWert << " (nicevalue:"
+ << problem(0)->nicevalue() << ")\n";
nachMinimierung=1;
return ergebnisWert;
}
-
+
void MSBOptimization::optimizeValues(Problem &p,int verfahren)
{
- int i;
+ int i;
struct doubleInt ri[20];
double mean;
- StatVar end,laufzeit,start;
+ StatVar end,laufzeit,start;
solveProblem(ProblemTestVerboseMode,p,5,-1,verfahren,mean,end,laufzeit,start);
double fivePercentSteps=(int)(laufzeit.getMean()/20.0);
double qualitaet[20][20];
- for(i=0;i<20;i++)
- {
- Optimization *o=(Optimization *)genIterOptimizer(verfahren,p,-1);
- for(int a=0;a<20;a++)
- {
- qualitaet[i][a]=o->minimize((int)fivePercentSteps);
- cout << qualitaet[i][a] << " ";
- }
- ri[i].a=o->minimize(-1);
- ri[i].i=i;
- cout << ri[i].a << endl;
- delete o;
+ for(i=0; i<20; i++) {
+ Optimization *o=(Optimization *)genIterOptimizer(verfahren,p,-1);
+ for(int a=0; a<20; a++) {
+ qualitaet[i][a]=o->minimize((int)fivePercentSteps);
+ cout << qualitaet[i][a] << " ";
}
+ ri[i].a=o->minimize(-1);
+ ri[i].i=i;
+ cout << ri[i].a << endl;
+ delete o;
+ }
qsort(ri,20,sizeof(struct doubleInt),doubleintcompare);
cout << "#Beschneidungsmatrix, welche die drei besten Laeufe erhaelt: ";
- for(i=0;i<20;i++)
- {
- int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i || v[a].i==ri[1].i || v[a].i==ri[2].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==2)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ for(i=0; i<20; i++) {
+ int a;
+ struct doubleInt v[20];
+ for(a=0; a<20; a++) {
+ v[a].i=a;
+ v[a].a=qualitaet[a][i];
}
+ qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
+ int nr=0;
+ for(a=0; a<20; a++)
+ if( v[a].i==ri[0].i || v[a].i==ri[1].i || v[a].i==ri[2].i )
+ nr=a;
+ float percent=(1.0-nr/20.0)*100.0;
+ if(nr==2)
+ percent=100.0;
+ cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ }
cout << "#Beschneidungsmatrix, welche die zwei besten Laeufe erhaelt: ";
- for(i=0;i<20;i++)
- {
- int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i || v[a].i==ri[1].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==1)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ for(i=0; i<20; i++) {
+ int a;
+ struct doubleInt v[20];
+ for(a=0; a<20; a++) {
+ v[a].i=a;
+ v[a].a=qualitaet[a][i];
}
+ qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
+ int nr=0;
+ for(a=0; a<20; a++)
+ if( v[a].i==ri[0].i || v[a].i==ri[1].i )
+ nr=a;
+ float percent=(1.0-nr/20.0)*100.0;
+ if(nr==1)
+ percent=100.0;
+ cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ }
cout << "#Beschneidungsmatrix, welche den besten Lauf erhaelt: ";
- for(i=0;i<20;i++)
- {int a;
- struct doubleInt v[20];
- for(a=0;a<20;a++)
- { v[a].i=a;v[a].a=qualitaet[a][i];}
- qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
- int nr=0;
- for(a=0;a<20;a++)
- if( v[a].i==ri[0].i )
- nr=a;
- float percent=(1.0-nr/20.0)*100.0;
- if(nr==0)
- percent=100.0;
- cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ for(i=0; i<20; i++) {
+ int a;
+ struct doubleInt v[20];
+ for(a=0; a<20; a++) {
+ v[a].i=a;
+ v[a].a=qualitaet[a][i];
}
+ qsort(v,20,sizeof(struct doubleInt),doubleintcompare);
+ int nr=0;
+ for(a=0; a<20; a++)
+ if( v[a].i==ri[0].i )
+ nr=a;
+ float percent=(1.0-nr/20.0)*100.0;
+ if(nr==0)
+ percent=100.0;
+ cout << "# " << i << " " << (i/20.0)*100 << "% " << percent << "%\n";
+ }
}
-
+
Problem& MSBOptimization::bestProblem()
{
assert(nachMinimierung==1);
diff --git a/mgizapp/src/mkcls/MSBOptimization.h b/mgizapp/src/mkcls/MSBOptimization.h
index ab30c98..b333175 100644
--- a/mgizapp/src/mkcls/MSBOptimization.h
+++ b/mgizapp/src/mkcls/MSBOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -28,39 +28,39 @@ USA.
#ifndef MSBOPTIMIZATION
-#define MSBOPTIMIZATION
+#define MSBOPTIMIZATION
#include "PopOptimization.h"
class MSBOptimization : public PopOptimization
- {
+{
- protected:
-
- Array<double> percentOfSteps;
- Array<double> percentOfRun;
+protected:
- int expectedSteps;
- short nachMinimierung;
-
- virtual void zInitialize();
-
+ Array<double> percentOfSteps;
+ Array<double> percentOfRun;
- public:
- MSBOptimization(Problem &s,int verf,int anz,Array<double> &pos,
- Array<double> &por);
-
+ int expectedSteps;
+ short nachMinimierung;
- virtual ~MSBOptimization(){}
+ virtual void zInitialize();
- virtual double minimize(int steps=-1);
-
- static void optimizeValues(Problem &p,int verfahren);
-
+public:
+ MSBOptimization(Problem &s,int verf,int anz,Array<double> &pos,
+ Array<double> &por);
+
+
+ virtual ~MSBOptimization() {}
+
+ virtual double minimize(int steps=-1);
+
+
+ static void optimizeValues(Problem &p,int verfahren);
+
+
+ Problem& bestProblem();
- Problem& bestProblem();
-
};
#endif
diff --git a/mgizapp/src/mkcls/MYOptimization.cpp b/mgizapp/src/mkcls/MYOptimization.cpp
index ced9d31..83fb6dd 100644
--- a/mgizapp/src/mkcls/MYOptimization.cpp
+++ b/mgizapp/src/mkcls/MYOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -26,57 +26,53 @@ USA.
#include "MYOptimization.h"
-MYOptimization::MYOptimization(Problem &p,int m)
-: IterOptimization(p,m),acceptFlagsNumber(0),acceptions(0),total(0)
+MYOptimization::MYOptimization(Problem &p,int m)
+ : IterOptimization(p,m),acceptFlagsNumber(0),acceptions(0),total(0)
{
}
MYOptimization::MYOptimization(MYOptimization &o)
-: IterOptimization(o),acceptFlagsNumber(0),acceptions(0),total(0)
+ : IterOptimization(o),acceptFlagsNumber(0),acceptions(0),total(0)
{
}
short MYOptimization::accept(double delta)
- {
- int doIt;
- int verbesserung = delta<0;
- if( delta < 0 )
- doIt=1;
- else
- {
- if(total>=NUMBER_OF_ACCEPTIONS)
- {
- double prob = acceptions/(float)(NUMBER_OF_ACCEPTIONS);
- double zuf = zufall01();
-
- doIt=zuf<prob;
- }
- else
- doIt=0;
- }
- if( total>=NUMBER_OF_ACCEPTIONS )
- {
- if( acceptFlags[acceptFlagsNumber] )
- acceptions--;
- }
- acceptFlags[acceptFlagsNumber]=verbesserung;
- if( verbesserung )
- acceptions++;
- total++;
- acceptFlagsNumber++;
- if(acceptFlagsNumber>=NUMBER_OF_ACCEPTIONS)
- acceptFlagsNumber=0;
- return doIt;
- }
-
-short MYOptimization::end()
- {
- return endFlag>0 && total>NUMBER_OF_ACCEPTIONS && acceptions==0;
- }
+{
+ int doIt;
+ int verbesserung = delta<0;
+ if( delta < 0 )
+ doIt=1;
+ else {
+ if(total>=NUMBER_OF_ACCEPTIONS) {
+ double prob = acceptions/(float)(NUMBER_OF_ACCEPTIONS);
+ double zuf = zufall01();
+
+ doIt=zuf<prob;
+ } else
+ doIt=0;
+ }
+ if( total>=NUMBER_OF_ACCEPTIONS ) {
+ if( acceptFlags[acceptFlagsNumber] )
+ acceptions--;
+ }
+ acceptFlags[acceptFlagsNumber]=verbesserung;
+ if( verbesserung )
+ acceptions++;
+ total++;
+ acceptFlagsNumber++;
+ if(acceptFlagsNumber>=NUMBER_OF_ACCEPTIONS)
+ acceptFlagsNumber=0;
+ return doIt;
+}
+
+short MYOptimization::end()
+{
+ return endFlag>0 && total>NUMBER_OF_ACCEPTIONS && acceptions==0;
+}
void MYOptimization::abkuehlen()
- {
- }
+{
+}
+
-
void MYOptimization::makeGraphOutput()
{
IterOptimization::makeGraphOutput();
diff --git a/mgizapp/src/mkcls/MYOptimization.h b/mgizapp/src/mkcls/MYOptimization.h
index a6ca70c..731e117 100644
--- a/mgizapp/src/mkcls/MYOptimization.h
+++ b/mgizapp/src/mkcls/MYOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,30 +32,31 @@ USA.
#define NUMBER_OF_ACCEPTIONS 100
-class MYOptimization: public IterOptimization {
-
- protected:
- virtual short accept(double delta);
-
+class MYOptimization: public IterOptimization
+{
- virtual void abkuehlen();
-
+protected:
+ virtual short accept(double delta);
- virtual short end();
-
- public:
- MYOptimization(Problem &p,int maxIter=-1);
-
+ virtual void abkuehlen();
- MYOptimization(MYOptimization &o);
-
- int acceptFlags[NUMBER_OF_ACCEPTIONS],acceptFlagsNumber;
- int acceptions,total;
+ virtual short end();
+
+
+public:
+ MYOptimization(Problem &p,int maxIter=-1);
+
+
+ MYOptimization(MYOptimization &o);
+
+
+ int acceptFlags[NUMBER_OF_ACCEPTIONS],acceptFlagsNumber;
+ int acceptions,total;
+
+ void makeGraphOutput();
- void makeGraphOutput();
-
};
#endif
diff --git a/mgizapp/src/mkcls/Optimization.cpp b/mgizapp/src/mkcls/Optimization.cpp
index 03e06df..60b3581 100644
--- a/mgizapp/src/mkcls/Optimization.cpp
+++ b/mgizapp/src/mkcls/Optimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
diff --git a/mgizapp/src/mkcls/Optimization.h b/mgizapp/src/mkcls/Optimization.h
index 4c43427..0a1d335 100644
--- a/mgizapp/src/mkcls/Optimization.h
+++ b/mgizapp/src/mkcls/Optimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,14 +33,14 @@ USA.
#include "Problem.h"
#include "general.h"
-class Optimization
+class Optimization
{
-
+
public:
- virtual double minimize(int steps)=0;
- virtual ~Optimization();
-
+ virtual double minimize(int steps)=0;
+ virtual ~Optimization();
+
};
#endif
diff --git a/mgizapp/src/mkcls/PopOptimization.cpp b/mgizapp/src/mkcls/PopOptimization.cpp
index 2e65a2c..70c9cfb 100644
--- a/mgizapp/src/mkcls/PopOptimization.cpp
+++ b/mgizapp/src/mkcls/PopOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -40,26 +40,32 @@ int compareProbAndOpt(const void *p,const void *j)
return +1;
}
bool operator<(const ProbAndOpt&a, const ProbAndOpt&b)
- {
- return a.prob->value()<b.prob->value();
- }
+{
+ return a.prob->value()<b.prob->value();
+}
bool operator==(const ProbAndOpt&a, const ProbAndOpt&b)
- {
- return a.prob->value()==b.prob->value();
- }
+{
+ return a.prob->value()==b.prob->value();
+}
+
+ostream& operator<<(ostream&o , const ProbAndOpt&)
+{
+ return o;
+}
+istream& operator>>(istream&i , ProbAndOpt&)
+{
+ return i;
+}
-ostream& operator<<(ostream&o , const ProbAndOpt&){return o;}
-istream& operator>>(istream&i , ProbAndOpt&){return i;}
-
-PopOptimization::PopOptimization(Problem &p,int verf,int anz)
-: probandopt(anz),initialisiert(0),verfahren(verf)
+PopOptimization::PopOptimization(Problem &p,int verf,int anz)
+ : probandopt(anz),initialisiert(0),verfahren(verf)
{
originalProblem = &p;
}
-
+
int PopOptimization::size()
{
return probandopt.size();
@@ -79,27 +85,26 @@ Optimization *PopOptimization::optimization(int i)
void PopOptimization::zInitialize()
{
- int i;
+ int i;
zufallSeed();
- for(i=0;i<size();i++)
- {
- probandopt[i].prob=originalProblem->makeEqualProblem();
- probandopt[i].prob->initialize();
- }
+ for(i=0; i<size(); i++) {
+ probandopt[i].prob=originalProblem->makeEqualProblem();
+ probandopt[i].prob->initialize();
+ }
zufallSeed();
- for(i=0;i<size();i++)
+ for(i=0; i<size(); i++)
probandopt[i].opt=(Optimization *)genIterOptimizer(verfahren,
- *(probandopt[i].prob),-1);
+ *(probandopt[i].prob),-1);
initialisiert=1;
}
-
+
void PopOptimization::sort()
{
assert(initialisiert);
-
+
probandopt.sort(size());
}
diff --git a/mgizapp/src/mkcls/PopOptimization.h b/mgizapp/src/mkcls/PopOptimization.h
index be8d4a2..93eb8dc 100644
--- a/mgizapp/src/mkcls/PopOptimization.h
+++ b/mgizapp/src/mkcls/PopOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,8 +32,7 @@ USA.
#include "Optimization.h"
-typedef struct
-{
+typedef struct {
Optimization *opt;
Problem *prob;
} ProbAndOpt;
@@ -47,43 +46,44 @@ inline DEFINE_STANDARD_COMPARE(ProbAndOpt);
int compareProbAndOpt(const void *p,const void *j);
-class PopOptimization : public Optimization {
+class PopOptimization : public Optimization
+{
+
+
+private:
+ Array<ProbAndOpt> probandopt;
+
+protected:
+ int initialisiert;
+ Problem *originalProblem;
+
+
+ int verfahren;
+
+
+ virtual void zInitialize();
+
+
+public:
+ PopOptimization(Problem &s,int verf,int anz);
+
+
+ virtual ~PopOptimization() {}
+
+ int size();
+
+ void sort();
- private:
- Array<ProbAndOpt> probandopt;
-
- protected:
- int initialisiert;
- Problem *originalProblem;
-
-
- int verfahren;
-
- virtual void zInitialize();
-
+ virtual Problem& bestProblem()=0;
- public:
- PopOptimization(Problem &s,int verf,int anz);
-
-
- virtual ~PopOptimization() {}
- int size();
-
-
- void sort();
-
+ Problem *problem(int i);
- virtual Problem& bestProblem()=0;
-
- Problem *problem(int i);
-
+ Optimization *optimization(int i);
- Optimization *optimization(int i);
-
};
#endif
diff --git a/mgizapp/src/mkcls/Problem.cpp b/mgizapp/src/mkcls/Problem.cpp
index 6e126c8..d57af3f 100644
--- a/mgizapp/src/mkcls/Problem.cpp
+++ b/mgizapp/src/mkcls/Problem.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -29,14 +29,14 @@ USA.
Problem::~Problem() {}
Problem::Problem(int max,int anz,int _initialisierung,int _auswertung,
- int _nachbarschaft)
-: initialized(0),curCompVal(0),curCompChange(0),maxCompVal(max),maxComp(anz),curComp(0),
- initialisierung(_initialisierung),auswertung(_auswertung),nachbarschaft(_nachbarschaft),
- numberOfFullEvaluations(0),numberOfPartEvaluations(0),numberOfDoChange(0)
-{
+ int _nachbarschaft)
+ : initialized(0),curCompVal(0),curCompChange(0),maxCompVal(max),maxComp(anz),curComp(0),
+ initialisierung(_initialisierung),auswertung(_auswertung),nachbarschaft(_nachbarschaft),
+ numberOfFullEvaluations(0),numberOfPartEvaluations(0),numberOfDoChange(0)
+{
if( verboseMode>1 )
- cout << "Initialization of Problem: " << maxComp << " " << maxCompVal
- << endl;
+ cout << "Initialization of Problem: " << maxComp << " " << maxCompVal
+ << endl;
}
void Problem::initialize(int i)
@@ -57,7 +57,7 @@ void Problem::doChange(ProblemChange &c)
assert (initialized);
curCompChange=1;
_doChange(c);
- numberOfDoChange++;
+ numberOfDoChange++;
}
void Problem::incrementDirection()
@@ -77,10 +77,10 @@ ProblemChange& Problem::change()
incrementDirection();
ProblemChange *p;
- int changeFound=_change(&p);
+ int changeFound=_change(&p);
curCompVal++;
if( changeFound==0 )
- return change();
+ return change();
else
return *p;
}
@@ -97,9 +97,11 @@ double Problem::valueChange(ProblemChange &x)
numberOfPartEvaluations++;
assert( initialized );
double currentValue=value();
- _doChange(x);numberOfDoChange++;
+ _doChange(x);
+ numberOfDoChange++;
double newValue=value();
- _undoChange(x);numberOfDoChange++;
+ _undoChange(x);
+ numberOfDoChange++;
assert( currentValue==value() );
return newValue-currentValue;
}
@@ -107,8 +109,8 @@ double Problem::valueChange(ProblemChange &x)
void Problem::dumpOn(ostream &strm)
{
assert( initialized );
- strm << "Problem(" << initialisierung << "," << auswertung << ","
- << nachbarschaft << ")\n";
+ strm << "Problem(" << initialisierung << "," << auswertung << ","
+ << nachbarschaft << ")\n";
strm << " #value: " << numberOfFullEvaluations << endl;
strm << "#valueChange: " << numberOfPartEvaluations << endl;
strm << " #doChange: " << numberOfDoChange << endl;
@@ -120,41 +122,46 @@ StatVar& Problem::deviationStatVar(Optimization &s,int anz)
StatVar &v=*new StatVar;
double cur=value();
int howOften=0;
- while( v.getNum()<anz )
- {
- if( howOften++>50000 )
- break;
- double neuer=s.minimize(1);
- if( neuer>cur )
- v.addValue(neuer-cur);
- cur=neuer;
- vassert(NULLFLOAT(cur-value()));
- }
+ while( v.getNum()<anz ) {
+ if( howOften++>50000 )
+ break;
+ double neuer=s.minimize(1);
+ if( neuer>cur )
+ v.addValue(neuer-cur);
+ cur=neuer;
+ vassert(NULLFLOAT(cur-value()));
+ }
return v;
}
void Problem::dumpInfos(ostream &strm)
{
- strm << "Problem: " << endl;
+ strm << "Problem: " << endl;
assert( initialized );
}
-double Problem::nicevalue(double)
-{
- return value();
+double Problem::nicevalue(double)
+{
+ return value();
}
-int Problem::maxDimensionVal(void) {return -1;}
-int Problem::maxDimension(void) {return -1;}
+int Problem::maxDimensionVal(void)
+{
+ return -1;
+}
+int Problem::maxDimension(void)
+{
+ return -1;
+}
ProblemChange::~ProblemChange()
- {
- }
-
+{
+}
+
ProblemChange::ProblemChange()
- {
- }
+{
+}
void Problem::setValuesFrom(Problem *p)
{
diff --git a/mgizapp/src/mkcls/Problem.h b/mgizapp/src/mkcls/Problem.h
index 337390e..1dd090c 100644
--- a/mgizapp/src/mkcls/Problem.h
+++ b/mgizapp/src/mkcls/Problem.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -28,7 +28,7 @@ USA.
#ifndef PROBLEMCHANGE
-#define PROBLEMCHANGE
+#define PROBLEMCHANGE
#include <iostream>
#include "general.h"
#include "StatVar.h"
@@ -38,122 +38,129 @@ class Optimization;
class ProblemChange
{
- public:
- virtual ~ProblemChange();
- ProblemChange();
+public:
+ virtual ~ProblemChange();
+ ProblemChange();
};
-class Problem {
+class Problem
+{
+
+private:
+ short initialized;
+ int curCompVal;
+ short curCompChange;
+ int maxCompVal;
+ int maxComp;
+
+
+protected:
+ int curComp;
+
+ void setValuesFrom(Problem *p);
+
+ virtual int maxDimensionVal(void) ;
+
+
+ virtual int maxDimension(void) ;
+
+
+ inline int curDimension(void) {
+ assert(maxComp!=-1);
+ return curComp;
+ }
+
+
+ inline int curDimensionVal(void) {
+ assert(maxComp!=-1);
+ return curCompVal;
+ }
+
+
+
+ virtual void _doChange(ProblemChange &c)=0;
+
+
+ virtual int _change(ProblemChange **p)=0;
+
- private:
- short initialized;
- int curCompVal;
- short curCompChange;
- int maxCompVal;
- int maxComp;
+ virtual void _undoChange(ProblemChange &c)=0;
-
- protected:
- int curComp;
- void setValuesFrom(Problem *p);
+ virtual void _initialize(int initialisierung)=0;
- virtual int maxDimensionVal(void) ;
-
- virtual int maxDimension(void) ;
-
-
- inline int curDimension(void) { assert(maxComp!=-1);return curComp;}
-
+ virtual double _value()=0;
- inline int curDimensionVal(void) { assert(maxComp!=-1);return curCompVal;}
-
+public:
+ Problem(int maxCompVal=-1,int maxComp=-1,int _initialisierung=0,
+ int _auswertung=0,int _nachbarschaft=0);
- virtual void _doChange(ProblemChange &c)=0;
-
+ virtual ~Problem();
- virtual int _change(ProblemChange **p)=0;
-
- virtual void _undoChange(ProblemChange &c)=0;
-
+ void doChange(ProblemChange &c);
- virtual void _initialize(int initialisierung)=0;
-
- virtual double _value()=0;
-
+ ProblemChange& change();
- public:
- Problem(int maxCompVal=-1,int maxComp=-1,int _initialisierung=0,
- int _auswertung=0,int _nachbarschaft=0);
- virtual ~Problem();
-
-
- void doChange(ProblemChange &c);
-
+ virtual double value();
- ProblemChange& change();
-
- virtual double value();
-
+ virtual double valueChange(ProblemChange &c);
- virtual double valueChange(ProblemChange &c);
-
- virtual void initialize(int a= -23);
-
+ virtual void initialize(int a= -23);
- inline virtual short endCriterion();
-
- virtual int maxNonBetterIterations()=0;
-
+ inline virtual short endCriterion();
- virtual int expectedNumberOfIterations()=0;
-
- virtual void dumpOn(ostream &strm);
-
+ virtual int maxNonBetterIterations()=0;
- virtual void dumpInfos(ostream &strm);
-
- virtual Problem *makeEqualProblem()=0;
-
+ virtual int expectedNumberOfIterations()=0;
+
+
+ virtual void dumpOn(ostream &strm);
+
+
+ virtual void dumpInfos(ostream &strm);
+
+
+ virtual Problem *makeEqualProblem()=0;
+
+
+ virtual double nicevalue(double vorher=1e100);
+
+
+ virtual StatVar& deviationStatVar(Optimization &s,int anz);
+
+
+ virtual void incrementDirection();
+
- virtual double nicevalue(double vorher=1e100);
-
- virtual StatVar& deviationStatVar(Optimization &s,int anz);
-
- virtual void incrementDirection();
-
-
-
-
int initialisierung;
int auswertung;
int nachbarschaft;
-
- int numberOfFullEvaluations;
- int numberOfPartEvaluations;
- int numberOfDoChange;
-
-
+ int numberOfFullEvaluations;
+ int numberOfPartEvaluations;
+ int numberOfDoChange;
+
+
+
};
inline short Problem::endCriterion()
-{
+{
return 0;
-};
+};
#endif
diff --git a/mgizapp/src/mkcls/ProblemTest.cpp b/mgizapp/src/mkcls/ProblemTest.cpp
index 675c8cb..8951159 100644
--- a/mgizapp/src/mkcls/ProblemTest.cpp
+++ b/mgizapp/src/mkcls/ProblemTest.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -55,38 +55,37 @@ int compareProblem(const void *p,const void *j)
IterOptimization *genIterOptimizer(int verfahren,Problem &problem,int maxIter)
{
IterOptimization *opt;
- switch(verfahren)
- {
- case HC_OPT:
- opt = new HCOptimization(problem,maxIter);
- break;
- case GDA_OPT:
- opt = new GDAOptimization(problem,maxIter);
- break;
- case SA_OPT:
- opt = new SAOptimization(problem,maxIter);
- break;
- case TA_OPT:
- opt = new TAOptimization(problem,maxIter);
- break;
- case RRT_OPT:
- opt = new RRTOptimization(problem,maxIter);
- break;
- case MY_OPT:
- opt = new MYOptimization(problem,maxIter);
- break;
- default:
- return 0;
- }
- problem.initialize();
+ switch(verfahren) {
+ case HC_OPT:
+ opt = new HCOptimization(problem,maxIter);
+ break;
+ case GDA_OPT:
+ opt = new GDAOptimization(problem,maxIter);
+ break;
+ case SA_OPT:
+ opt = new SAOptimization(problem,maxIter);
+ break;
+ case TA_OPT:
+ opt = new TAOptimization(problem,maxIter);
+ break;
+ case RRT_OPT:
+ opt = new RRTOptimization(problem,maxIter);
+ break;
+ case MY_OPT:
+ opt = new MYOptimization(problem,maxIter);
+ break;
+ default:
+ return 0;
+ }
+ problem.initialize();
return opt;
}
double solveProblem(int verbose,Problem &problem,int versuche,
- int optimierungsschritte,int verfahren,double &mean,
- StatVar &endNice,StatVar &auswertungen,StatVar &startNice,
- double maxClock,int *iterationsschritte)
+ int optimierungsschritte,int verfahren,double &mean,
+ StatVar &endNice,StatVar &auswertungen,StatVar &startNice,
+ double maxClock,int *iterationsschritte)
{
double smallestV=1e100;
Problem *bestP=0;
@@ -94,101 +93,104 @@ double solveProblem(int verbose,Problem &problem,int versuche,
StatVar dauer;
StatVar iterschritte;
- for(int i=0;i<versuche;i++)
- {
- if(verbose>2)
- {
- cout << " " << i << " of " << versuche << ".\n";
- cout.flush();
- }
- double vorher=clockSec();
-
- IterOptimization *opt=genIterOptimizer(verfahren,problem,
- optimierungsschritte);
- problem.numberOfPartEvaluations=0;
-
- startNice.addValue(problem.nicevalue());
- start.addValue(problem.value());
-
- double v=opt->minimize(optimierungsschritte);
-
- if( problem.numberOfPartEvaluations==0)
+ for(int i=0; i<versuche; i++) {
+ if(verbose>2) {
+ cout << " " << i << " of " << versuche << ".\n";
+ cout.flush();
+ }
+ double vorher=clockSec();
+
+ IterOptimization *opt=genIterOptimizer(verfahren,problem,
+ optimierungsschritte);
+ problem.numberOfPartEvaluations=0;
+
+ startNice.addValue(problem.nicevalue());
+ start.addValue(problem.value());
+
+ double v=opt->minimize(optimierungsschritte);
+
+ if( problem.numberOfPartEvaluations==0)
auswertungen.addValue(opt->getCurStep());
- else
+ else
auswertungen.addValue(problem.numberOfPartEvaluations);
- iterschritte.addValue(opt->getCurStep());
-
- endNice.addValue(problem.nicevalue());
- end.addValue(problem.value());
- dauer.addValue(clockSec()-vorher);
- if( verbose>2 )
- {
- cout << i << ". " << v << ": ";
- problem.dumpOn(cout);
- }
- delete opt;
- if( v<smallestV && verbose>1 )
- {
- bestP=problem.makeEqualProblem();
- smallestV=v;
- }
- if( verbose>2 )
- cout << " time: " << clockSec() << " best:" << endNice.quantil(0)
- << " this:" << problem.nicevalue() << endl;
- if( maxClock && clockSec()>maxClock )
- {
- if(verbose)
- cout << "Stop because of time limit ( " << (clockSec()-maxClock)
- << " Sekunden)\n";
- break;
- }
+ iterschritte.addValue(opt->getCurStep());
+
+ endNice.addValue(problem.nicevalue());
+ end.addValue(problem.value());
+ dauer.addValue(clockSec()-vorher);
+ if( verbose>2 ) {
+ cout << i << ". " << v << ": ";
+ problem.dumpOn(cout);
+ }
+ delete opt;
+ if( v<smallestV && verbose>1 ) {
+ bestP=problem.makeEqualProblem();
+ smallestV=v;
}
-
- if(verbose)
- {
- cout << "\n***** " << start.getNum() << " runs. (algorithm:";
- switch(verfahren)
- {
- case HC_OPT:
- cout << "HC";
- break;
- case RRT_OPT:
- cout << "RRT";
- break;
- case GDA_OPT:
- cout << "GDA";
- break;
- case TA_OPT:
- cout << "TA";
- break;
- case SA_OPT:
- cout << "SA";
- break;
- case MY_OPT:
- cout << "MY";
- break;
- default:
- cout << "!unknown!";
- }
- cout << ")*****\n";
- problem.dumpInfos(cout);
- cout << endl;
- cout << "start-costs: "; start.dumpOn(cout); cout << endl;
- cout << " end-costs: "; end.dumpOn(cout); cout << endl;
- cout << " start-pp: "; startNice.dumpOn(cout); cout << endl;
- cout << " end-pp: "; endNice.dumpOn(cout); cout << endl;
- cout << " iterations: "; auswertungen.dumpOn(cout); cout << endl;
- cout << " time: "; dauer.dumpOn(cout);
- cout << endl;
+ if( verbose>2 )
+ cout << " time: " << clockSec() << " best:" << endNice.quantil(0)
+ << " this:" << problem.nicevalue() << endl;
+ if( maxClock && clockSec()>maxClock ) {
+ if(verbose)
+ cout << "Stop because of time limit ( " << (clockSec()-maxClock)
+ << " Sekunden)\n";
+ break;
}
- if( bestP )
- {
- if(PrintBestTo)
- bestP->dumpOn(*PrintBestTo);
- else
- bestP->dumpOn(cout);
- delete bestP;
+ }
+
+ if(verbose) {
+ cout << "\n***** " << start.getNum() << " runs. (algorithm:";
+ switch(verfahren) {
+ case HC_OPT:
+ cout << "HC";
+ break;
+ case RRT_OPT:
+ cout << "RRT";
+ break;
+ case GDA_OPT:
+ cout << "GDA";
+ break;
+ case TA_OPT:
+ cout << "TA";
+ break;
+ case SA_OPT:
+ cout << "SA";
+ break;
+ case MY_OPT:
+ cout << "MY";
+ break;
+ default:
+ cout << "!unknown!";
}
+ cout << ")*****\n";
+ problem.dumpInfos(cout);
+ cout << endl;
+ cout << "start-costs: ";
+ start.dumpOn(cout);
+ cout << endl;
+ cout << " end-costs: ";
+ end.dumpOn(cout);
+ cout << endl;
+ cout << " start-pp: ";
+ startNice.dumpOn(cout);
+ cout << endl;
+ cout << " end-pp: ";
+ endNice.dumpOn(cout);
+ cout << endl;
+ cout << " iterations: ";
+ auswertungen.dumpOn(cout);
+ cout << endl;
+ cout << " time: ";
+ dauer.dumpOn(cout);
+ cout << endl;
+ }
+ if( bestP ) {
+ if(PrintBestTo)
+ bestP->dumpOn(*PrintBestTo);
+ else
+ bestP->dumpOn(cout);
+ delete bestP;
+ }
mean = end.getMean();
if( iterationsschritte )
*iterationsschritte=(int)(iterschritte.getMean());
@@ -199,24 +201,23 @@ double solveProblem(int verbose,Problem &problem,int versuche,
void multiSolveProblem(Problem &problem,int versuche,int maxSeconds)
{
- int i;
+ int i;
int maxLaeufe;
double rDummy;
StatVar end[MAX_OPT_NR],auswertungen[MAX_OPT_NR],start[MAX_OPT_NR];
double maxClock=clockSec()+maxSeconds;
if(maxSeconds<=0)maxClock=0;
solveProblem(ProblemTestVerboseMode,problem,versuche,-1,HC_OPT,rDummy,
- end[HC_OPT],auswertungen[HC_OPT],start[HC_OPT],maxClock);
+ end[HC_OPT],auswertungen[HC_OPT],start[HC_OPT],maxClock);
maxLaeufe=(int)(auswertungen[HC_OPT].getMean()*5);
- for(i=0;i<MAX_OPT_NR;i++)
- {
- if( i==HC_OPT )
- continue;
- double maxClock=clockSec()+maxSeconds;
- if(maxSeconds<=0)maxClock=0;
- solveProblem(ProblemTestVerboseMode,problem,versuche, -1,i,rDummy,end[i],
- auswertungen[i],start[i],maxClock);
- }
+ for(i=0; i<MAX_OPT_NR; i++) {
+ if( i==HC_OPT )
+ continue;
+ double maxClock=clockSec()+maxSeconds;
+ if(maxSeconds<=0)maxClock=0;
+ solveProblem(ProblemTestVerboseMode,problem,versuche, -1,i,rDummy,end[i],
+ auswertungen[i],start[i],maxClock);
+ }
end[HC_OPT].title = " HC";
end[SA_OPT].title = " SA";
end[GDA_OPT].title = " GDA";
@@ -224,28 +225,28 @@ void multiSolveProblem(Problem &problem,int versuche,int maxSeconds)
end[TA_OPT].title = " TA";
end[MY_OPT].title = " MY";
- for(i=0;i<MAX_OPT_NR;i++)
+ for(i=0; i<MAX_OPT_NR; i++)
end[i].quantil(0.5);
cout << "mean: \n";
compareStatVarQuantil=-1;
qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
+ for(i=0; i<MAX_OPT_NR; i++)
cout << end[i].title << " " << end[i].getMean() << endl;
cout << "\nbest: \n";
compareStatVarQuantil=0;
qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
- cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
- << endl;
+ for(i=0; i<MAX_OPT_NR; i++)
+ cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
+ << endl;
cout << "\n20%-quantil: \n";
compareStatVarQuantil=0.2;
qsort(end,MAX_OPT_NR,sizeof(StatVar),compareStatVar);
- for(i=0;i<MAX_OPT_NR;i++)
- cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
- << endl;
+ for(i=0; i<MAX_OPT_NR; i++)
+ cout << end[i].title << " " << end[i].quantil(compareStatVarQuantil)
+ << endl;
}
diff --git a/mgizapp/src/mkcls/ProblemTest.h b/mgizapp/src/mkcls/ProblemTest.h
index 4bd8bda..63e7a52 100644
--- a/mgizapp/src/mkcls/ProblemTest.h
+++ b/mgizapp/src/mkcls/ProblemTest.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -44,8 +44,8 @@ extern short ProblemTestVerboseMode;
extern ofstream *PrintBestTo,*PrintBestTo2;
double solveProblem(int verbose,Problem &problem,int versuche,
-int optimierungsschritte,int verfahren,double &mean,StatVar &endValue,
-StatVar &laufzeit,StatVar &initValue,double maxSec= 0,int *iterationsschritte=0);
+ int optimierungsschritte,int verfahren,double &mean,StatVar &endValue,
+ StatVar &laufzeit,StatVar &initValue,double maxSec= 0,int *iterationsschritte=0);
diff --git a/mgizapp/src/mkcls/RRTOptimization.cpp b/mgizapp/src/mkcls/RRTOptimization.cpp
index 55e2122..2fd6860 100644
--- a/mgizapp/src/mkcls/RRTOptimization.cpp
+++ b/mgizapp/src/mkcls/RRTOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,24 +31,24 @@ double RRTOptimization::defaultAnnRate=0.6;
double RRTOptimization::defaultMultiple=2.0;
-
-RRTOptimization::RRTOptimization(Problem &p,double t,double dt,int m)
-: IterOptimization(p,m),deviation(t),deltaDeviation(dt)
-{
+
+RRTOptimization::RRTOptimization(Problem &p,double t,double dt,int m)
+ : IterOptimization(p,m),deviation(t),deltaDeviation(dt)
+{
assert(deviation>=0);
}
-
-RRTOptimization:: RRTOptimization(Problem &p,int m)
-: IterOptimization(p,m),deviation(-1),deltaDeviation(0)
+
+RRTOptimization:: RRTOptimization(Problem &p,int m)
+ : IterOptimization(p,m),deviation(-1),deltaDeviation(0)
{
}
-
+
RRTOptimization::RRTOptimization(RRTOptimization &o)
-: IterOptimization(o)
+ : IterOptimization(o)
{
deviation = o.deviation;
deltaDeviation= o.deltaDeviation;
@@ -56,37 +56,36 @@ RRTOptimization::RRTOptimization(RRTOptimization &o)
}
-
+
void RRTOptimization::zInitialize()
{
IterOptimization::zInitialize();
- if( deviation<0 )
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- deviation = v.quantil(defaultAnnRate);
- deltaDeviation = deviation/(float)n;
-
- if( verboseMode>0 )
- cout << "#Algorithm: Record-To-Record-Travel: (anfAnnRate="
- << defaultAnnRate << ",T=" << deviation << ",deltaT="
- << deltaDeviation << ")\n";
-
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
+ if( deviation<0 ) {
+
+
+ int n;
+
+ StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
+
+ if( maxStep>0 )
+ n=(int)(maxStep*4.0/5.0);
+ else
+ maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
+
+ deviation = v.quantil(defaultAnnRate);
+ deltaDeviation = deviation/(float)n;
+
+ if( verboseMode>0 )
+ cout << "#Algorithm: Record-To-Record-Travel: (anfAnnRate="
+ << defaultAnnRate << ",T=" << deviation << ",deltaT="
+ << deltaDeviation << ")\n";
+
+ curStep=0;
+ endFlag=0;
+ delete &v;
+ problem.initialize();
+ IterOptimization::zInitialize();
+ }
record=problem.value();
assert(deviation>=0);
}
@@ -97,28 +96,24 @@ short RRTOptimization::end()
}
void RRTOptimization::abkuehlen()
{
- if( deviation>=0 )
- {
- deviation -= deltaDeviation;
- if(deviation<0)
- deviation=0;
- }
+ if( deviation>=0 ) {
+ deviation -= deltaDeviation;
+ if(deviation<0)
+ deviation=0;
+ }
}
short RRTOptimization::accept(double delta)
{
if( deviation<0 )
return 1;
- else
- {
- if( delta + curValue - deviation < record )
- {
- if( delta + curValue < record )
- record = delta+curValue;
- return 1;
- }
- else
- return 0;
- }
+ else {
+ if( delta + curValue - deviation < record ) {
+ if( delta + curValue < record )
+ record = delta+curValue;
+ return 1;
+ } else
+ return 0;
+ }
}
void RRTOptimization::makeGraphOutput()
@@ -129,89 +124,80 @@ void RRTOptimization::makeGraphOutput()
-
+
double RRTOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
+ int optimierungsschritte,int print)
{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#RRT-optimizeValues: Quantil: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- if(i==0) defaultAnnRate=0.2;
- else defaultAnnRate = 0.3+(float)(0.6*i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print )
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.8;
- return bestPar;
+ switch(typ) {
+ case 1: {
+ double bestPar=-1,best=1e100;
+ if( print )
+ cout << "#RRT-optimizeValues: Quantil: " << numParameter << endl;
+ for(int i=0; i<=numParameter; i++) {
+ StatVar end,laufzeit,init;
+ double now;
+ if(i==0) defaultAnnRate=0.2;
+ else defaultAnnRate = 0.3+(float)(0.6*i)/numParameter;
+ solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
+ end,laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultAnnRate;
+ }
+ if( print ) {
+ cout << defaultAnnRate << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
+ }
+ }
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
+ "Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultAnnRate=0.8;
+ return bestPar;
+ }
+ break;
+ case 10: {
+ double i;
+ double bestPar=-1,best=1e100;
+ StatVar end,laufzeit,init;
+
+ if( print )
+ cout << "#RRT-optimizeValues: defaultMultiple" << 8 << endl;
+ for(i=0.5; i<=10; i+=1.5) {
+ double now;
+ defaultMultiple = i;
+ solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
+ end,laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultMultiple;
}
- break;
- case 10:
- {
- double i;
- double bestPar=-1,best=1e100;
- StatVar end,laufzeit,init;
-
- if( print )
- cout << "#RRT-optimizeValues: defaultMultiple" << 8 << endl;
- for(i=0.5;i<=10;i+=1.5)
- {
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
+ if( print ) {
+ cout << defaultMultiple << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
}
- break;
- default:
- cerr << "Error: wrong parameter-type in RRTOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
}
- return 1e100;
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
+ "Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultMultiple=2.0;
+ return bestPar;
+ }
+ break;
+ default:
+ cerr << "Error: wrong parameter-type in RRTOptimization::optimizeValue ("
+ << typ << ")\n";
+ exit(1);
+ }
+ return 1e100;
}
diff --git a/mgizapp/src/mkcls/RRTOptimization.h b/mgizapp/src/mkcls/RRTOptimization.h
index 42ec6e2..2b5f59b 100644
--- a/mgizapp/src/mkcls/RRTOptimization.h
+++ b/mgizapp/src/mkcls/RRTOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,49 +31,50 @@ USA.
#define RRTOPTIMIZATION
#include "IterOptimization.h"
-class RRTOptimization : public IterOptimization {
-
+class RRTOptimization : public IterOptimization
+{
- private:
- double deviation;
- double deltaDeviation;
- double record;
- protected:
- virtual void zInitialize();
-
+private:
+ double deviation;
+ double deltaDeviation;
+ double record;
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
+protected:
+ virtual void zInitialize();
- virtual short end();
-
- virtual void makeGraphOutput();
-
+ virtual short accept(double delta);
- public:
- RRTOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
- RRTOptimization(Problem &p,int maxIter=-1);
-
+ virtual void abkuehlen();
- RRTOptimization(RRTOptimization &o);
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
+ virtual short end();
+
+
+ virtual void makeGraphOutput();
+
+
+public:
+ RRTOptimization(Problem &p,double temperatur,
+ double deltaTemperatur,int maxIter=-1);
+
+
+ RRTOptimization(Problem &p,int maxIter=-1);
+
+
+ RRTOptimization(RRTOptimization &o);
+
+
+ static double optimizeValue(Problem &p,int proParameter,
+ int numParameter,int typ,int schritte= -1,int verbose=1);
+
+
+ static double defaultAnnRate;
+
+ static double defaultMultiple;
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
};
#endif
diff --git a/mgizapp/src/mkcls/SAOptimization.cpp b/mgizapp/src/mkcls/SAOptimization.cpp
index d5ebd26..3be62be 100644
--- a/mgizapp/src/mkcls/SAOptimization.cpp
+++ b/mgizapp/src/mkcls/SAOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,33 +31,33 @@ USA.
#include "ProblemTest.h"
-#define ALPHA 0.95
+#define ALPHA 0.95
double SAOptimization::defaultAnfAnnRate=0.9;
double SAOptimization::defaultEndAnnRate=1e-9;
double SAOptimization::defaultMultiple=2.0;
-
+
SAOptimization::SAOptimization(Problem &p,int m)
-: IterOptimization(p,m), temperatur(-1)
+ : IterOptimization(p,m), temperatur(-1)
{
}
-
-SAOptimization::SAOptimization(Problem &p,double t,double a,int s,int m)
-: IterOptimization(p,m),temperatur(t), alpha(a),schrittzahl(s)
+
+SAOptimization::SAOptimization(Problem &p,double t,double a,int s,int m)
+ : IterOptimization(p,m),temperatur(t), alpha(a),schrittzahl(s)
{
assert(alpha<1);
assert(schrittzahl>0);
assert(t>0);
}
-
+
SAOptimization::SAOptimization(SAOptimization &o)
-: IterOptimization(o)
+ : IterOptimization(o)
{
temperatur = o.temperatur;
endTemperatur = o.endTemperatur;
@@ -66,42 +66,41 @@ SAOptimization::SAOptimization(SAOptimization &o)
stepsForAbkuehlung = o.stepsForAbkuehlung;
}
-
+
void SAOptimization::zInitialize()
{
IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- stepsForAbkuehlung=(int)(maxStep*4.0/5.0);
- else
- maxStep=stepsForAbkuehlung=(int)(problem.expectedNumberOfIterations()*
- defaultMultiple);
-
- temperatur = v.getMean()/log(1/defaultAnfAnnRate);
- endTemperatur = v.getMean()/log(1/defaultEndAnnRate);
- schrittzahl = (int)(stepsForAbkuehlung/(log(endTemperatur/temperatur)/
- log(ALPHA)));
- if(schrittzahl==0)schrittzahl=1;
- alpha = ALPHA;
-
- if( verboseMode )
- cout << "#Algorithm: Simulated Annealing(anfAnnRate="
- << defaultAnfAnnRate <<",(endAnnRate=" << defaultEndAnnRate
- << ",T0=" << temperatur<< ",Te=" << endTemperatur<< ",schrittzahl="
- << schrittzahl<< ",stepsForAbkuehlung=" << stepsForAbkuehlung
- << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
+ if( temperatur<0) {
+
+
+
+ StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
+
+ if( maxStep>0 )
+ stepsForAbkuehlung=(int)(maxStep*4.0/5.0);
+ else
+ maxStep=stepsForAbkuehlung=(int)(problem.expectedNumberOfIterations()*
+ defaultMultiple);
+
+ temperatur = v.getMean()/log(1/defaultAnfAnnRate);
+ endTemperatur = v.getMean()/log(1/defaultEndAnnRate);
+ schrittzahl = (int)(stepsForAbkuehlung/(log(endTemperatur/temperatur)/
+ log(ALPHA)));
+ if(schrittzahl==0)schrittzahl=1;
+ alpha = ALPHA;
+
+ if( verboseMode )
+ cout << "#Algorithm: Simulated Annealing(anfAnnRate="
+ << defaultAnfAnnRate <<",(endAnnRate=" << defaultEndAnnRate
+ << ",T0=" << temperatur<< ",Te=" << endTemperatur<< ",schrittzahl="
+ << schrittzahl<< ",stepsForAbkuehlung=" << stepsForAbkuehlung
+ << ")\n";
+ curStep=0;
+ endFlag=0;
+ delete &v;
+ problem.initialize();
+ IterOptimization::zInitialize();
+ }
}
short SAOptimization::end()
@@ -115,40 +114,35 @@ short SAOptimization::end()
}
void SAOptimization::abkuehlen()
{
- if(temperatur>=0)
- {
- if( curStep%schrittzahl == 0 )
- temperatur=temperatur * alpha;
- if( curStep> stepsForAbkuehlung)
- temperatur = 0;
- }
+ if(temperatur>=0) {
+ if( curStep%schrittzahl == 0 )
+ temperatur=temperatur * alpha;
+ if( curStep> stepsForAbkuehlung)
+ temperatur = 0;
+ }
}
short SAOptimization::accept(double delta)
{
if( temperatur<0 )
return 1;
- else
- {
- if( delta > 0 )
- {
- if( temperatur==0 )
- return 0;
- else
- {
- double z=zufall01();
- assert(z!=0.0);
- if(z==0.0)
- z+=1e-20;
- double e=exp(-delta/temperatur);
-
-
-
- return z+0.000000000001<=e;
- }
- }
- else
- return 1;
- }
+ else {
+ if( delta > 0 ) {
+ if( temperatur==0 )
+ return 0;
+ else {
+ double z=zufall01();
+ assert(z!=0.0);
+ if(z==0.0)
+ z+=1e-20;
+ double e=exp(-delta/temperatur);
+
+
+
+ return z+0.000000000001<=e;
+ }
+ } else
+ return 1;
+ }
}
void SAOptimization::makeGraphOutput()
@@ -159,121 +153,108 @@ void SAOptimization::makeGraphOutput()
-
+
double SAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,
- int typ,int optimierungsschritte,int print)
+ int typ,int optimierungsschritte,int print)
{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#SA-optimizeValues: defaultAnfAnnRate" << endl;
- for(int i=0;i<numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAnfAnnRate=0.1 + (1.0/numParameter)*i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnfAnnRate;
- }
- if( print )
- {
- cout << defaultAnfAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnfAnnRate=0.9;
- return bestPar;
+ switch(typ) {
+ case 1: {
+ double bestPar=-1,best=1e100;
+ double now;
+ if( print )
+ cout << "#SA-optimizeValues: defaultAnfAnnRate" << endl;
+ for(int i=0; i<numParameter; i++) {
+ StatVar end,laufzeit,init;
+ defaultAnfAnnRate=0.1 + (1.0/numParameter)*i;
+ solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,
+ end,laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultAnfAnnRate;
}
- break;
- case 2:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#Optimierung von SA: defaultEndAnnRate" << endl;
- for(int i=1;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultEndAnnRate=1/(pow(10.0,i));
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultEndAnnRate;
- }
- if( print )
- {
- cout << defaultEndAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultEndAnnRate=1/10000.0;
- return bestPar;
+ if( print ) {
+ cout << defaultAnfAnnRate << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
+ }
+ }
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
+ "Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultAnfAnnRate=0.9;
+ return bestPar;
+ }
+ break;
+ case 2: {
+ double bestPar=-1,best=1e100;
+ double now;
+ if( print )
+ cout << "#Optimierung von SA: defaultEndAnnRate" << endl;
+ for(int i=1; i<=numParameter; i++) {
+ StatVar end,laufzeit,init;
+ defaultEndAnnRate=1/(pow(10.0,i));
+ solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
+ laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultEndAnnRate;
+ }
+ if( print ) {
+ cout << defaultEndAnnRate << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
+ }
+ }
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
+ "Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultEndAnnRate=1/10000.0;
+ return bestPar;
+ }
+ break;
+ case 10: {
+ double bestPar=-1,best=1e100;
+
+ if( print )
+ cout << "#SA-optimizeValues: defaultMultiple " << 8 << endl;
+ for(int i=1; i<=6; i++) {
+ StatVar end,laufzeit,init;
+ double now;
+ defaultMultiple = i;
+ solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
+ laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultMultiple;
}
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
-
- if( print )
- cout << "#SA-optimizeValues: defaultMultiple " << 8 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
+ if( print ) {
+ cout << defaultMultiple << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
}
- break;
- default:
- cerr << "Error: wrong parameter-type in SAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
}
- return 1e100;
+ if( print )
+ cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
+ "Bester Sigma SigmaSmaller SigmaBigger\n";
+ defaultMultiple=2.0;
+ return bestPar;
+ }
+ break;
+ default:
+ cerr << "Error: wrong parameter-type in SAOptimization::optimizeValue ("
+ << typ << ")\n";
+ exit(1);
+ }
+ return 1e100;
}
diff --git a/mgizapp/src/mkcls/SAOptimization.h b/mgizapp/src/mkcls/SAOptimization.h
index 97c528b..b9fb929 100644
--- a/mgizapp/src/mkcls/SAOptimization.h
+++ b/mgizapp/src/mkcls/SAOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,55 +32,55 @@ USA.
#include "IterOptimization.h"
class SAOptimization : public IterOptimization
- {
-
-
- private:
- double temperatur;
- double endTemperatur;
- double alpha;
- int schrittzahl;
- int stepsForAbkuehlung;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- SAOptimization(Problem &p,double temperatur,double alpha,
- int schrittzahl,int maxIter=-1);
-
-
- SAOptimization(Problem &p,int maxIter=-1);
-
-
- SAOptimization(SAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,
- int schritte= -1,int verbose=1);
-
-
- static double defaultAnfAnnRate;
-
- static double defaultEndAnnRate;
-
- static double defaultMultiple;
-
-
-};
-#endif
-
+{
+
+
+private:
+ double temperatur;
+ double endTemperatur;
+ double alpha;
+ int schrittzahl;
+ int stepsForAbkuehlung;
+
+protected:
+ virtual void zInitialize();
+
+
+ virtual short accept(double delta);
+
+
+ virtual void abkuehlen();
+
+
+ virtual short end();
+
+
+ virtual void makeGraphOutput();
+
+
+public:
+ SAOptimization(Problem &p,double temperatur,double alpha,
+ int schrittzahl,int maxIter=-1);
+
+
+ SAOptimization(Problem &p,int maxIter=-1);
+
+
+ SAOptimization(SAOptimization &o);
+
+
+ static double optimizeValue(Problem &p,int proParameter,
+ int numParameter,int typ,
+ int schritte= -1,int verbose=1);
+
+
+ static double defaultAnfAnnRate;
+
+ static double defaultEndAnnRate;
+
+ static double defaultMultiple;
+
+
+};
+#endif
+
diff --git a/mgizapp/src/mkcls/StatVar.cpp b/mgizapp/src/mkcls/StatVar.cpp
index a4605ea..ffd3703 100644
--- a/mgizapp/src/mkcls/StatVar.cpp
+++ b/mgizapp/src/mkcls/StatVar.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -27,10 +27,10 @@ USA.
#include <iostream>
#include <cstdlib>
-double compareStatVarQuantil=-1;
+double compareStatVarQuantil=-1;
StatV::~StatV() {}
-
+
int doublecompare(const void *p,const void *j)
{
@@ -46,16 +46,13 @@ int compareStatVar(const void *p,const void *j)
{
double a;
double b;
- if(compareStatVarQuantil>=0)
- {
- a=((StatVar *)p)->quantil(compareStatVarQuantil);
- b=((StatVar *)j)->quantil(compareStatVarQuantil);
- }
- else
- {
- a=((StatVar *)p)->getMean();
- b=((StatVar *)j)->getMean();
- }
+ if(compareStatVarQuantil>=0) {
+ a=((StatVar *)p)->quantil(compareStatVarQuantil);
+ b=((StatVar *)j)->quantil(compareStatVarQuantil);
+ } else {
+ a=((StatVar *)p)->getMean();
+ b=((StatVar *)j)->getMean();
+ }
if(a==b)
return 0;
if(a<b)
@@ -69,14 +66,12 @@ double StatVar::getSigmaSmaller()
{
double ss=0;
int ns=0;
- for(int i=0;i<n;i++)
- {
- if( values[i]<getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
+ for(int i=0; i<n; i++) {
+ if( values[i]<getMean() ) {
+ ss+=(values[i]-getMean())*(values[i]-getMean());
+ ns++;
}
+ }
if( ss/ns>0 )
return sqrt(ss/ns);
else
@@ -86,12 +81,11 @@ double StatVar::getSigmaBigger()
{
double ss=0;
int ns=0;
- for(int i=0;i<n;i++)
- if( values[i]>getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
+ for(int i=0; i<n; i++)
+ if( values[i]>getMean() ) {
+ ss+=(values[i]-getMean())*(values[i]-getMean());
+ ns++;
+ }
if( ss/ns>0 )
return sqrt(ss/ns);
else
@@ -99,42 +93,39 @@ double StatVar::getSigmaBigger()
}
-
+
void StatV::dumpOn(ostream &strm)
{
- strm << "MEAN: " << getMean() << " (" << smallest << "-" << biggest
- << ") SIGMA:" << getSigma()<< " ";
+ strm << "MEAN: " << getMean() << " (" << smallest << "-" << biggest
+ << ") SIGMA:" << getSigma()<< " ";
}
-
+
double StatVar::quantil(double percent)
{
int index=(int)(n*percent);
if(index==n)
index=n-1;
assert(index>=0&&index<n);
- if(sortedFlag==0)
- {
- qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
- assert(n<=values.size());
- sortedFlag=1;
- }
- if(index<0)
- {
- cerr << "WARNING: StatVar.cc\n";
- return 0.0;
- }
- else
+ if(sortedFlag==0) {
+ qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
+ assert(n<=values.size());
+ sortedFlag=1;
+ }
+ if(index<0) {
+ cerr << "WARNING: StatVar.cc\n";
+ return 0.0;
+ } else
return values[index];
}
-
+
void StatVar::printValues(ostream &strm)
{
qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
assert(n<=values.size());
- for(int i=0;i<n;i++)
+ for(int i=0; i<n; i++)
strm << i/(double)n << " " << values[i] << endl;
return;
}
diff --git a/mgizapp/src/mkcls/StatVar.h b/mgizapp/src/mkcls/StatVar.h
index edee026..2158bc2 100644
--- a/mgizapp/src/mkcls/StatVar.h
+++ b/mgizapp/src/mkcls/StatVar.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -38,95 +38,97 @@ extern double compareStatVarQuantil;
int compareStatVar(const void *p,const void *j);
class StatV
-
+
{
- protected:
- int n;
- double sum;
- double squareSum;
- double smallest,biggest;
-
- public:
- const char *title;
+protected:
+ int n;
+ double sum;
+ double squareSum;
+ double smallest,biggest;
+
+public:
+ const char *title;
StatV() : n(0),sum(0),squareSum(0),smallest(1e100),biggest(-1e100),title("") {}
virtual ~StatV();
-
-
- virtual void addValue(double a)
- {
- n++;
- sum+=a;
- squareSum+=a*a;
- if(smallest>a)
- smallest=a;
- if(biggest<a)
- biggest=a;
-
- }
-
-
- double getMean()
- { return sum/n; }
-
-
- double getSigma()
- {
- if(squareSum/n - getMean()*getMean()<=0)
- return 0.0;
- else
- return sqrt(squareSum/n - getMean()*getMean());
- }
-
-
-
- double getBiggest()
- { return biggest; }
-
-
- double getSmallest()
- { return smallest; }
-
-
- int getNum()
- { return n; }
-
+
+
+ virtual void addValue(double a) {
+ n++;
+ sum+=a;
+ squareSum+=a*a;
+ if(smallest>a)
+ smallest=a;
+ if(biggest<a)
+ biggest=a;
+
+ }
+
+
+ double getMean() {
+ return sum/n;
+ }
+
+
+ double getSigma() {
+ if(squareSum/n - getMean()*getMean()<=0)
+ return 0.0;
+ else
+ return sqrt(squareSum/n - getMean()*getMean());
+ }
+
+
+
+ double getBiggest() {
+ return biggest;
+ }
+
+
+ double getSmallest() {
+ return smallest;
+ }
+
+
+ int getNum() {
+ return n;
+ }
+
void dumpOn(ostream &strm);
-
+
};
class StatVar : public StatV
{
- private:
- Array<double> values;
- short sortedFlag;
- public:
- StatVar()
+private:
+ Array<double> values;
+ short sortedFlag;
+public:
+ StatVar()
: values(10,0.0,1),sortedFlag(0) {}
- virtual ~StatVar(){}
+ virtual ~StatVar() {}
double quantil(double percent=0.5);
-
-
- inline double value(int i)
- {return values[i];}
-
+
+
+ inline double value(int i) {
+ return values[i];
+ }
+
void printValues(ostream &strm);
-
-
- virtual void addValue(double a)
- {
- sortedFlag=0;
- values[n]=a;
- StatV::addValue(a);
- }
-
+
+
+ virtual void addValue(double a) {
+ sortedFlag=0;
+ values[n]=a;
+ StatV::addValue(a);
+ }
+
double getSigmaSmaller();
-
+
double getSigmaBigger();
-
+
};
diff --git a/mgizapp/src/mkcls/TAOptimization.cpp b/mgizapp/src/mkcls/TAOptimization.cpp
index 074ff62..71662e4 100644
--- a/mgizapp/src/mkcls/TAOptimization.cpp
+++ b/mgizapp/src/mkcls/TAOptimization.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,24 +31,24 @@ USA.
double TAOptimization::defaultAnnRate=0.4;
double TAOptimization::defaultMultiple=2.0;
-
-TAOptimization::TAOptimization(Problem &p,double t,double d,int m)
-: IterOptimization(p,m) , temperatur(t) , deltaTemperatur(d)
+
+TAOptimization::TAOptimization(Problem &p,double t,double d,int m)
+ : IterOptimization(p,m) , temperatur(t) , deltaTemperatur(d)
{
assert(t>0 && d>0);
}
-
+
TAOptimization::TAOptimization(Problem&p,int m)
-: IterOptimization(p,m), temperatur(-1)
+ : IterOptimization(p,m), temperatur(-1)
{
}
-
+
TAOptimization::TAOptimization(TAOptimization &o)
-: IterOptimization(o)
+ : IterOptimization(o)
{
temperatur= o.temperatur;
deltaTemperatur= o.deltaTemperatur;
@@ -56,58 +56,55 @@ TAOptimization::TAOptimization(TAOptimization &o)
-
+
void TAOptimization::zInitialize()
{
IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if(maxStep>0)
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- temperatur = v.quantil(defaultAnnRate);
- deltaTemperatur = temperatur/n;
-
- if( verboseMode>0 )
- cout << "#TA: (anfAnnRate="
- << defaultAnnRate << ",T=" << temperatur << ",deltaT="
- << deltaTemperatur << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- }
+ if( temperatur<0) {
+
+
+ int n;
+
+ StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
+
+ if(maxStep>0)
+ n=(int)(maxStep*4.0/5.0);
+ else
+ maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
+
+ temperatur = v.quantil(defaultAnnRate);
+ deltaTemperatur = temperatur/n;
+
+ if( verboseMode>0 )
+ cout << "#TA: (anfAnnRate="
+ << defaultAnnRate << ",T=" << temperatur << ",deltaT="
+ << deltaTemperatur << ")\n";
+ curStep=0;
+ endFlag=0;
+ delete &v;
+ }
}
short TAOptimization::end()
{
-
-
- if( temperatur>0 )
- {
- endFlag=0;
- bestStep=curStep;
- }
+
+
+ if( temperatur>0 ) {
+ endFlag=0;
+ bestStep=curStep;
+ }
return endFlag>0;
}
short TAOptimization::accept(double delta)
{
- if( temperatur<0 )
+ if( temperatur<0 )
+ return 1;
+ else if( delta < temperatur )
return 1;
else
- if( delta < temperatur )
- return 1;
- else
- return 0;
+ return 0;
}
void TAOptimization::abkuehlen()
@@ -124,84 +121,75 @@ void TAOptimization::makeGraphOutput()
-
+
double TAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
+ int optimierungsschritte,int print)
{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if(print)cout << "#TA-optimizeValues: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultAnnRate = (float)(i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print)
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester"
- " Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.5;
- return bestPar;
+ switch(typ) {
+ case 1: {
+ double bestPar=-1,best=1e100;
+ if(print)cout << "#TA-optimizeValues: " << numParameter << endl;
+ for(int i=0; i<=numParameter; i++) {
+ StatVar end,laufzeit,init;
+ double now;
+ defaultAnnRate = (float)(i)/numParameter;
+ solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,end,
+ laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultAnnRate;
+ }
+ if( print) {
+ cout << defaultAnnRate << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
+ }
+ }
+ if( print )
+ cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester"
+ " Sigma SigmaSmaller SigmaBigger\n";
+ defaultAnnRate=0.5;
+ return bestPar;
+ }
+ break;
+ case 10: {
+ double bestPar=-1,best=1e100;
+ if( print )
+ cout << "#TA-optimizeValues: defaultMultiple " << 10 << endl;
+ for(int i=1; i<=6; i++) {
+ StatVar end,laufzeit,init;
+ double now;
+ defaultMultiple = i;
+ solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,
+ end,laufzeit,init);
+ if( best>now ) {
+ best=now;
+ bestPar=defaultMultiple;
}
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#TA-optimizeValues: defaultMultiple " << 10 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester Sigma "
- " SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
+ if( print ) {
+ cout << defaultMultiple << " ";
+ cout << end.getMean() << " " << end.quantil(0.2) << " "
+ << end.quantil(0.79) << " " << laufzeit.getMean() << " "
+ << end.quantil(0.0) << " " << end.getSigma() << " "
+ << end.getSigmaSmaller() << " " << end.getSigmaBigger()
+ << " " << now << endl;
}
- break;
- default:
- cerr << "Error: wrong parameter-type in TAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
}
+ if( print )
+ cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester Sigma "
+ " SigmaSmaller SigmaBigger\n";
+ defaultMultiple=2.0;
+ return bestPar;
+ }
+ break;
+ default:
+ cerr << "Error: wrong parameter-type in TAOptimization::optimizeValue ("
+ << typ << ")\n";
+ exit(1);
+ }
return 1e100;
}
diff --git a/mgizapp/src/mkcls/TAOptimization.h b/mgizapp/src/mkcls/TAOptimization.h
index 3382306..8f80534 100644
--- a/mgizapp/src/mkcls/TAOptimization.h
+++ b/mgizapp/src/mkcls/TAOptimization.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,47 +32,48 @@ USA.
#include "IterOptimization.h"
-class TAOptimization : public IterOptimization {
-
+class TAOptimization : public IterOptimization
+{
- private:
- double temperatur;
- double deltaTemperatur;
- protected:
- virtual void zInitialize();
-
+private:
+ double temperatur;
+ double deltaTemperatur;
- virtual short accept(double delta);
-
+protected:
+ virtual void zInitialize();
- virtual void abkuehlen();
-
- virtual short end();
-
+ virtual short accept(double delta);
- virtual void makeGraphOutput();
-
- public:
- TAOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
+ virtual void abkuehlen();
- TAOptimization(Problem &p,int maxIter=-1);
-
- TAOptimization(TAOptimization &o);
-
+ virtual short end();
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
+ virtual void makeGraphOutput();
+
+
+public:
+ TAOptimization(Problem &p,double temperatur,
+ double deltaTemperatur,int maxIter=-1);
+
+
+ TAOptimization(Problem &p,int maxIter=-1);
+
+
+ TAOptimization(TAOptimization &o);
+
+
+ static double optimizeValue(Problem &p,int proParameter,
+ int numParameter,int typ,int schritte= -1,int verbose=1);
+
+
+ static double defaultAnnRate;
+
+ static double defaultMultiple;
+
};
#endif
diff --git a/mgizapp/src/mkcls/general.cpp b/mgizapp/src/mkcls/general.cpp
index 1d11e69..927142e 100644
--- a/mgizapp/src/mkcls/general.cpp
+++ b/mgizapp/src/mkcls/general.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -38,14 +38,14 @@ extern "C" {
#define srand48 srand
#define drand48() (rand()/RAND_MAX)
#endif
-
+
}
#include "general.h"
extern "C" {
#ifndef __linux__
-int getrusage(int who, struct rusage *rusage);
+ int getrusage(int who, struct rusage *rusage);
#endif
};
int verboseMode=0;
@@ -57,18 +57,18 @@ int verboseMode=0;
void myerror(int line,const char *file,const char *expression)
{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
+ cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
+ << file << ":" << line << endl;
}
void imyerror(int line,const char *file,const char *expression)
{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
- #ifndef DEBUG
-
- #endif
+ cerr << "Error: '" << expression << "' ::: in Source " << file
+ << ":" << line << endl;
+#ifndef DEBUG
+
+#endif
}
@@ -115,7 +115,7 @@ int randomInt(int exclusive)
double clockSec()
{
#ifdef WIN32
- return 0;
+ return 0;
#else
#ifdef linux
enum __rusage_who who=RUSAGE_SELF;
@@ -125,5 +125,5 @@ double clockSec()
struct rusage rusage;
getrusage(who, &rusage);
return rusage.ru_utime.tv_sec+rusage.ru_utime.tv_usec/1000000.0;
-#endif
+#endif
}
diff --git a/mgizapp/src/mkcls/general.h b/mgizapp/src/mkcls/general.h
index 8631002..aaed9e1 100644
--- a/mgizapp/src/mkcls/general.h
+++ b/mgizapp/src/mkcls/general.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -45,19 +45,19 @@ USA.
-#define TEST_RANDOM_SEED 532567487
+#define TEST_RANDOM_SEED 532567487
-double zufall01();
-
+double zufall01();
-double zufall(double min,double max);
-
-int randomInt(int exclusive);
-
+double zufall(double min,double max);
+
+
+int randomInt(int exclusive);
+
+
+void zufallSeed(int z =TEST_RANDOM_SEED);
-void zufallSeed(int z =TEST_RANDOM_SEED);
-
@@ -70,7 +70,7 @@ void zufallSeed(int z =TEST_RANDOM_SEED);
-double clockSec();
+double clockSec();
extern int verboseMode;
diff --git a/mgizapp/src/mkcls/mkcls.cpp b/mgizapp/src/mkcls/mkcls.cpp
index 3a950cf..82708da 100644
--- a/mgizapp/src/mkcls/mkcls.cpp
+++ b/mgizapp/src/mkcls/mkcls.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -50,49 +50,49 @@ char *hapaxInitName=0;
-
-static int nLaeufe=1,nLaeufeReduce=3;
-
+static int nLaeufe=1,nLaeufeReduce=3;
+
+
static int optimizeParameterAnzahl=10;
-
+
static int IterOptVerf=TA_OPT;
-
+
static int MaxIterOptSteps= -1;
-
+
static int MaxSecs=0;
-
+
static int InitValue=INIT_RAN;
-
+
static int Criterion=CRITERION_ML;
-
+
static int Wwahl=W_DET_DECR;
-
+
static int Kwahl=K_BEST;
-
+
static int NumberCategories=100;
-
+
static int MinWordFrequency=0;
-
+
static int IterOptSet=0;
-
+
static KategProblem *p = 0;
-
+
char korpusName[1024]="train";
int korpusIsText=1;
@@ -101,36 +101,36 @@ char *FileForOther=0;
void printUsage(int r)
{
- cout <<
- "mkcls - a program for making word classes: Usage: \n"
- " mkcls [-nnum] [-ptrain] [-Vfile] opt\n"
-
-
-
-
-
-
- "-V output classes (Default: no file)\n"
-
-
- "-n number of optimization runs (Default: 1); larger number => better results\n"
-
- "-p filename of training corpus (Default: 'train')\n"
-
-
-
-
-
-
-
-
- "Example:\n"
- " mkcls -c80 -n10 -pin -Vout opt\n"
- " (generates 80 classes for the corpus 'in' and writes the classes in 'out')\n"
- "Literature: \n"
- " Franz Josef Och: »Maximum-Likelihood-Schätzung von Wortkategorien mit Verfahren\n"
- " der kombinatorischen Optimierung?Studienarbeit, Universität Erlangen-Nürnberg,\n"
- " Germany,1995. \n";
+ cout <<
+ "mkcls - a program for making word classes: Usage: \n"
+ " mkcls [-nnum] [-ptrain] [-Vfile] opt\n"
+
+
+
+
+
+
+ "-V output classes (Default: no file)\n"
+
+
+ "-n number of optimization runs (Default: 1); larger number => better results\n"
+
+ "-p filename of training corpus (Default: 'train')\n"
+
+
+
+
+
+
+
+
+ "Example:\n"
+ " mkcls -c80 -n10 -pin -Vout opt\n"
+ " (generates 80 classes for the corpus 'in' and writes the classes in 'out')\n"
+ "Literature: \n"
+ " Franz Josef Och: »Maximum-Likelihood-Schätzung von Wortkategorien mit Verfahren\n"
+ " der kombinatorischen Optimierung?Studienarbeit, Universität Erlangen-Nürnberg,\n"
+ " Germany,1995. \n";
exit(r);
}
@@ -147,7 +147,7 @@ void makeIterOpt()
double mean;
StatVar end,laufzeit,init;
solveProblem(1+(PrintBestTo!=0),*p,nLaeufe,MaxIterOptSteps,IterOptVerf,
- mean,end,laufzeit,init,maxTime);
+ mean,end,laufzeit,init,maxTime);
if( verboseMode>1 )
p->dumpOn(cout);
}
@@ -167,96 +167,80 @@ int makeMetaOpt(int argc,char **argv)
{
int ret=0;
- if(argc==4 || argc==3)
- {
- int typ=0;
- if( argc==4 )
- {
- sscanf(argv[3],"%d",&typ);
- assert(typ>0 && typ<=11 );
- }
- if( isdigit(argv[2][0]) )
- {
- int a;
- sscanf(argv[2],"%d",&a);
- switch(a)
- {
- case 1:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 2:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,2);
- break;
- case 3:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 4:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 5:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 6:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 7:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 8:
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- default:
- cerr << "Error: Wrong number of parameter (" << argv[2]
- << ").\n";
- printUsage(1);
- }
- }
- else
- {
- if(strcasecmp(argv[2],"gda")==0)
- {
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"ta")==0)
- {
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"rrt")==0)
- {
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"sa")==0)
- {
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
-
-
-
-
- else
- {
- cerr << "Error: unknown algorithm" << argv[2] << endl;
- printUsage(1);
- }
- }
+ if(argc==4 || argc==3) {
+ int typ=0;
+ if( argc==4 ) {
+ sscanf(argv[3],"%d",&typ);
+ assert(typ>0 && typ<=11 );
}
- else
- {
- cerr << "Error: wrong number of arguments: " << argc << endl;
- printUsage(1);
+ if( isdigit(argv[2][0]) ) {
+ int a;
+ sscanf(argv[2],"%d",&a);
+ switch(a) {
+ case 1:
+ SAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,1);
+ break;
+ case 2:
+ SAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,2);
+ break;
+ case 3:
+ SAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,10);
+ break;
+ case 4:
+ TAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,1);
+ break;
+ case 5:
+ TAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,10);
+ break;
+ case 6:
+ RRTOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,1);
+ break;
+ case 7:
+ RRTOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,10);
+ break;
+ case 8:
+ GDAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,1);
+ break;
+ default:
+ cerr << "Error: Wrong number of parameter (" << argv[2]
+ << ").\n";
+ printUsage(1);
+ }
+ } else {
+ if(strcasecmp(argv[2],"gda")==0) {
+ GDAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,typ);
+ } else if(strcasecmp(argv[2],"ta")==0) {
+ TAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,typ);
+ } else if(strcasecmp(argv[2],"rrt")==0) {
+ RRTOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,typ);
+ } else if(strcasecmp(argv[2],"sa")==0) {
+ SAOptimization::optimizeValue(*p,nLaeufe,
+ optimizeParameterAnzahl,typ);
+ }
+
+
+
+
+ else {
+ cerr << "Error: unknown algorithm" << argv[2] << endl;
+ printUsage(1);
+ }
}
+ } else {
+ cerr << "Error: wrong number of arguments: " << argc << endl;
+ printUsage(1);
+ }
return ret;
}
@@ -281,11 +265,10 @@ void setVerfahren(char *p)
IterOptVerf=SA_OPT;
else if(strcasecmp(p,"hc")==0)
IterOptVerf=HC_OPT;
- else
- {
- cerr << "Error: Unknown iterativ-optimizing algorithm '" << p << "'.\n";
- printUsage(1);
- }
+ else {
+ cerr << "Error: Unknown iterativ-optimizing algorithm '" << p << "'.\n";
+ printUsage(1);
+ }
}
@@ -300,16 +283,13 @@ void setInitValue(char *iv,char *fileForOther)
InitValue=INIT_LWRW;
else if(strcasecmp(iv,"freq")==0)
InitValue=INIT_FREQ;
- else if(strcasecmp(iv,"other")==0)
- {
- InitValue=INIT_OTHER;
- FileForOther=strdup(fileForOther);
- }
- else
- {
- cerr << "Error: Unknown initialization '" << p << "'.\n";;
- printUsage(1);
- }
+ else if(strcasecmp(iv,"other")==0) {
+ InitValue=INIT_OTHER;
+ FileForOther=strdup(fileForOther);
+ } else {
+ cerr << "Error: Unknown initialization '" << p << "'.\n";;
+ printUsage(1);
+ }
}
@@ -321,11 +301,10 @@ void setWwahl(const char *ww)
Wwahl=W_DET_DECR;
else if(strcasecmp(ww,"incr")==0)
Wwahl=W_DET_INCR;
- else
- {
- cerr << "Error: Unknown word-selection '" << ww << "'.\n";;
- printUsage(1);
- }
+ else {
+ cerr << "Error: Unknown word-selection '" << ww << "'.\n";;
+ printUsage(1);
+ }
}
@@ -337,11 +316,10 @@ void setKwahl(const char *kw)
Kwahl=K_RAN;
else if(strcasecmp(kw,"best")==0)
Kwahl=K_BEST;
- else
- {
- cerr << "Error: Unknown category-selection '" << kw << "'.\n";
- printUsage(1);
- }
+ else {
+ cerr << "Error: Unknown category-selection '" << kw << "'.\n";
+ printUsage(1);
+ }
}
@@ -352,65 +330,64 @@ void setParameter(const char *nr1,const char *nr2)
sscanf(nr1,"%d",&n1);
sscanf(nr2,"%f",&n2);
IterOptSet=1;
- switch(n1)
- {
- case 1:
- SAOptimization::defaultAnfAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_0 (SA) set to "
- << SAOptimization::defaultAnfAnnRate << endl;
- iassert(0<=SAOptimization::defaultAnfAnnRate&&
- SAOptimization::defaultAnfAnnRate<=1);
- break;
- case 2:
- SAOptimization::defaultEndAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_e (SA) set to "
- << SAOptimization::defaultEndAnnRate << endl;
- iassert(0<=SAOptimization::defaultEndAnnRate
- &&SAOptimization::defaultEndAnnRate<=1);
- break;
- case 3:
- SAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_e (SA) set to "
- << SAOptimization::defaultMultiple << endl;
- iassert( SAOptimization::defaultMultiple>0 );
- break;
- case 4:
- TAOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{TA} set to "
- << TAOptimization::defaultAnnRate << endl;
- iassert(0<=TAOptimization::defaultAnnRate
- &&TAOptimization::defaultAnnRate<=1);
- break;
- case 5:
- TAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{TA} set to "
- << TAOptimization::defaultMultiple << endl;
- iassert( TAOptimization::defaultMultiple>0 );
- break;
- case 6:
- RRTOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{RRT} set to "
- << RRTOptimization::defaultAnnRate << endl;
- iassert(0<=RRTOptimization::defaultAnnRate
- && RRTOptimization::defaultAnnRate<=1);
- break;
- case 7:
- RRTOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{RRT} set to "
- << RRTOptimization::defaultMultiple << endl;
- iassert( RRTOptimization::defaultMultiple>0 );
- break;
- case 8:
- GDAOptimization::defaultAlpha=n2;
- if(verboseMode)cout << "Parameter alpha set to "
- << GDAOptimization::defaultAlpha << endl;
- iassert(0<=GDAOptimization::defaultAlpha
- && GDAOptimization::defaultAlpha<1 );
- break;
- default:
- cerr << "Error: Wrong parameter number " << nr1 << " " << n1 << endl;
- printUsage(1);
- }
+ switch(n1) {
+ case 1:
+ SAOptimization::defaultAnfAnnRate=n2;
+ if(verboseMode)cout << "Parameter gamma_0 (SA) set to "
+ << SAOptimization::defaultAnfAnnRate << endl;
+ iassert(0<=SAOptimization::defaultAnfAnnRate&&
+ SAOptimization::defaultAnfAnnRate<=1);
+ break;
+ case 2:
+ SAOptimization::defaultEndAnnRate=n2;
+ if(verboseMode)cout << "Parameter gamma_e (SA) set to "
+ << SAOptimization::defaultEndAnnRate << endl;
+ iassert(0<=SAOptimization::defaultEndAnnRate
+ &&SAOptimization::defaultEndAnnRate<=1);
+ break;
+ case 3:
+ SAOptimization::defaultMultiple=n2;
+ if(verboseMode)cout << "Parameter nu_e (SA) set to "
+ << SAOptimization::defaultMultiple << endl;
+ iassert( SAOptimization::defaultMultiple>0 );
+ break;
+ case 4:
+ TAOptimization::defaultAnnRate=n2;
+ if(verboseMode)cout << "Parameter gamma_{TA} set to "
+ << TAOptimization::defaultAnnRate << endl;
+ iassert(0<=TAOptimization::defaultAnnRate
+ &&TAOptimization::defaultAnnRate<=1);
+ break;
+ case 5:
+ TAOptimization::defaultMultiple=n2;
+ if(verboseMode)cout << "Parameter nu_{TA} set to "
+ << TAOptimization::defaultMultiple << endl;
+ iassert( TAOptimization::defaultMultiple>0 );
+ break;
+ case 6:
+ RRTOptimization::defaultAnnRate=n2;
+ if(verboseMode)cout << "Parameter gamma_{RRT} set to "
+ << RRTOptimization::defaultAnnRate << endl;
+ iassert(0<=RRTOptimization::defaultAnnRate
+ && RRTOptimization::defaultAnnRate<=1);
+ break;
+ case 7:
+ RRTOptimization::defaultMultiple=n2;
+ if(verboseMode)cout << "Parameter nu_{RRT} set to "
+ << RRTOptimization::defaultMultiple << endl;
+ iassert( RRTOptimization::defaultMultiple>0 );
+ break;
+ case 8:
+ GDAOptimization::defaultAlpha=n2;
+ if(verboseMode)cout << "Parameter alpha set to "
+ << GDAOptimization::defaultAlpha << endl;
+ iassert(0<=GDAOptimization::defaultAlpha
+ && GDAOptimization::defaultAlpha<1 );
+ break;
+ default:
+ cerr << "Error: Wrong parameter number " << nr1 << " " << n1 << endl;
+ printUsage(1);
+ }
}
@@ -427,28 +404,23 @@ void setHapaxInitName(const char *s)
void setKorpus()
{
- if( korpusIsText )
- {
- if( (p=fromKModel(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
+ if( korpusIsText ) {
+ if( (p=fromKModel(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
+ MinWordFrequency))==0) {
+ cerr << "Error: Could not read the file '" << korpusName << "'.\n";
+ printUsage(1);
}
- else
- {
- if( (p=fromNgrFile(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
- p->wordFreq.initializeIndex(*(p->words),'1',2,1+NumberCategories/2,!OneWithHapas);
- p->wordFreq.initializeIndex(*(p->words),'2',2+NumberCategories/2,1+NumberCategories,OneWithHapas);
+ } else {
+ if( (p=fromNgrFile(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
+ MinWordFrequency))==0) {
+ cerr << "Error: Could not read the file '" << korpusName << "'.\n";
+ printUsage(1);
}
- if( IterOptSet==0 )
- KategProblemSetParameters(*p);
+ p->wordFreq.initializeIndex(*(p->words),'1',2,1+NumberCategories/2,!OneWithHapas);
+ p->wordFreq.initializeIndex(*(p->words),'2',2+NumberCategories/2,1+NumberCategories,OneWithHapas);
+ }
+ if( IterOptSet==0 )
+ KategProblemSetParameters(*p);
}
@@ -460,142 +432,133 @@ int main(int argc,char **argv)
{
double startTime=clockSec();
zufallSeed();
- while( argc>1 && argv[1][0]=='-' )
- {
-
- switch(argv[1][1])
- {
- case 'v':
- sscanf(argv[1]+2,"%d",&verboseMode);
- iassert(verboseMode>=0);
- break;
- case 'O':
- sscanf(argv[1]+2,"%d",&OneWithHapas);
- cout << "OneWithHapas: " << OneWithHapas << endl;
- break;
- case 'n':
- sscanf(argv[1]+2,"%d",&nLaeufe);
- nLaeufeReduce=nLaeufe;
- iassert( nLaeufe>=1 );
- break;
- case 'l':
- Criterion=1;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&rhoLo);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << rhoLo << ".\n";
- iassert(0<=rhoLo && rhoLo<=1);
- }
- if( verboseMode )
- cout << "Criterion LO used.\n";
- break;
- case 'y':
- Criterion=2;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&SigmaVerfaelschung);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << SigmaVerfaelschung << ".\n";
- iassert(0<SigmaVerfaelschung);
- }
- if( verboseMode )
- cout << "My special criterion used.\n";
- break;
- case 'p':
- setKorpusName(argv[1]+2);
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'P':
- setKorpusName(argv[1]+2);
- korpusIsText=0;
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'i':
- setInitValue(argv[1]+2,argv[2]);
- if( InitValue==INIT_OTHER )
- argv++,argc--;
- break;
- case 'h':
- setHapaxInitName(argv[1]+2);
- break;
- case 'k':
- setKwahl(argv[1]+2);
- break;
- case 'w':
- setWwahl(argv[1]+2);
- break;
- case 'c':
- sscanf(argv[1]+2,"%d",&NumberCategories);
- iassert(NumberCategories>=2);
- break;
- case 'm':
- sscanf(argv[1]+2,"%d",&MinWordFrequency);
- break;
- case 'e':
- setParameter(argv[1]+2,argv[2]);
- argv++,argc--;
- break;
- case 'a':
- setVerfahren(argv[1]+2);
- break;
- case 'r':
- {
- int s;
- sscanf(argv[1]+2,"%d",&s);
- zufallSeed(s);
- }
- break;
- case 'V':
- if(argv[1][2])
- {
- char str[1024];
- strcpy(str,argv[1]+2);
- PrintBestTo=new ofstream(str);
- strcat(str,".cats");
- PrintBestTo2=new ofstream(str);
- }
- else
- cout << "AUSGABE auf cout\n";
- break;
- case 'M':
- sscanf(argv[1]+2,"%d",&MaxIterOptSteps);
- break;
- case 's':
- sscanf(argv[1]+2,"%d",&MaxSecs);
- break;
- case 'N':
- sscanf(argv[1]+2,"%d",&optimizeParameterAnzahl);
- break;
- case 'o':
- GraphOutput = new ofstream(argv[1]+2);
- if( GraphOutput==0 )
- cerr << "Warning: Open failed for file '" << argv[1]+2 << "'.\n";
- break;
- default:
- cerr << "Fehlerhafte Option: " << argv[1] << endl;
- printUsage(1);
- }
- argv++;
- argc--;
- }
+ while( argc>1 && argv[1][0]=='-' ) {
-
- setKorpus();
- if( FileForOther )
- {
- fromCatFile(p,FileForOther);
- p->initialisierung=InitValue;
- p->_initialize(InitValue);
+ switch(argv[1][1]) {
+ case 'v':
+ sscanf(argv[1]+2,"%d",&verboseMode);
+ iassert(verboseMode>=0);
+ break;
+ case 'O':
+ sscanf(argv[1]+2,"%d",&OneWithHapas);
+ cout << "OneWithHapas: " << OneWithHapas << endl;
+ break;
+ case 'n':
+ sscanf(argv[1]+2,"%d",&nLaeufe);
+ nLaeufeReduce=nLaeufe;
+ iassert( nLaeufe>=1 );
+ break;
+ case 'l':
+ Criterion=1;
+ if( argv[1][2] ) {
+ sscanf(argv[1]+2,"%lf",&rhoLo);
+ if( verboseMode )
+ cout << "Parameter rho (for LO) set to" << rhoLo << ".\n";
+ iassert(0<=rhoLo && rhoLo<=1);
+ }
+ if( verboseMode )
+ cout << "Criterion LO used.\n";
+ break;
+ case 'y':
+ Criterion=2;
+ if( argv[1][2] ) {
+ sscanf(argv[1]+2,"%lf",&SigmaVerfaelschung);
+ if( verboseMode )
+ cout << "Parameter rho (for LO) set to" << SigmaVerfaelschung << ".\n";
+ iassert(0<SigmaVerfaelschung);
+ }
+ if( verboseMode )
+ cout << "My special criterion used.\n";
+ break;
+ case 'p':
+ setKorpusName(argv[1]+2);
+ assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
+ break;
+ case 'P':
+ setKorpusName(argv[1]+2);
+ korpusIsText=0;
+ assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
+ break;
+ case 'i':
+ setInitValue(argv[1]+2,argv[2]);
+ if( InitValue==INIT_OTHER )
+ argv++,argc--;
+ break;
+ case 'h':
+ setHapaxInitName(argv[1]+2);
+ break;
+ case 'k':
+ setKwahl(argv[1]+2);
+ break;
+ case 'w':
+ setWwahl(argv[1]+2);
+ break;
+ case 'c':
+ sscanf(argv[1]+2,"%d",&NumberCategories);
+ iassert(NumberCategories>=2);
+ break;
+ case 'm':
+ sscanf(argv[1]+2,"%d",&MinWordFrequency);
+ break;
+ case 'e':
+ setParameter(argv[1]+2,argv[2]);
+ argv++,argc--;
+ break;
+ case 'a':
+ setVerfahren(argv[1]+2);
+ break;
+ case 'r': {
+ int s;
+ sscanf(argv[1]+2,"%d",&s);
+ zufallSeed(s);
}
-
- if( hapaxInitName )
- {
- fromCatFile(p,hapaxInitName,0);
- p->fixInitLike();
+ break;
+ case 'V':
+ if(argv[1][2]) {
+ char str[1024];
+ strcpy(str,argv[1]+2);
+ PrintBestTo=new ofstream(str);
+ strcat(str,".cats");
+ PrintBestTo2=new ofstream(str);
+ } else
+ cout << "AUSGABE auf cout\n";
+ break;
+ case 'M':
+ sscanf(argv[1]+2,"%d",&MaxIterOptSteps);
+ break;
+ case 's':
+ sscanf(argv[1]+2,"%d",&MaxSecs);
+ break;
+ case 'N':
+ sscanf(argv[1]+2,"%d",&optimizeParameterAnzahl);
+ break;
+ case 'o':
+ GraphOutput = new ofstream(argv[1]+2);
+ if( GraphOutput==0 )
+ cerr << "Warning: Open failed for file '" << argv[1]+2 << "'.\n";
+ break;
+ default:
+ cerr << "Fehlerhafte Option: " << argv[1] << endl;
+ printUsage(1);
}
+ argv++;
+ argc--;
+ }
+
+
+ setKorpus();
+ if( FileForOther ) {
+ fromCatFile(p,FileForOther);
+ p->initialisierung=InitValue;
+ p->_initialize(InitValue);
+ }
+
+ if( hapaxInitName ) {
+ fromCatFile(p,hapaxInitName,0);
+ p->fixInitLike();
+ }
- double start2Time=clockSec();
+ double start2Time=clockSec();
if(argc>=2 && strcasecmp(argv[1],"opt")==0 )
makeIterOpt();
@@ -603,18 +566,16 @@ int main(int argc,char **argv)
makeMetaOpt(argc,argv);
else if(argc>=2 && strcasecmp(argv[1],"izr-opt")==0)
makeIzrOpt();
-
-
- else
- {
- makeIterOpt();
- }
- if( verboseMode )
- {
- cout << " full-time: " << clockSec()-startTime << endl;
- cout << "optimize-time: " << clockSec()-start2Time << endl;
- }
+
+ else {
+ makeIterOpt();
+ }
+
+ if( verboseMode ) {
+ cout << " full-time: " << clockSec()-startTime << endl;
+ cout << "optimize-time: " << clockSec()-start2Time << endl;
+ }
return 0;
}
diff --git a/mgizapp/src/mkcls/my.h b/mgizapp/src/mkcls/my.h
index ba06657..695ea0f 100644
--- a/mgizapp/src/mkcls/my.h
+++ b/mgizapp/src/mkcls/my.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
diff --git a/mgizapp/src/mkcls/myassert.h b/mgizapp/src/mkcls/myassert.h
index 0276ba3..1761f28 100644
--- a/mgizapp/src/mkcls/myassert.h
+++ b/mgizapp/src/mkcls/myassert.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
diff --git a/mgizapp/src/mkcls/myleda.h b/mgizapp/src/mkcls/myleda.h
index 7e3879a..4580eeb 100644
--- a/mgizapp/src/mkcls/myleda.h
+++ b/mgizapp/src/mkcls/myleda.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -57,10 +57,12 @@ template<class T>
class leda_set : public set<T>
{
public:
- bool member(const T&m) const
- { return this->count(m)!=0; }
- void del(const T&m)
- { this->erase(m); }
+ bool member(const T&m) const {
+ return this->count(m)!=0;
+ }
+ void del(const T&m) {
+ this->erase(m);
+ }
};
#define forall_set(a,b,c) for(a::iterator __i__=c.begin();__i__!=c.end()&&((b=*__i__),1);++__i__)
template<class T>
@@ -71,8 +73,8 @@ leda_set<T> operator&(const leda_set<T>&a,const leda_set<T>&b)
#ifdef WIN32
std::list<T> lst;
set_intersection(a.begin(),a.end(),b.begin(),b.end(),lst.begin());
- for(typename std::list<T>::iterator it = lst.begin() ;it!=lst.end();it++){
- c.insert(*it);
+ for(typename std::list<T>::iterator it = lst.begin() ; it!=lst.end(); it++) {
+ c.insert(*it);
}
#else
insert_iterator<set<T> > iter(c,c.begin());
@@ -85,13 +87,13 @@ leda_set<T> operator-(const leda_set<T>&a,const leda_set<T>&b)
{
leda_set<T>c;
-
-
+
+
#ifdef WIN32
std::list<T> lst;
set_difference(a.begin(),a.end(),b.begin(),b.end(),lst.begin());
- for(typename std::list<T>::iterator it = lst.begin() ;it!=lst.end();it++){
- c.insert(*it);
+ for(typename std::list<T>::iterator it = lst.begin() ; it!=lst.end(); it++) {
+ c.insert(*it);
}
#else
insert_iterator<set<T> > iter(c,c.begin());
@@ -115,28 +117,26 @@ class leda_d_array : public map<A,B>
private:
B init;
public:
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename map<A,B>::const_iterator pos=find(a);
- iassert(pos!=this->end());
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename map<A,B>::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(map<A,B>::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
+ bool defined(const A&a) const {
+ return find(a)!=this->end();
+ }
+ const B&operator[](const A&a)const {
+ typename map<A,B>::const_iterator pos=find(a);
+ iassert(pos!=this->end());
+ if( pos==this->end() )
+ return init;
+ else
return pos->second;
+ }
+ B&operator[](const A&a) {
+ typename map<A,B>::iterator pos=find(a);
+ if( pos==this->end() ) {
+ insert(map<A,B>::value_type(a,init));
+ pos=find(a);
+ iassert(pos!=this->end());
}
+ return pos->second;
+ }
};
#define forall_defined_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->first),1) ;++__ii__)
@@ -162,31 +162,38 @@ template<class T ,class _Pr = less<T> >
class my_hash
{
public:
- int operator()(const T&t)const {return Hash(t);}
+ int operator()(const T&t)const {
+ return Hash(t);
+ }
#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
+ enum {
+ // parameters for hash table
+ bucket_size = 1 // 0 < bucket_size
};
my_hash()
- : comp()
- { // construct with default comparator
+ : comp() {
+ // construct with default comparator
}
my_hash(_Pr _Pred)
- : comp(_Pred)
- { // construct with _Pred comparator
+ : comp(_Pred) {
+ // construct with _Pred comparator
}
protected:
_Pr comp;
public:
- int operator()(const T&t , const T&t1)const {return comp(t,t1);}
+ int operator()(const T&t , const T&t1)const {
+ return comp(t,t1);
+ }
#endif
};
-inline int Hash(int value) { return value; }
+inline int Hash(int value)
+{
+ return value;
+}
#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
-#if __GNUC__>2
+#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
using __gnu_cxx::hash;
@@ -202,27 +209,25 @@ public:
leda_h_array() {}
leda_h_array(const B&_init)
: MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=this->find(a);
-
- if( pos==this->end() )
- return init;
- else
- return pos->second;
+ bool defined(const A&a) const {
+ return find(a)!=this->end();
}
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=this->find(a);
- if( pos==this->end() )
- {
- this->insert(typename MY_HASH_BASE::value_type(a,init));
- pos=this->find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
+ const B&operator[](const A&a)const {
+ typename MY_HASH_BASE::const_iterator pos=this->find(a);
+
+ if( pos==this->end() )
+ return init;
+ else
+ return pos->second;
+ }
+ B&operator[](const A&a) {
+ typename MY_HASH_BASE::iterator pos=this->find(a);
+ if( pos==this->end() ) {
+ this->insert(typename MY_HASH_BASE::value_type(a,init));
+ pos=this->find(a);
+ iassert(pos!=this->end());
+ }
+ return pos->second;
}
};
@@ -237,7 +242,11 @@ public:
template<class T> int compare(const T&a,const T&b)
-{if(a==b)return 0; else if(a<b) return -1; else return 1;}
+{
+ if(a==b)return 0;
+ else if(a<b) return -1;
+ else return 1;
+}
template<class T,class U>
ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
@@ -245,13 +254,12 @@ ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
T t;
bool makeNl=0;
out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
+ forall_defined_h(T,U,t,w) {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
return out << "}\n";
}
template<class T,class U>
@@ -260,13 +268,12 @@ ostream & operator<<(ostream&out,const leda_d_array<T,U>&w)
T t;
bool makeNl=0;
out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
+ forall_defined_h(T,U,t,w) {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
return out << "}\n";
}
@@ -276,13 +283,12 @@ ostream&printSet(ostream&out,const leda_set<T>&s)
bool first=1;
T t;
out << "{";
- forall_set(typename set<T>,t,s)
- {
- if( first==0 )
- out << ", ";
- out << t;
- first=0;
- }
+ forall_set(typename set<T>,t,s) {
+ if( first==0 )
+ out << ", ";
+ out << t;
+ first=0;
+ }
return out << "}\n";
}
@@ -297,20 +303,20 @@ bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
{
A v;
forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
+ if( !( p1[v]==p2[v]) ) return 0;
forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
}
template<class A,class B>
bool operator==(const leda_d_array<A,B>&p1,const leda_d_array<A,B>&p2)
{
A v;
forall_defined_d(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
+ if( !( p1[v]==p2[v]) ) return 0;
forall_defined_d(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
}
diff --git a/mgizapp/src/mkcls/mystl.h b/mgizapp/src/mkcls/mystl.h
index bcda88d..8255ae4 100644
--- a/mgizapp/src/mkcls/mystl.h
+++ b/mgizapp/src/mkcls/mystl.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -27,7 +27,7 @@ USA.
#define MY_STL_H_DEFINED
#include <string>
#include <utility>
-#if __GNUC__>2
+#if __GNUC__>2
#include <ext/hash_map>
using __gnu_cxx::hash_map;
using __gnu_cxx::hash;
@@ -44,43 +44,48 @@ inline int Hash(const string& s)
{
int sum=0;
string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
+ for(; i!=end; i++)sum=5*sum+(*i);
return sum;
}
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+4*Hash(a.second); }
+template<class V> int Hash(const pair<V,V>&a)
+{
+ return Hash(a.first)+4*Hash(a.second);
+}
template<class T1,class T2>
istream& operator>>(istream &in,pair<T1,T2> &ir)
{
- char c;
-
- do in.get(c); while (in && isspace(c));
-
- if (!in) return in;
-
- if (c != '(') in.putback(c);
-
- in >> ir.first;
-
- do in.get(c); while (isspace(c));
- if (c != ',') in.putback(c);
-
- in >> ir.second;
-
- do in.get(c); while (c == ' ');
- if (c != ')') in.putback(c);
-
- return in;
+ char c;
+
+ do in.get(c);
+ while (in && isspace(c));
+
+ if (!in) return in;
+
+ if (c != '(') in.putback(c);
+
+ in >> ir.first;
+
+ do in.get(c);
+ while (isspace(c));
+ if (c != ',') in.putback(c);
+
+ in >> ir.second;
+
+ do in.get(c);
+ while (c == ' ');
+ if (c != ')') in.putback(c);
+
+ return in;
}
template<class T1,class T2>
ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
+{
out << "(" << ir.first << "," << ir.second << ")";
return out;
-}
+}
void printSpaces(ostream&out,int n);
void mysplit(const string &s,string &s1,string &s2);
@@ -93,13 +98,15 @@ public:
A a;
B b;
C c;
- tri(){};
+ tri() {};
tri(const A&_a,const B&_b,const C&_c)
: a(_a),b(_b),c(_c) {}
};
template<class A,class B,class C>
bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
+{
+ return x.a==y.a&&x.b==y.b&&x.c==y.c;
+}
template<class A,class B,class C>
bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
diff --git a/mgizapp/src/model1.cpp b/mgizapp/src/model1.cpp
index 9b71d8f..74d3331 100644
--- a/mgizapp/src/model1.cpp
+++ b/mgizapp/src/model1.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,20 +33,20 @@ GLOBAL_PARAMETER2(int,Model1_Dump_Freq,"MODEL 1 DUMP FREQUENCY","t1","dump frequ
int NumberOfVALIalignments=100;
model1::model1(
- const char* efname,
- vcbList& evcblist,
- vcbList& fvcblist,
- tmodel<COUNT, PROB> &_tTable,
- Perplexity & _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp)
- :
+ const char* efname,
+ vcbList& evcblist,
+ vcbList& fvcblist,
+ tmodel<COUNT, PROB> &_tTable,
+ Perplexity & _perp,
+ sentenceHandler& _sHandler1,
+ Perplexity* _testPerp,
+ sentenceHandler* _testHandler,
+ Perplexity& _trainViterbiPerp,
+ Perplexity* _testViterbiPerp)
+ :
report_info(_perp,_sHandler1,_testPerp,_testHandler,_trainViterbiPerp,_testViterbiPerp),
- efFilename(efname), Elist(evcblist), Flist(fvcblist),
- eTotalWCount(Elist.totalVocab()), fTotalWCount(Flist.totalVocab()),
+ efFilename(efname), Elist(evcblist), Flist(fvcblist),
+ eTotalWCount(Elist.totalVocab()), fTotalWCount(Flist.totalVocab()),
noEnglishWords(Elist.size()), noFrenchWords(Flist.size()), tTable(_tTable),
evlist(Elist.getVocabList()), fvlist(Flist.getVocabList())
{}
@@ -65,570 +65,577 @@ model1::model1(const model1& m1, int _threadID) :
fvlist(m1.fvlist)
{}
-void model1::initialize_table_uniformly(sentenceHandler& sHandler1){
+void model1::initialize_table_uniformly(sentenceHandler& sHandler1)
+{
WordIndex i, j;
cout << "Initialize tTable\n";
sentPair sent ;
sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
+ while(sHandler1.getNextSentence(sent)) {
Vector<WordIndex>& es = sent.eSent;
Vector<WordIndex>& fs = sent.fSent;
PROB uniform = 1.0/es.size() ;
for( i=0; i < es.size(); i++)
- for(j=1; j < fs.size(); j++)
- tTable.insert(es[i],fs[j],0,uniform);
+ for(j=1; j < fs.size(); j++)
+ tTable.insert(es[i],fs[j],0,uniform);
}
}
-struct em_loop_t{
- model1 *m1;
- int it;
- int nthread;
- Dictionary *dict;
- bool useDict;
- int result;
- pthread_t thread;
- int valid ;
+struct em_loop_t {
+ model1 *m1;
+ int it;
+ int nthread;
+ Dictionary *dict;
+ bool useDict;
+ int result;
+ pthread_t thread;
+ int valid ;
};
-
-void* exe_emloop(void *arg){
- em_loop_t* em =(em_loop_t *) arg;
- em->result = em->m1->em_thread(em->it,em->nthread,*em->dict,em->useDict);
- return arg;
+
+void* exe_emloop(void *arg)
+{
+ em_loop_t* em =(em_loop_t *) arg;
+ em->result = em->m1->em_thread(em->it,em->nthread,*em->dict,em->useDict);
+ return arg;
}
int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenceHandler& sHandler1, */
- Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
+ Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */ )
{
- string modelName="Model1",shortModelName="1";
- char b[2];
- b[1] = '\0';
- b[0] = '0' + nthread;
- time_t st = time(NULL);
- string tfile, number, alignfile, test_alignfile;
- bool dump_files = false ;
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- int it = noIterations;
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || noIterations == it) && !NODUMPS ;
+ string modelName="Model1",shortModelName="1";
+ char b[2];
+ b[1] = '\0';
+ b[0] = '0' + nthread;
+ time_t st = time(NULL);
+ string tfile, number, alignfile, test_alignfile;
+ bool dump_files = false ;
+ cout << "==========================================================\n";
+ cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
+ int it = noIterations;
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || noIterations == it) && !NODUMPS ;
// dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ;
- alignfile = alignfile + b;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ;
+ alignfile = alignfile + b;
- em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- return 0;
+ em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ return 0;
}
int model1::em_with_tricks(int noIterations, /*Perplexity& perp, sentenceHandler& sHandler1, */
- bool seedModel1, Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
- Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */
-, bool dumpCount , const char* dumpCountName, bool useString) // If specified, then will dump files before last iteration
+ bool seedModel1, Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
+ Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */
+ , bool dumpCount , const char* dumpCountName, bool useString) // If specified, then will dump files before last iteration
{
- double minErrors=1.0;int minIter=0;
- string modelName="Model1",shortModelName="1";
- time_t st, it_st, fn, it_fn;
- string tfile, number, alignfile, test_alignfile;
- bool dump_files = false ;
- st = time(NULL);
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Model1",shortModelName="1";
+ time_t st, it_st, fn, it_fn;
+ string tfile, number, alignfile, test_alignfile;
+ bool dump_files = false ;
+ st = time(NULL);
+ sHandler1.rewind();
+ cout << "==========================================================\n";
+ cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
+ for(int it = 1; it <= noIterations; it++) {
+ it_st = time(NULL);
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || it == noIterations) && !NODUMPS ;
+ //dump_files = true;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A" + shortModelName + "." + number+".part0" ;
+ test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
+ initAL();
+ threadID = 0;
+ int th;
+ vector<em_loop_t> ths;
+ ths.resize(NCPUS);
sHandler1.rewind();
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- for(int it = 1; it <= noIterations; it++){
- it_st = time(NULL);
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0 || it == noIterations) && !NODUMPS ;
- //dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number+".part0" ;
- test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
- initAL();
- threadID = 0;
- int th;
- vector<em_loop_t> ths;
- ths.resize(NCPUS);
- sHandler1.rewind();
- for (th=1;th<NCPUS;th++){
- ths[th].m1=this;
- ths[th].it = it;
- ths[th].nthread = th;
- ths[th].dict = & dictionary;
- ths[th].useDict = useDict;
- ths[th].result = 0;
- ths[th].valid = pthread_create(&(ths[th].thread),NULL,exe_emloop,&(ths[th]));
- if(ths[th].valid){
- cerr << "Error starting thread " << th << endl;
- }
- }
- em_loop(it,perp, sHandler1, seedModel1, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- perp.record("Model1");
- trainViterbiPerp.record("Model1");
- errorReportAL(cout, "IBM-1");
-
- cerr << "Main thread done, waiting" << endl;;
- for (th=1;th<NCPUS;th++){
- pthread_join((ths[th].thread),NULL);
- cerr << "Thread " << th << "done" << endl;
- }
- if (testPerp && testHandler) // calculate test perplexity
- em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
- if( errorsAL()<minErrors ) {
- minErrors=errorsAL();
- minIter=it;
- }
- //if (dump_files){
- // if( OutputInAachenFormat==1 )
- // tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- //}
- cerr << "Normalizing T " << endl;
-
- /**
- If asked for dumping count table, just dump it.
- */
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- }
-
- tTable.normalizeTable(Elist, Flist);
- //cout << tTable.getProb(2,2) << endl;
- cerr << " DONE Normalizing " << endl;
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<
- it<<") VITERBI TEST CROSS-ENTROPY "
- << (*testViterbiPerp).cross_entropy()
- << " PERPLEXITY " << (*testViterbiPerp).perplexity()
- << '\n';
- if (dump_files){
- if( OutputInAachenFormat==0 )
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),
- Flist.getVocabList(),OutputInAachenFormat);
- }
- it_fn = time(NULL);
- cout << "Model 1 Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
-
-
+ for (th=1; th<NCPUS; th++) {
+ ths[th].m1=this;
+ ths[th].it = it;
+ ths[th].nthread = th;
+ ths[th].dict = & dictionary;
+ ths[th].useDict = useDict;
+ ths[th].result = 0;
+ ths[th].valid = pthread_create(&(ths[th].thread),NULL,exe_emloop,&(ths[th]));
+ if(ths[th].valid) {
+ cerr << "Error starting thread " << th << endl;
+ }
+ }
+ em_loop(it,perp, sHandler1, seedModel1, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ perp.record("Model1");
+ trainViterbiPerp.record("Model1");
+ errorReportAL(cout, "IBM-1");
+
+ cerr << "Main thread done, waiting" << endl;;
+ for (th=1; th<NCPUS; th++) {
+ pthread_join((ths[th].thread),NULL);
+ cerr << "Thread " << th << "done" << endl;
+ }
+ if (testPerp && testHandler) // calculate test perplexity
+ em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
}
- fn = time(NULL) ;
- cout << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- return minIter;
+ //if (dump_files){
+ // if( OutputInAachenFormat==1 )
+ // tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+ //}
+ cerr << "Normalizing T " << endl;
+
+ /**
+ If asked for dumping count table, just dump it.
+ */
+ if(dumpCount && it == noIterations) {
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ }
+
+ tTable.normalizeTable(Elist, Flist);
+ //cout << tTable.getProb(2,2) << endl;
+ cerr << " DONE Normalizing " << endl;
+ cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
+ << " PERPLEXITY " << (*testPerp).perplexity()
+ << '\n';
+ cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<
+ it<<") VITERBI TEST CROSS-ENTROPY "
+ << (*testViterbiPerp).cross_entropy()
+ << " PERPLEXITY " << (*testViterbiPerp).perplexity()
+ << '\n';
+ if (dump_files) {
+ if( OutputInAachenFormat==0 )
+ tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),
+ Flist.getVocabList(),OutputInAachenFormat);
+ }
+ it_fn = time(NULL);
+ cout << "Model 1 Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
+
+
+ }
+ fn = time(NULL) ;
+ cout << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ return minIter;
}
-bool model1::load_table(const char* tname){
+bool model1::load_table(const char* tname)
+{
/* This function loads the t table from the given file; use it
when you want to load results from previous t training
without doing any new training.
NAS, 7/11/99
*/
- cout << "Model1: loading t table \n" ;
- return tTable.readProbTable(tname);
+ cout << "Model1: loading t table \n" ;
+ return tTable.readProbTable(tname);
}
-
+
extern float MINCOUNTINCREASE;
-void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
- bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test)
+void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
+ bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test)
{
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS)
- of2.open(alignfile);
- PROB uniform = 1.0/noFrenchWords ;
- sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1 ;
-
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS)
+ of2.open(alignfile);
+ PROB uniform = 1.0/noFrenchWords ;
+ sentPair sent ;
+
+ while(sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+ double viterbi_score = 1 ;
+
#ifdef WIN32
- bool *eindict = new bool[l + 1];
- bool *findict = new bool[m + 1];
- bool **indict = new bool*[m + 1];
- for(int _i = 0; _i < m+1; _i++)
- indict[_i] = new bool[l + 1];
+ bool *eindict = new bool[l + 1];
+ bool *findict = new bool[m + 1];
+ bool **indict = new bool*[m + 1];
+ for(int _i = 0; _i < m+1; _i++)
+ indict[_i] = new bool[l + 1];
#else
- bool eindict[l + 1];
- bool findict[m + 1];
- bool indict[m + 1][l + 1];
+ bool eindict[l + 1];
+ bool findict[m + 1];
+ bool indict[m + 1][l + 1];
#endif
- if(it == 1 && useDict){
- for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
- for(unsigned int dummy = 0; dummy <= m; dummy++){
- findict[dummy] = false;
- for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
- indict[dummy][dummy2] = false;
- }
- for(j = 0; j <= m; j++)
- for(i = 0; i <= l; i++)
- if(dict.indict(fs[j], es[i])){
- eindict[i] = findict[j] = indict[j][i] = true;
- }
+ if(it == 1 && useDict) {
+ for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
+ for(unsigned int dummy = 0; dummy <= m; dummy++) {
+ findict[dummy] = false;
+ for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
+ indict[dummy][dummy2] = false;
+ }
+ for(j = 0; j <= m; j++)
+ for(i = 0; i <= l; i++)
+ if(dict.indict(fs[j], es[i])) {
+ eindict[i] = findict[j] = indict[j][i] = true;
+ }
+ }
+
+ for(j=1; j <= m; j++) {
+ // entries that map fs to all possible ei in this sentence.
+ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
+ LpPair<COUNT,PROB> **sPtrCachePtr;
+
+ PROB denom = 0.0;
+ WordIndex best_i = 0 ; // i for which fj is best maped to ei
+ PROB word_best_score = 0 ; // score for the best mapping of fj
+ if (it == 1 && !seedModel1) {
+ denom = uniform * es.size() ;
+ word_best_score = uniform ;
+ } else
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++) {
+ PROB e(0.0) ;
+ (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
+ if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ denom += e ;
+ if (e > word_best_score) {
+ word_best_score = e ;
+ best_i = i ;
+ }
}
-
- for(j=1; j <= m; j++){
- // entries that map fs to all possible ei in this sentence.
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- LpPair<COUNT,PROB> **sPtrCachePtr;
-
- PROB denom = 0.0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- PROB word_best_score = 0 ; // score for the best mapping of fj
- if (it == 1 && !seedModel1){
- denom = uniform * es.size() ;
- word_best_score = uniform ;
- }
- else
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- PROB e(0.0) ;
- (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
- if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score ; /// denom ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
- }
- cross_entropy += log(denom) ;
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- /* this if loop implements a constraint on counting:
- count(es[i], fs[j]) is implemented if and only if
- es[i] and fs[j] occur together in the dictionary,
- OR
- es[i] does not occur in the dictionary with any fs[x] and
- fs[j] does not occur in the dictionary with any es[y]
- */
- if(it == 1 && useDict){
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- if(indict[j][i] || (!findict[j] && !eindict[i])){
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- COUNT x=e*val;
- if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE )
- /* if ((*sPtrCachePtr) != 0)
- (*((*sPtrCachePtr))).count += x;
- else */
- tTable.incCount(es[i], fs[j], x);
- } /* end of if */
- } /* end of for i */
- } /* end of it == 1 */
- // Old code:
- else{
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- //for(i=0; i <= l; i++) {
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- //if( !(i==0) )
- //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
- COUNT x=e*val;
- if( pair_no==VerboseSentence )
- cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
- if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE ){
- /*if( NoEmptyWord==0 || i!=0 )
- if ((*sPtrCachePtr) != 0)
- (*((*sPtrCachePtr))).count += x;
- else */
- //cerr << i << " " << j << " (+) " << endl;
- //cerr.flush();
- //cerr << es[i] << " " << fs[j] << " (=) "<< endl;
- //cerr.flush();
- tTable.incCount(es[i], fs[j], x);
- //cerr << es[i] << " " << fs[j] << " (-) "<< endl;
- //cerr.flush();
- }
- } /* end of for i */
- } // end of else
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
- perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
- printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
+ viterbi_alignment[j] = best_i ;
+ viterbi_score *= word_best_score ; /// denom ;
+ if (denom == 0) {
+ if (test)
+ cerr << "WARNING: denom is zero (TEST)\n";
+ else
+ cerr << "WARNING: denom is zero (TRAIN)\n";
+ }
+ cross_entropy += log(denom) ;
+ if (!test) {
+ if(denom > 0) {
+ COUNT val = COUNT(so) / (COUNT) double(denom) ;
+ /* this if loop implements a constraint on counting:
+ count(es[i], fs[j]) is implemented if and only if
+ es[i] and fs[j] occur together in the dictionary,
+ OR
+ es[i] does not occur in the dictionary with any fs[x] and
+ fs[j] does not occur in the dictionary with any es[y]
+ */
+ if(it == 1 && useDict) {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++) {
+ if(indict[j][i] || (!findict[j] && !eindict[i])) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ COUNT x=e*val;
+ if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE )
+ /* if ((*sPtrCachePtr) != 0)
+ (*((*sPtrCachePtr))).count += x;
+ else */
+ tTable.incCount(es[i], fs[j], x);
+ } /* end of if */
+ } /* end of for i */
+ } /* end of it == 1 */
+ // Old code:
+ else {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++) {
+ //for(i=0; i <= l; i++) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ //if( !(i==0) )
+ //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
+ COUNT x=e*val;
+ if( pair_no==VerboseSentence )
+ cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
+ if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE ) {
+ /*if( NoEmptyWord==0 || i!=0 )
+ if ((*sPtrCachePtr) != 0)
+ (*((*sPtrCachePtr))).count += x;
+ else */
+ //cerr << i << " " << j << " (+) " << endl;
+ //cerr.flush();
+ //cerr << es[i] << " " << fs[j] << " (=) "<< endl;
+ //cerr.flush();
+ tTable.incCount(es[i], fs[j], x);
+ //cerr << es[i] << " " << fs[j] << " (-) "<< endl;
+ //cerr.flush();
+ }
+ } /* end of for i */
+ } // end of else
+ } // end of if (denom > 0)
+ }// if (!test)
+ } // end of for (j) ;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
+ perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
+ if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
+ printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
+ addAL(viterbi_alignment,sent.sentenceNo,l);
+ pair_no++;
#ifdef WIN32
- delete[] eindict;
- delete[] findict;
- for(int _i = 0; _i < m+1; _i++)
- delete[] indict[_i];
- delete[] indict;
+ delete[] eindict;
+ delete[] findict;
+ for(int _i = 0; _i < m+1; _i++)
+ delete[] indict[_i];
+ delete[] indict;
#endif
- } /* of while */
+ } /* of while */
}
-CTTableDiff<COUNT,PROB>* model1::one_step_em(int it, bool seedModel1,
- Dictionary& dictionary, bool useDict){
- CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
- double minErrors=1.0;
- string modelName="Model1",shortModelName="1";
- time_t st, it_st, fn;
- string tfile, number, alignfile, test_alignfile;
- bool dump_files = false ;
- st = time(NULL);
- sHandler1.rewind();
- cout << "==========================================================\n";
- cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
- it_st = time(NULL);
- cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- alignfile = Prefix + ".A1" ;
- test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
- initAL();
- em_loop_1(diff,it,perp, sHandler1, seedModel1,
- dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
- //if (testPerp && testHandler) // calculate test perplexity
- // em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
- if( errorsAL()<minErrors ) minErrors=errorsAL();
- fn = time(NULL) ;
- cout << "Partial " << modelName << " Training took: " << difftime(fn, it_st) << " seconds\n";
- return diff;
- }
+CTTableDiff<COUNT,PROB>* model1::one_step_em(int it, bool seedModel1,
+ Dictionary& dictionary, bool useDict)
+{
+ CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
+ double minErrors=1.0;
+ string modelName="Model1",shortModelName="1";
+ time_t st, it_st, fn;
+ string tfile, number, alignfile, test_alignfile;
+ bool dump_files = false ;
+ st = time(NULL);
+ sHandler1.rewind();
+ cout << "==========================================================\n";
+ cout << modelName << " Training Started at: "<< my_ctime(&st) << "\n";
+ it_st = time(NULL);
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A1" ;
+ test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
+ initAL();
+ em_loop_1(diff,it,perp, sHandler1, seedModel1,
+ dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ //if (testPerp && testHandler) // calculate test perplexity
+ // em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
+ if( errorsAL()<minErrors ) minErrors=errorsAL();
+ fn = time(NULL) ;
+ cout << "Partial " << modelName << " Training took: " << difftime(fn, it_st) << " seconds\n";
+ return diff;
+}
- void model1::combine_one(CTTableDiff<COUNT,PROB>* cb){
- cb->AugmentTTable(tTable);
- }
-
- void model1::recombine(){
- tTable.normalizeTable(Elist, Flist);
- }
-
- void save_table(const char* tname){
-/* if (dump_files){
- * if( OutputInAachenFormat==0 )
- * tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- */
+void model1::combine_one(CTTableDiff<COUNT,PROB>* cb)
+{
+ cb->AugmentTTable(tTable);
+}
- }
+void model1::recombine()
+{
+ tTable.normalizeTable(Elist, Flist);
+}
+
+void save_table(const char* tname)
+{
+ /* if (dump_files){
+ * if( OutputInAachenFormat==0 )
+ * tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
+ */
+
+}
-
-void model1::em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
- bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test) {
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS)
- of2.open(alignfile);
- PROB uniform = 1.0/noFrenchWords ;
- sentPair sent ;
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1 ;
+
+void model1::em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
+ bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test)
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS)
+ of2.open(alignfile);
+ PROB uniform = 1.0/noFrenchWords ;
+ sentPair sent ;
+ sHandler1.rewind();
+ while(sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+ double viterbi_score = 1 ;
#ifdef WIN32
- bool *eindict = new bool[l + 1];
- bool *findict = new bool[m + 1];
- bool **indict = new bool*[m + 1];
- for(int _i = 0; _i < m+1; _i++)
- indict[_i] = new bool[l + 1];
+ bool *eindict = new bool[l + 1];
+ bool *findict = new bool[m + 1];
+ bool **indict = new bool*[m + 1];
+ for(int _i = 0; _i < m+1; _i++)
+ indict[_i] = new bool[l + 1];
#else
- bool eindict[l + 1];
- bool findict[m + 1];
- bool indict[m + 1][l + 1];
+ bool eindict[l + 1];
+ bool findict[m + 1];
+ bool indict[m + 1][l + 1];
#endif
- if(it == 1 && useDict){
- for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
- for(unsigned int dummy = 0; dummy <= m; dummy++){
- findict[dummy] = false;
- for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
- indict[dummy][dummy2] = false;
- }
- for(j = 0; j <= m; j++)
- for(i = 0; i <= l; i++)
- if(dict.indict(fs[j], es[i])){
- eindict[i] = findict[j] = indict[j][i] = true;
- }
- }
-
- for(j=1; j <= m; j++){
- // entries that map fs to all possible ei in this sentence.
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- //Vector<COUNT *> sPtrCacheDif(es.size(),0); // cache pointers to table
- LpPair<COUNT,PROB> **sPtrCachePtr;
- //COUNT **sPtrCachePtrDif;
-
- PROB denom = 0.0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- PROB word_best_score = 0 ; // score for the best mapping of fj
- if (it == 1 && !seedModel1){
- denom = uniform * es.size() ;
- word_best_score = uniform ;
- }
- else {
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- PROB e(0.0) ;
- (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
- //(*sPtrCachePtrDif) = diff->GetPtr(es[i], fs[j]) ;
- if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
+ if(it == 1 && useDict) {
+ for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
+ for(unsigned int dummy = 0; dummy <= m; dummy++) {
+ findict[dummy] = false;
+ for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
+ indict[dummy][dummy2] = false;
+ }
+ for(j = 0; j <= m; j++)
+ for(i = 0; i <= l; i++)
+ if(dict.indict(fs[j], es[i])) {
+ eindict[i] = findict[j] = indict[j][i] = true;
+ }
+ }
+
+ for(j=1; j <= m; j++) {
+ // entries that map fs to all possible ei in this sentence.
+ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
+ //Vector<COUNT *> sPtrCacheDif(es.size(),0); // cache pointers to table
+ LpPair<COUNT,PROB> **sPtrCachePtr;
+ //COUNT **sPtrCachePtrDif;
+
+ PROB denom = 0.0;
+ WordIndex best_i = 0 ; // i for which fj is best maped to ei
+ PROB word_best_score = 0 ; // score for the best mapping of fj
+ if (it == 1 && !seedModel1) {
+ denom = uniform * es.size() ;
+ word_best_score = uniform ;
+ } else {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++) {
+ PROB e(0.0) ;
+ (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
+ //(*sPtrCachePtrDif) = diff->GetPtr(es[i], fs[j]) ;
+ if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ denom += e ;
+ if (e > word_best_score) {
+ word_best_score = e ;
+ best_i = i ;
+ }
+ }
+ }
+ viterbi_alignment[j] = best_i ;
+ viterbi_score *= word_best_score ; /// denom ;
+ if (denom == 0) {
+ if (test)
+ cerr << "WARNING: denom is zero (TEST)\n";
+ else
+ cerr << "WARNING: denom is zero (TRAIN)\n";
+ }
+ cross_entropy += log(denom) ;
+ if (!test) {
+ if(denom > 0) {
+ COUNT val = COUNT(so) / (COUNT) double(denom) ;
+ /* this if loop implements a constraint on counting:
+ count(es[i], fs[j]) is implemented if and only if
+ es[i] and fs[j] occur together in the dictionary,
+ OR
+ es[i] does not occur in the dictionary with any fs[x] and
+ fs[j] does not occur in the dictionary with any es[y]
+ */
+ if(it == 1 && useDict) {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]);
+ i <= l; i++,sPtrCachePtr++) {
+ if(indict[j][i] || (!findict[j] && !eindict[i])) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ COUNT x=e*val;
+ if( it==1||x>MINCOUNTINCREASE ) {
+ /*if ((*sPtrCachePtr) != 0){
+ (*((*sPtrCachePtr))).count += x;
+ } else {*/
+ tTable.incCount(es[i], fs[j], x);
+ //}
+ diff->incCount(es[i], fs[j], x);
}
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score ; /// denom ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
+ } /* end of if */
+ } /* end of for i */
+ } /* end of it == 1 */
+ // Old code:
+ else {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++) {
+ //for(i=0; i <= l; i++) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ //if( !(i==0) )
+ //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
+ COUNT x=e*val;
+ if( pair_no==VerboseSentence )
+ cout << i << "(" << evlist[es[i]].word << "),"
+ << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
+ if( it==1||x>MINCOUNTINCREASE )
+ if( NoEmptyWord==0 || ( NoEmptyWord==0 || i!=0 )) {
+ /*if ((*sPtrCachePtr) != 0){
+ (*((*sPtrCachePtr))).count += x;
+ } else */
+ tTable.incCount(es[i], fs[j], x);
+ diff->incCount(es[i], fs[j], x);
}
- cross_entropy += log(denom) ;
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- /* this if loop implements a constraint on counting:
- count(es[i], fs[j]) is implemented if and only if
- es[i] and fs[j] occur together in the dictionary,
- OR
- es[i] does not occur in the dictionary with any fs[x] and
- fs[j] does not occur in the dictionary with any es[y]
- */
- if(it == 1 && useDict){
- for((i=0),(sPtrCachePtr=&sPtrCache[0]);
- i <= l; i++,sPtrCachePtr++){
- if(indict[j][i] || (!findict[j] && !eindict[i])){
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- COUNT x=e*val;
- if( it==1||x>MINCOUNTINCREASE ){
- /*if ((*sPtrCachePtr) != 0){
- (*((*sPtrCachePtr))).count += x;
- } else {*/
- tTable.incCount(es[i], fs[j], x);
- //}
- diff->incCount(es[i], fs[j], x);
- }
- } /* end of if */
- } /* end of for i */
- } /* end of it == 1 */
- // Old code:
- else{
- for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
- //for(i=0; i <= l; i++) {
- PROB e(0.0) ;
- if (it == 1 && !seedModel1)
- e = uniform ;
- else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
- e = (*((*sPtrCachePtr))).prob;
- else e = PROB_SMOOTH ;
- //if( !(i==0) )
- //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
- COUNT x=e*val;
- if( pair_no==VerboseSentence )
- cout << i << "(" << evlist[es[i]].word << "),"
- << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
- if( it==1||x>MINCOUNTINCREASE )
- if( NoEmptyWord==0 || ( NoEmptyWord==0 || i!=0 )){
- /*if ((*sPtrCachePtr) != 0){
- (*((*sPtrCachePtr))).count += x;
- } else */
- tTable.incCount(es[i], fs[j], x);
- diff->incCount(es[i], fs[j], x);
- }
- } /* end of for i */
- } // end of else
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
- perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
- printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
+ } /* end of for i */
+ } // end of else
+ } // end of if (denom > 0)
+ }// if (!test)
+ } // end of for (j) ;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
+ perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
+ if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
+ printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
+ addAL(viterbi_alignment,sent.sentenceNo,l);
+ pair_no++;
#ifdef WIN32
- delete[] eindict;
- delete[] findict;
- for(int _i = 0; _i < m+1; _i++)
- delete[] indict[_i];
- delete[] indict;
+ delete[] eindict;
+ delete[] findict;
+ for(int _i = 0; _i < m+1; _i++)
+ delete[] indict[_i];
+ delete[] indict;
#endif
- } /* of while */
- sHandler1.rewind();
- perp.record("Model1");
- viterbi_perp.record("Model1");
- errorReportAL(cout, "IBM-1");
+ } /* of while */
+ sHandler1.rewind();
+ perp.record("Model1");
+ viterbi_perp.record("Model1");
+ errorReportAL(cout, "IBM-1");
- }
+}
diff --git a/mgizapp/src/model1.h b/mgizapp/src/model1.h
index 142ca4f..1a2c909 100644
--- a/mgizapp/src/model1.h
+++ b/mgizapp/src/model1.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -23,7 +23,7 @@ USA.
#define _model1_h 1
#include <cassert>
-
+
#include <iostream>
#include <strstream>
#include <algorithm>
@@ -56,159 +56,179 @@ using __gnu_cxx::hash_map;
extern int NumberOfVALIalignments;
-class report_info{
- public:
- Mutex alLock;
+class report_info
+{
+public:
+ Mutex alLock;
Perplexity& perp;
sentenceHandler& sHandler1;
Perplexity* testPerp;
sentenceHandler* testHandler;
- Perplexity& trainViterbiPerp;
+ Perplexity& trainViterbiPerp;
Perplexity* testViterbiPerp;
report_info(Perplexity& _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp)
+ sentenceHandler& _sHandler1,
+ Perplexity* _testPerp,
+ sentenceHandler* _testHandler,
+ Perplexity& _trainViterbiPerp,
+ Perplexity* _testViterbiPerp)
:
- perp(_perp),
- sHandler1(_sHandler1),
- testPerp(_testPerp),
- testHandler(_testHandler),
- trainViterbiPerp(_trainViterbiPerp),
- testViterbiPerp(_testViterbiPerp)
- {}
-
- report_info(const report_info &rp) :
- perp(rp.perp),
- sHandler1(rp.sHandler1),
- testPerp(rp.testPerp),
- testHandler(rp.testHandler),
- trainViterbiPerp(rp.trainViterbiPerp),
- testViterbiPerp(rp.testViterbiPerp)
- {}
+ perp(_perp),
+ sHandler1(_sHandler1),
+ testPerp(_testPerp),
+ testHandler(_testHandler),
+ trainViterbiPerp(_trainViterbiPerp),
+ testViterbiPerp(_testViterbiPerp)
+ {}
+
+ report_info(const report_info &rp) :
+ perp(rp.perp),
+ sHandler1(rp.sHandler1),
+ testPerp(rp.testPerp),
+ testHandler(rp.testHandler),
+ trainViterbiPerp(rp.trainViterbiPerp),
+ testViterbiPerp(rp.testViterbiPerp)
+ {}
};
-class model1 : public report_info{
+class model1 : public report_info
+{
public:
- string efFilename;
- vcbList& Elist ;
- vcbList& Flist ;
- double eTotalWCount ; // size of source copus in number of words
- double fTotalWCount ; // size of target corpus in number of words
- int noEnglishWords;
- int noFrenchWords;
- tmodel<COUNT, PROB>&tTable;
- Vector<WordEntry>& evlist ;
- Vector<WordEntry>& fvlist ;
- int threadID;
+ string efFilename;
+ vcbList& Elist ;
+ vcbList& Flist ;
+ double eTotalWCount ; // size of source copus in number of words
+ double fTotalWCount ; // size of target corpus in number of words
+ int noEnglishWords;
+ int noFrenchWords;
+ tmodel<COUNT, PROB>&tTable;
+ Vector<WordEntry>& evlist ;
+ Vector<WordEntry>& fvlist ;
+ int threadID;
public:
- int ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch;
- int ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI;
- int ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST;
- model1 (const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
- sentenceHandler& _sHandler1,
- Perplexity* _testPerp,
- sentenceHandler* _testHandler,
- Perplexity& _trainViterbiPerp,
- Perplexity* _testViterbiPerp);
-
- model1 (const model1& m1, int _threadID=0);
- void initialize_table_uniformly(sentenceHandler& sHandler1);
-
- int em_with_tricks(int noIterations,
- bool seedModel1, Dictionary& dictionary, bool useDict, bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- int em_thread(int noIterations, int thread,Dictionary& dictionary, bool useDict);
- bool load_table(const char* tname);
- void readVocabFile(const char* fname, Vector<WordEntry>& vlist, int& vsize,
- int& total);
- inline Vector<WordEntry>& getEnglishVocabList(void)const {return Elist.getVocabList();};
- inline Vector<WordEntry>& getFrenchVocabList(void)const {return Flist.getVocabList();};
- inline double getETotalWCount(void) const {return eTotalWCount;};
- inline double getFTotalWCount(void) const {return fTotalWCount;};
- inline int getNoEnglishWords(void) const {return noEnglishWords;};
- inline int getNoFrenchWords(void) const {return noFrenchWords;};
- inline tmodel<COUNT, PROB>& getTTable(void) {return tTable;};
- inline string& getEFFilename(void) {return efFilename;};
-
+ int ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch;
+ int ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI;
+ int ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST;
+ model1 (const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
+ sentenceHandler& _sHandler1,
+ Perplexity* _testPerp,
+ sentenceHandler* _testHandler,
+ Perplexity& _trainViterbiPerp,
+ Perplexity* _testViterbiPerp);
+
+ model1 (const model1& m1, int _threadID=0);
+ void initialize_table_uniformly(sentenceHandler& sHandler1);
+
+ int em_with_tricks(int noIterations,
+ bool seedModel1, Dictionary& dictionary, bool useDict, bool dumpCount = false,
+ const char* dumpCountName = NULL, bool useString = false);
+ int em_thread(int noIterations, int thread,Dictionary& dictionary, bool useDict);
+ bool load_table(const char* tname);
+ void readVocabFile(const char* fname, Vector<WordEntry>& vlist, int& vsize,
+ int& total);
+ inline Vector<WordEntry>& getEnglishVocabList(void)const {
+ return Elist.getVocabList();
+ };
+ inline Vector<WordEntry>& getFrenchVocabList(void)const {
+ return Flist.getVocabList();
+ };
+ inline double getETotalWCount(void) const {
+ return eTotalWCount;
+ };
+ inline double getFTotalWCount(void) const {
+ return fTotalWCount;
+ };
+ inline int getNoEnglishWords(void) const {
+ return noEnglishWords;
+ };
+ inline int getNoFrenchWords(void) const {
+ return noFrenchWords;
+ };
+ inline tmodel<COUNT, PROB>& getTTable(void) {
+ return tTable;
+ };
+ inline string& getEFFilename(void) {
+ return efFilename;
+ };
+
////////////////////////////////////////////////////////////////
// Added by Qin Gao To Enable Parallel Training
////////////////////////////////////////////////////////////////
- CTTableDiff<COUNT,PROB>* one_step_em(int it ,bool seedModel1, Dictionary& dictionary,
- bool useDict);
-
- void recombine();
-
- void combine_one(CTTableDiff<COUNT,PROB>* cb);
-
- void save_table(const char* tname);
-
-
-
-
+ CTTableDiff<COUNT,PROB>* one_step_em(int it ,bool seedModel1, Dictionary& dictionary,
+ bool useDict);
+
+ void recombine();
+
+ void combine_one(CTTableDiff<COUNT,PROB>* cb);
+
+ void save_table(const char* tname);
+
+
+
+
////////////////////////////////////////////////////////////////
// END OF QIN GAO's CODE
////////////////////////////////////////////////////////////////
private:
- void em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
- Perplexity& viterbiperp, bool=false);
- void em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
- Perplexity& viterbiperp, bool=false);
- friend class model2;
- friend class hmm;
+ void em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
+ Perplexity& viterbiperp, bool=false);
+ void em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict,
+ Perplexity& viterbiperp, bool=false);
+ friend class model2;
+ friend class hmm;
public:
- void addAL(const Vector<WordIndex>& viterbi_alignment,int pair_no,int l){
- alLock.lock();
- if( pair_no<=int(ReferenceAlignment.size()) ){
- //cerr << "AL: " << viterbi_alignment << " " << pair_no << endl;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- if( pair_no<=NumberOfVALIalignments ){
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI,pair_no);
- }
- if( pair_no>NumberOfVALIalignments ){
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST,pair_no);
- }
- }
- alLock.unlock();
+ void addAL(const Vector<WordIndex>& viterbi_alignment,int pair_no,int l) {
+ alLock.lock();
+ if( pair_no<=int(ReferenceAlignment.size()) ) {
+ //cerr << "AL: " << viterbi_alignment << " " << pair_no << endl;
+ ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
+ if( pair_no<=NumberOfVALIalignments ) {
+ ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI,pair_no);
+ }
+ if( pair_no>NumberOfVALIalignments ) {
+ ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST,pair_no);
+ }
+ }
+ alLock.unlock();
+ }
+ void initAL() {
+ ALmissingVALI=ALtoomuchVALI=ALeventsMissingVALI=ALeventsToomuchVALI=ALmissingTEST=ALtoomuchTEST=ALeventsMissingTEST=ALeventsToomuchTEST=ALmissing=ALtoomuch=ALeventsMissing=ALeventsToomuch=0;
+ }
+ double errorsAL()const {
+ if( ALeventsMissingVALI+ALeventsToomuchVALI ) {
+ return (ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI);
+ } else {
+ return 0.0;
+ }
+ }
+ void errorReportAL(ostream&out,string m)const {
+ if( ALeventsMissing+ALeventsToomuch ) {
+ out << "alignmentErrors (" << m << "): "
+ << 100.0*(ALmissing+ALtoomuch)/double(ALeventsMissing+ALeventsToomuch)
+ << " recall: " << 100.0*(1.0-ALmissing/double(ALeventsMissing))
+ << " precision: " << 100.0*(1.0-ALtoomuch/double(ALeventsToomuch))
+ << " (missing:" << ALmissing << "/" << ALeventsMissing << " " << ALtoomuch
+ << " " << ALeventsToomuch << ")\n";
}
- void initAL(){ALmissingVALI=ALtoomuchVALI=ALeventsMissingVALI=ALeventsToomuchVALI=ALmissingTEST=ALtoomuchTEST=ALeventsMissingTEST=ALeventsToomuchTEST=ALmissing=ALtoomuch=ALeventsMissing=ALeventsToomuch=0;}
- double errorsAL()const{
- if( ALeventsMissingVALI+ALeventsToomuchVALI ){
- return (ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI);
- }else{
- return 0.0;
- }
+ if( ALeventsMissingVALI+ALeventsToomuchVALI ) {
+ out << "alignmentErrors VALI (" << m << "): "
+ << 100.0*(ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI)
+ << " recall: " << 100.0*(1.0-ALmissingVALI/double(ALeventsMissingVALI))
+ << " precision: " << 100.0*(1.0-ALtoomuchVALI/double(ALeventsToomuchVALI))
+ << " (missing:" << ALmissingVALI << "/" << ALeventsMissingVALI << " " << ALtoomuchVALI
+ << " " << ALeventsToomuchVALI << ")\n";
}
- void errorReportAL(ostream&out,string m)const{
- if( ALeventsMissing+ALeventsToomuch ){
- out << "alignmentErrors (" << m << "): "
- << 100.0*(ALmissing+ALtoomuch)/double(ALeventsMissing+ALeventsToomuch)
- << " recall: " << 100.0*(1.0-ALmissing/double(ALeventsMissing))
- << " precision: " << 100.0*(1.0-ALtoomuch/double(ALeventsToomuch))
- << " (missing:" << ALmissing << "/" << ALeventsMissing << " " << ALtoomuch
- << " " << ALeventsToomuch << ")\n";
- }
- if( ALeventsMissingVALI+ALeventsToomuchVALI ){
- out << "alignmentErrors VALI (" << m << "): "
- << 100.0*(ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI)
- << " recall: " << 100.0*(1.0-ALmissingVALI/double(ALeventsMissingVALI))
- << " precision: " << 100.0*(1.0-ALtoomuchVALI/double(ALeventsToomuchVALI))
- << " (missing:" << ALmissingVALI << "/" << ALeventsMissingVALI << " " << ALtoomuchVALI
- << " " << ALeventsToomuchVALI << ")\n";
- }
- if( ALeventsMissingTEST+ALeventsToomuchTEST ){
- out << "alignmentErrors TEST(" << m << "): "
- << 100.0*(ALmissingTEST+ALtoomuchTEST)/double(ALeventsMissingTEST+ALeventsToomuchTEST)
- << " recall: " << 100.0*(1.0-ALmissingTEST/double(ALeventsMissingTEST))
- << " precision: " << 100.0*(1.0-ALtoomuchTEST/double(ALeventsToomuchTEST))
- << " (missing:" << ALmissingTEST << "/" << ALeventsMissingTEST << " " << ALtoomuchTEST
- << " " << ALeventsToomuchTEST << ")\n";
- }
+ if( ALeventsMissingTEST+ALeventsToomuchTEST ) {
+ out << "alignmentErrors TEST(" << m << "): "
+ << 100.0*(ALmissingTEST+ALtoomuchTEST)/double(ALeventsMissingTEST+ALeventsToomuchTEST)
+ << " recall: " << 100.0*(1.0-ALmissingTEST/double(ALeventsMissingTEST))
+ << " precision: " << 100.0*(1.0-ALtoomuchTEST/double(ALeventsToomuchTEST))
+ << " (missing:" << ALmissingTEST << "/" << ALeventsMissingTEST << " " << ALtoomuchTEST
+ << " " << ALeventsToomuchTEST << ")\n";
}
+ }
};
#endif
diff --git a/mgizapp/src/model2.cpp b/mgizapp/src/model2.cpp
index dddde77..c0e6c4f 100644
--- a/mgizapp/src/model2.cpp
+++ b/mgizapp/src/model2.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,206 +30,210 @@ extern short NoEmptyWord;
GLOBAL_PARAMETER2(int,Model2_Dump_Freq,"MODEL 2 DUMP FREQUENCY","t2","dump frequency of Model 2",PARLEV_OUTPUT,0);
-model2::model2(model1& m,amodel<PROB>&_aTable,amodel<COUNT>&_aCountTable):
+model2::model2(model1& m,amodel<PROB>&_aTable,amodel<COUNT>&_aCountTable):
model1(m),aTable(_aTable),aCountTable(_aCountTable)
{ }
-void model2::initialize_table_uniformly(sentenceHandler& sHandler1){
- // initialize the aTable uniformly (run this before running em_with_tricks)
- int n=0;
- sentPair sent ;
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- WordIndex l = es.size() - 1;
- WordIndex m = fs.size() - 1;
- n++;
- if(1<=m&&aTable.getValue(l,m,l,m)<=PROB_SMOOTH){
- PROB uniform_val = 1.0 / (l+1) ;
- for(WordIndex j=1; j <= m; j++)
- for(WordIndex i=0; i <= l; i++)
- aTable.setValue(i,j, l, m, uniform_val);
- }
+void model2::initialize_table_uniformly(sentenceHandler& sHandler1)
+{
+ // initialize the aTable uniformly (run this before running em_with_tricks)
+ int n=0;
+ sentPair sent ;
+ sHandler1.rewind();
+ while(sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ WordIndex l = es.size() - 1;
+ WordIndex m = fs.size() - 1;
+ n++;
+ if(1<=m&&aTable.getValue(l,m,l,m)<=PROB_SMOOTH) {
+ PROB uniform_val = 1.0 / (l+1) ;
+ for(WordIndex j=1; j <= m; j++)
+ for(WordIndex i=0; i <= l; i++)
+ aTable.setValue(i,j, l, m, uniform_val);
}
+ }
}
-int model2::em_with_tricks(int noIterations,bool dumpCount,
- const char* dumpCountName, bool useString){
- double minErrors=1.0;int minIter=0;
- string modelName="Model2",shortModelName="2";
- time_t it_st, st, it_fn, fn;
- string tfile, afile, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st) << " iter: " << noIterations << "\n";
- for(int it=1; it <= noIterations ; it++){
- pair_no = 0;
- it_st = time(NULL) ;
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (Model2_Dump_Freq != 0) && ((it % Model2_Dump_Freq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- aCountTable.clear();
- initAL();
- em_loop(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp, false);
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- if (testPerp && testHandler)
- em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true);
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- }
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << testViterbiPerp->perplexity()
- << '\n';
- if (dump_files) {
- if(OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- aCountTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
- cout << modelName << " Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
- } // end of iterations
+int model2::em_with_tricks(int noIterations,bool dumpCount,
+ const char* dumpCountName, bool useString)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Model2",shortModelName="2";
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile, number, alignfile, test_alignfile;
+ int pair_no = 0;
+ bool dump_files = false ;
+ ofstream of2 ;
+ st = time(NULL) ;
+ sHandler1.rewind();
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st) << " iter: " << noIterations << "\n";
+ for(int it=1; it <= noIterations ; it++) {
+ pair_no = 0;
+ it_st = time(NULL) ;
+ cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model2_Dump_Freq != 0) && ((it % Model2_Dump_Freq) == 0) && !NODUMPS;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A" + shortModelName + "." + number ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
aCountTable.clear();
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- // cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return minIter;
+ initAL();
+ em_loop(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp, false);
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ if (testPerp && testHandler)
+ em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true);
+ if (dump_files&&OutputInAachenFormat==1)
+ tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+
+ if(dumpCount && it == noIterations) {
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ string realATableName = dumpCountName;
+ realATableName += ".a.count";
+ aCountTable.printRealTable(realATableName.c_str());
+ }
+ tTable.normalizeTable(Elist, Flist);
+ aCountTable.normalize(aTable);
+ cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
+ << " PERPLEXITY " << (*testPerp).perplexity()
+ << '\n';
+ cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
+ << " PERPLEXITY " << testViterbiPerp->perplexity()
+ << '\n';
+ if (dump_files) {
+ if(OutputInAachenFormat==0)
+ tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
+ aCountTable.printTable(afile.c_str());
+ }
+ it_fn = time(NULL) ;
+ cout << modelName << " Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
+ } // end of iterations
+ aCountTable.clear();
+ fn = time(NULL) ;
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ // cout << "tTable contains " << tTable.getHash().bucket_count()
+ // << " buckets and " << tTable.getHash().size() << " entries." ;
+ cout << "==========================================================\n";
+ return minIter;
}
-void model2::load_table(const char* aname){
+void model2::load_table(const char* aname)
+{
/* This function loads the a table from the given file; use it
when you want to load results from previous a training without
doing any new training.
NAS, 7/11/99
*/
- cout << "Model2: loading a table \n";
- aTable.readTable(aname);
+ cout << "Model2: loading a table \n";
+ aTable.readTable(aname);
}
-void model2::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test)
+void model2::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test)
{
- massert( aTable.is_distortion==0 );
- massert( aCountTable.is_distortion==0 );
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
-
- vector<double> ferts(evlist.size());
-
- sHandler1.rewind();
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
- double viterbi_score = 1;
- for(j=1; j <= m; j++){
- Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
- // entries that map fs to all possible ei in this sentence.
- PROB denom = 0.0;
- PROB e = 0.0, word_best_score = 0;
- WordIndex best_i = 0 ; // i for which fj is best maped to ei
- for(i=0; i <= l; i++){
- sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
- if (sPtrCache[i] != 0 &&(*(sPtrCache[i])).prob > PROB_SMOOTH )
- e = (*(sPtrCache[i])).prob * aTable.getValue(i,j, l, m) ;
- else e = PROB_SMOOTH * aTable.getValue(i,j, l, m);
- denom += e ;
- if (e > word_best_score){
- word_best_score = e ;
- best_i = i ;
- }
- }
- viterbi_alignment[j] = best_i ;
- viterbi_score *= word_best_score; ///denom ;
- cross_entropy += log(denom) ;
- if (denom == 0){
- if (test)
- cerr << "WARNING: denom is zero (TEST)\n";
- else
- cerr << "WARNING: denom is zero (TRAIN)\n";
- }
- if (!test){
- if(denom > 0){
- COUNT val = COUNT(so) / (COUNT) double(denom) ;
- for( i=0; i <= l; i++){
- PROB e(0.0);
- if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH)
- e = (*(sPtrCache[i])).prob ;
- else e = PROB_SMOOTH ;
- e *= aTable.getValue(i,j, l, m);
- COUNT temp = COUNT(e) * val ;
- if( NoEmptyWord==0 || i!=0 )
- if (sPtrCache[i] != 0)
- (*(sPtrCache[i])).count += temp ;
- else
- tTable.incCount(es[i], fs[j], temp);
- aCountTable.addValue(i,j, l, m,temp) ;
- } /* end of for i */
- } // end of if (denom > 0)
- }// if (!test)
- } // end of for (j) ;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score), so, l, m,1);
- if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- pair_no++;
- } /* of while */
- sHandler1.rewind();
- perp.record("Model2");
- viterbi_perp.record("Model2");
- errorReportAL(cout,"IBM-2");
+ massert( aTable.is_distortion==0 );
+ massert( aCountTable.is_distortion==0 );
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+
+ vector<double> ferts(evlist.size());
+
+ sHandler1.rewind();
+ while(sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+ double viterbi_score = 1;
+ for(j=1; j <= m; j++) {
+ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
+ // entries that map fs to all possible ei in this sentence.
+ PROB denom = 0.0;
+ PROB e = 0.0, word_best_score = 0;
+ WordIndex best_i = 0 ; // i for which fj is best maped to ei
+ for(i=0; i <= l; i++) {
+ sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
+ if (sPtrCache[i] != 0 &&(*(sPtrCache[i])).prob > PROB_SMOOTH )
+ e = (*(sPtrCache[i])).prob * aTable.getValue(i,j, l, m) ;
+ else e = PROB_SMOOTH * aTable.getValue(i,j, l, m);
+ denom += e ;
+ if (e > word_best_score) {
+ word_best_score = e ;
+ best_i = i ;
+ }
+ }
+ viterbi_alignment[j] = best_i ;
+ viterbi_score *= word_best_score; ///denom ;
+ cross_entropy += log(denom) ;
+ if (denom == 0) {
+ if (test)
+ cerr << "WARNING: denom is zero (TEST)\n";
+ else
+ cerr << "WARNING: denom is zero (TRAIN)\n";
+ }
+ if (!test) {
+ if(denom > 0) {
+ COUNT val = COUNT(so) / (COUNT) double(denom) ;
+ for( i=0; i <= l; i++) {
+ PROB e(0.0);
+ if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH)
+ e = (*(sPtrCache[i])).prob ;
+ else e = PROB_SMOOTH ;
+ e *= aTable.getValue(i,j, l, m);
+ COUNT temp = COUNT(e) * val ;
+ if( NoEmptyWord==0 || i!=0 )
+ if (sPtrCache[i] != 0)
+ (*(sPtrCache[i])).count += temp ;
+ else
+ tTable.incCount(es[i], fs[j], temp);
+ aCountTable.addValue(i,j, l, m,temp) ;
+ } /* end of for i */
+ } // end of if (denom > 0)
+ }// if (!test)
+ } // end of for (j) ;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score), so, l, m,1);
+ if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
+ addAL(viterbi_alignment,sent.sentenceNo,l);
+ pair_no++;
+ } /* of while */
+ sHandler1.rewind();
+ perp.record("Model2");
+ viterbi_perp.record("Model2");
+ errorReportAL(cout,"IBM-2");
}
diff --git a/mgizapp/src/model2.h b/mgizapp/src/model2.h
index d379f22..0533ca3 100644
--- a/mgizapp/src/model2.h
+++ b/mgizapp/src/model2.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -23,7 +23,7 @@ USA.
#define _model2_h 1
#include <cassert>
-
+
#include <iostream>
#include <algorithm>
#include <functional>
@@ -44,27 +44,32 @@ using __gnu_cxx::hash_map;
#include <ctime>
#include "TTables.h"
-#include "ATables.h"
+#include "ATables.h"
#include "getSentence.h"
#include "defs.h"
#include "model1.h"
#include "Perplexity.h"
#include "vocab.h"
-class model2 : public model1{
+class model2 : public model1
+{
public:
- amodel<PROB>&aTable;
- amodel<COUNT>&aCountTable;
+ amodel<PROB>&aTable;
+ amodel<COUNT>&aCountTable;
public:
- model2(model1& m1,amodel<PROB>&,amodel<COUNT>&);
- void initialize_table_uniformly(sentenceHandler&);
- int em_with_tricks(int iterations,bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- void load_table(const char* aname);
- inline amodel<PROB>& getATable(void) {return aTable;};
- inline amodel<COUNT>& getACountTable(void) {return aCountTable;};
- void em_loop(Perplexity& perp,sentenceHandler& sHandler1, bool dump_files,const char* alignfile, Perplexity&, bool test);
- friend class model3;
+ model2(model1& m1,amodel<PROB>&,amodel<COUNT>&);
+ void initialize_table_uniformly(sentenceHandler&);
+ int em_with_tricks(int iterations,bool dumpCount = false,
+ const char* dumpCountName = NULL, bool useString = false);
+ void load_table(const char* aname);
+ inline amodel<PROB>& getATable(void) {
+ return aTable;
+ };
+ inline amodel<COUNT>& getACountTable(void) {
+ return aCountTable;
+ };
+ void em_loop(Perplexity& perp,sentenceHandler& sHandler1, bool dump_files,const char* alignfile, Perplexity&, bool test);
+ friend class model3;
};
#endif
diff --git a/mgizapp/src/model2to3.cpp b/mgizapp/src/model2to3.cpp
index 4c6d729..72f1c97 100644
--- a/mgizapp/src/model2to3.cpp
+++ b/mgizapp/src/model2to3.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -37,75 +37,72 @@ double get_sum_of_partitions(int n, int source_pos, double alpha[_MAX_FERTILITY]
int part[_MAX_FERTILITY], mult[_MAX_FERTILITY];
done = false ;
- init = true ;
- for (i = 0 ; i < _MAX_FERTILITY ; i++){
+ init = true ;
+ for (i = 0 ; i < _MAX_FERTILITY ; i++) {
part[i] = mult[i] = 0 ;
}
-
+
//printf("Entering get sum of partitions\n");
- while(! done){
+ while(! done) {
total_partitions_considered++;
- if (init){
+ if (init) {
part[1] = n ;
mult[1] = 1 ;
- num_parts = 1 ;
+ num_parts = 1 ;
init = false ;
- }
- else {
- if ((part[num_parts] > 1) || (num_parts > 1)){
- if (part[num_parts] == 1){
- s = part[num_parts-1] + mult[num_parts];
- k = num_parts - 1;
- }
- else {
- s = part[num_parts];
- k = num_parts ;
- }
- w = part[k] - 1 ;
- u = s / w ;
- v = s % w ;
- mult[k] -= 1 ;
- if (mult[k] == 0)
- k1 = k ;
- else k1 = k + 1 ;
- mult[k1] = u ;
- part[k1] = w ;
- if (v == 0){
- num_parts = k1 ;
- }
- else {
- mult[k1+1] = 1 ;
- part[k1+1] = v ;
- num_parts = k1 + 1;
- }
- } /* of if num_parts > 1 || part[num_parts] > 1 */
+ } else {
+ if ((part[num_parts] > 1) || (num_parts > 1)) {
+ if (part[num_parts] == 1) {
+ s = part[num_parts-1] + mult[num_parts];
+ k = num_parts - 1;
+ } else {
+ s = part[num_parts];
+ k = num_parts ;
+ }
+ w = part[k] - 1 ;
+ u = s / w ;
+ v = s % w ;
+ mult[k] -= 1 ;
+ if (mult[k] == 0)
+ k1 = k ;
+ else k1 = k + 1 ;
+ mult[k1] = u ;
+ part[k1] = w ;
+ if (v == 0) {
+ num_parts = k1 ;
+ } else {
+ mult[k1+1] = 1 ;
+ part[k1+1] = v ;
+ num_parts = k1 + 1;
+ }
+ } /* of if num_parts > 1 || part[num_parts] > 1 */
else {
- done = true ;
+ done = true ;
}
}
/* of else of if(init) */
- if (!done){
+ if (!done) {
prod = 1.0 ;
if (n != 0)
- for (i = 1 ; i <= num_parts ; i++){
- prod *= pow(alpha[part[i]][source_pos], mult[i]) / factorial(mult[i]) ;
- }
+ for (i = 1 ; i <= num_parts ; i++) {
+ prod *= pow(alpha[part[i]][source_pos], mult[i]) / factorial(mult[i]) ;
+ }
sum += prod ;
}
- } /* of while */
+ } /* of while */
if (sum < 0) sum = 0 ;
return(sum) ;
}
-void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& trainVPerp,
- bool simple, bool dump_files,bool updateT)
+void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& trainVPerp,
+ bool simple, bool dump_files,bool updateT)
{
string tfile, nfile, dfile, p0file, afile, alignfile;
WordIndex i, j, l, m, max_fertility_here, k ;
PROB val, temp_mult[MAX_SENTENCE_LENGTH_ALLOWED][MAX_SENTENCE_LENGTH_ALLOWED];
double cross_entropy;
- double beta, sum,
- alpha[_MAX_FERTILITY][MAX_SENTENCE_LENGTH_ALLOWED];
+ double beta, sum,
+ alpha[_MAX_FERTILITY][MAX_SENTENCE_LENGTH_ALLOWED];
double total, temp, r ;
dCountTable.clear();
@@ -117,109 +114,107 @@ void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perple
perp.clear() ;
trainVPerp.clear() ;
ofstream of2;
- if (dump_files){
+ if (dump_files) {
alignfile = Prefix +".A2to3";
of2.open(alignfile.c_str());
}
if (simple) cerr <<"Using simple estimation for fertilties\n";
sHandler1.rewind() ;
sentPair sent ;
- while(sHandler1.getNextSentence(sent)){
+ while(sHandler1.getNextSentence(sent)) {
Vector<WordIndex>& es = sent.eSent;
Vector<WordIndex>& fs = sent.fSent;
const float count = sent.getCount();
Vector<WordIndex> viterbi_alignment(fs.size());
l = es.size() - 1;
m = fs.size() - 1;
- cross_entropy = log(1.0);
+ cross_entropy = log(1.0);
double viterbi_score = 1 ;
PROB word_best_score ; // score for the best mapping of fj
- for(j = 1 ; j <= m ; j++){
- word_best_score = 0 ; // score for the best mapping of fj
+ for(j = 1 ; j <= m ; j++) {
+ word_best_score = 0 ; // score for the best mapping of fj
Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0);
total = 0 ;
WordIndex best_i = 0 ;
- for(i = 0; i <= l ; i++){
- sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
- if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH) // if valid pointer
- temp_mult[i][j]= (*(sPtrCache[i])).prob * aTable.getValue(i, j, l, m) ;
- else
- temp_mult[i][j] = PROB_SMOOTH * aTable.getValue(i, j, l, m) ;
- total += temp_mult[i][j] ;
- if (temp_mult[i][j] > word_best_score){
- word_best_score = temp_mult[i][j] ;
- best_i = i ;
- }
- } // end of for (i)
+ for(i = 0; i <= l ; i++) {
+ sPtrCache[i] = tTable.getPtr(es[i], fs[j]) ;
+ if (sPtrCache[i] != 0 && (*(sPtrCache[i])).prob > PROB_SMOOTH) // if valid pointer
+ temp_mult[i][j]= (*(sPtrCache[i])).prob * aTable.getValue(i, j, l, m) ;
+ else
+ temp_mult[i][j] = PROB_SMOOTH * aTable.getValue(i, j, l, m) ;
+ total += temp_mult[i][j] ;
+ if (temp_mult[i][j] > word_best_score) {
+ word_best_score = temp_mult[i][j] ;
+ best_i = i ;
+ }
+ } // end of for (i)
viterbi_alignment[j] = best_i ;
viterbi_score *= word_best_score ; /// total ;
- cross_entropy += log(total) ;
- if (total == 0){
- cerr << "WARNING: total is zero (TRAIN)\n";
- viterbi_score = 0 ;
+ cross_entropy += log(total) ;
+ if (total == 0) {
+ cerr << "WARNING: total is zero (TRAIN)\n";
+ viterbi_score = 0 ;
}
- if (total > 0){
- for(i = 0; i <= l ; i++){
- temp_mult[i][j] /= total ;
- if (temp_mult[i][j] == 1) // smooth to prevent underflow
- temp_mult[i][j] = 0.99 ;
- else if (temp_mult[i][j] == 0)
- temp_mult[i][j] = PROB_SMOOTH ;
- val = temp_mult[i][j] * PROB(count) ;
- if ( val > PROB_SMOOTH) {
- if( updateT )
- {
- if (sPtrCache[i] != 0)
- (*(sPtrCache[i])).count += val ;
- else
- tTable.incCount(es[i], fs[j], val);
- }
- aCountTable.addValue(i, j, l, m,val);
- if (0 != i)
- dCountTable.addValue(j, i, l, m,val);
- }
- } // for (i = ..)
+ if (total > 0) {
+ for(i = 0; i <= l ; i++) {
+ temp_mult[i][j] /= total ;
+ if (temp_mult[i][j] == 1) // smooth to prevent underflow
+ temp_mult[i][j] = 0.99 ;
+ else if (temp_mult[i][j] == 0)
+ temp_mult[i][j] = PROB_SMOOTH ;
+ val = temp_mult[i][j] * PROB(count) ;
+ if ( val > PROB_SMOOTH) {
+ if( updateT ) {
+ if (sPtrCache[i] != 0)
+ (*(sPtrCache[i])).count += val ;
+ else
+ tTable.incCount(es[i], fs[j], val);
+ }
+ aCountTable.addValue(i, j, l, m,val);
+ if (0 != i)
+ dCountTable.addValue(j, i, l, m,val);
+ }
+ } // for (i = ..)
} // for (if total ...)
} // end of for (j ...)
if (dump_files)
printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
addAL(viterbi_alignment,sent.sentenceNo,l);
- if (!simple){
+ if (!simple) {
max_fertility_here = min(WordIndex(m+1), MAX_FERTILITY);
- for (i = 1; i <= l ; i++) {
- for ( k = 1; k < max_fertility_here; k++){
- beta = 0 ;
- alpha[k][i] = 0 ;
- for (j = 1 ; j <= m ; j++){
- temp = temp_mult[i][j];
- if (temp > 0.95) temp = 0.95; // smooth to prevent under/over flow
- else if (temp < 0.05) temp = 0.05;
- beta += pow(temp/(1.0-temp), (double) k);
- }
- alpha[k][i] = beta * pow((double) -1, (double) (k+1)) / (double) k ;
- }
+ for (i = 1; i <= l ; i++) {
+ for ( k = 1; k < max_fertility_here; k++) {
+ beta = 0 ;
+ alpha[k][i] = 0 ;
+ for (j = 1 ; j <= m ; j++) {
+ temp = temp_mult[i][j];
+ if (temp > 0.95) temp = 0.95; // smooth to prevent under/over flow
+ else if (temp < 0.05) temp = 0.05;
+ beta += pow(temp/(1.0-temp), (double) k);
+ }
+ alpha[k][i] = beta * pow((double) -1, (double) (k+1)) / (double) k ;
+ }
}
- for (i = 1; i <= l ; i++){
- r = 1;
- for (j = 1 ; j <= m ; j++)
- r *= (1 - temp_mult[i][j]);
- for (k = 0 ; k < max_fertility_here ; k++){
- sum = get_sum_of_partitions(k, i, alpha);
- temp = r * sum * count;
- nCountTable.addValue(es[i], k,temp);
- } // end of for (k ..)
+ for (i = 1; i <= l ; i++) {
+ r = 1;
+ for (j = 1 ; j <= m ; j++)
+ r *= (1 - temp_mult[i][j]);
+ for (k = 0 ; k < max_fertility_here ; k++) {
+ sum = get_sum_of_partitions(k, i, alpha);
+ temp = r * sum * count;
+ nCountTable.addValue(es[i], k,temp);
+ } // end of for (k ..)
} // end of for (i == ..)
} // end of if (!simple)
perp.addFactor(cross_entropy, count, l, m,1);
trainVPerp.addFactor(log(viterbi_score), count, l, m,1);
- } // end of while
+ } // end of while
sHandler1.rewind();
cerr << "Normalizing t, a, d, n count tables now ... " ;
- if( dump_files && OutputInAachenFormat==1 )
- {
- tfile = Prefix + ".t2to3" ;
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- }
+ if( dump_files && OutputInAachenFormat==1 ) {
+ tfile = Prefix + ".t2to3" ;
+ tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+ }
if( updateT )
tTable.normalizeTable(Elist, Flist);
aCountTable.normalize(aTable);
@@ -227,32 +222,32 @@ void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perple
if (!simple)
nCountTable.normalize(nTable,&Elist.getVocabList());
else {
- for (i = 0 ; i< Elist.uniqTokens() ; i++){
- if (0 < MAX_FERTILITY){
- nTable.addValue(i,0,PROB(0.2));
- if (1 < MAX_FERTILITY){
- nTable.addValue(i,1,PROB(0.65));
- if (2 < MAX_FERTILITY){
- nTable.addValue(i,2,PROB(0.1));
- if (3 < MAX_FERTILITY)
- nTable.addValue(i,3,PROB(0.04));
- PROB val = 0.01/(MAX_FERTILITY-4);
- for (k = 4 ; k < MAX_FERTILITY ; k++)
- nTable.addValue(i, k,val);
- }
- }
+ for (i = 0 ; i< Elist.uniqTokens() ; i++) {
+ if (0 < MAX_FERTILITY) {
+ nTable.addValue(i,0,PROB(0.2));
+ if (1 < MAX_FERTILITY) {
+ nTable.addValue(i,1,PROB(0.65));
+ if (2 < MAX_FERTILITY) {
+ nTable.addValue(i,2,PROB(0.1));
+ if (3 < MAX_FERTILITY)
+ nTable.addValue(i,3,PROB(0.04));
+ PROB val = 0.01/(MAX_FERTILITY-4);
+ for (k = 4 ; k < MAX_FERTILITY ; k++)
+ nTable.addValue(i, k,val);
+ }
+ }
}
}
- } // end of else (!simple)
+ } // end of else (!simple)
p0 = 0.95;
p1 = 0.05;
- if (dump_files){
+ if (dump_files) {
tfile = Prefix + ".t2to3" ;
afile = Prefix + ".a2to3" ;
nfile = Prefix + ".n2to3" ;
dfile = Prefix + ".d2to3" ;
p0file = Prefix + ".p0_2to3" ;
-
+
if( OutputInAachenFormat==0 )
tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
aTable.printTable(afile.c_str());
@@ -263,43 +258,40 @@ void model3::estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perple
of.close();
}
errorReportAL(cerr,"IBM-2");
- if(simple)
- {
- perp.record("T2To3");
- trainVPerp.record("T2To3");
- }
- else
- {
- perp.record("ST2To3");
- trainVPerp.record("ST2To3");
- }
+ if(simple) {
+ perp.record("T2To3");
+ trainVPerp.record("T2To3");
+ } else {
+ perp.record("ST2To3");
+ trainVPerp.record("ST2To3");
+ }
}
-void model3::transferSimple(/*model1& m1, model2& m2, */ sentenceHandler& sHandler1,
- bool dump_files, Perplexity& perp, Perplexity& trainVPerp,bool updateT)
+void model3::transferSimple(/*model1& m1, model2& m2, */ sentenceHandler& sHandler1,
+ bool dump_files, Perplexity& perp, Perplexity& trainVPerp,bool updateT)
{
- /*
+ /*
This function performs simple Model 2 -> Model 3 transfer.
It sets values for n and p without considering Model 2's ideas.
It sets d values based on a.
*/
time_t st, fn;
// just inherit these from the previous models, to avoid data duplication
-
+
st = time(NULL);
cerr << "==========================================================\n";
- cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
-
+ cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
+
cerr << "Simple tranfer of Model2 --> Model3 (i.e. estimating initial parameters of Model3 from Model2 tables)\n";
-
+
estimate_t_a_d(sHandler1, perp, trainVPerp, true, dump_files,updateT) ;
fn = time(NULL) ;
cerr << "\nTransfer: TRAIN CROSS-ENTROPY " << perp.cross_entropy()
<< " PERPLEXITY " << perp.perplexity() << '\n';
cerr << "\nTransfer took: " << difftime(fn, st) << " seconds\n";
- cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << '\n';
+ cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << '\n';
cerr << "==========================================================\n";
-
+
}
@@ -309,50 +301,50 @@ void model3::transfer(sentenceHandler& sHandler1,bool dump_files, Perplexity& pe
transferSimple(sHandler1,dump_files,perp, trainVPerp,updateT);
{
time_t st, fn ;
-
+
st = time(NULL);
cerr << "==========================================================\n";
- cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
+ cerr << "\nTransfer started at: "<< my_ctime(&st) << '\n';
cerr << "Transfering Model2 --> Model3 (i.e. estimating initial parameters of Model3 from Model2 tables)\n";
-
+
p1_count = p0_count = 0 ;
-
+
estimate_t_a_d(sHandler1, perp, trainVPerp, false, dump_files,updateT);
-
-
-
+
+
+
/* Below is a made-up stab at transferring t & a probs to p0/p1.
(Method not documented in IBM paper).
It seems to give p0 = .96, which may be right for Model 2, or may not.
I'm commenting it out for now and hardwiring p0 = .90 as above. -Kevin
-
+
// compute p0, p1 counts
Vector<LogProb> nm(Elist.uniqTokens(),0.0);
-
+
for(i=0; i < Elist.uniqTokens(); i++){
for(k=1; k < MAX_FERTILITY; k++){
nm[i] += nTable.getValue(i, k) * (LogProb) k;
}
}
-
+
LogProb mprime;
// sentenceHandler sHandler1(efFilename.c_str());
// sentPair sent ;
-
+
while(sHandler1.getNextSentence(sent)){
Vector<WordIndex>& es = sent.eSent;
Vector<WordIndex>& fs = sent.fSent;
const float count = sent.noOccurrences;
-
+
l = es.size() - 1;
m = fs.size() - 1;
mprime = 0 ;
for (i = 1; i <= l ; i++){
mprime += nm[es[i]] ;
}
- mprime = LogProb((int((double) mprime + 0.5))); // round mprime to nearest integer
+ mprime = LogProb((int((double) mprime + 0.5))); // round mprime to nearest integer
if ((mprime < m) && (2 * mprime >= m)) {
- // cerr << "updating both p0_count and p1_count, mprime: " << mprime <<
+ // cerr << "updating both p0_count and p1_count, mprime: " << mprime <<
// "m = " << m << "\n";
p1_count += (m - (double) mprime) * count ;
p0_count += (2 * (double) mprime - m) * count ;
@@ -360,15 +352,15 @@ void model3::transfer(sentenceHandler& sHandler1,bool dump_files, Perplexity& pe
}
else {
// p1_count += 0 ;
- // cerr << "updating only p0_count, mprime: " << mprime <<
+ // cerr << "updating only p0_count, mprime: " << mprime <<
// "m = " << m << "\n";
p0_count += double(m * count) ;
// cerr << "p0_count = "<<p0_count << " , p1_count = " << p1_count << endl ;
}
}
-
- // normalize p1, p0
-
+
+ // normalize p1, p0
+
cerr << "p0_count = "<<p0_count << " , p1_count = " << p1_count << endl ;
p1 = p1_count / (p1_count + p0_count ) ;
p0 = 1 - p1;
@@ -383,16 +375,16 @@ void model3::transfer(sentenceHandler& sHandler1,bool dump_files, Perplexity& pe
p0 = p0 - (LogProb) SMOOTH_THRESHOLD ;
}
*/
-
+
fn = time(NULL) ;
cerr << "\nTransfer: TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- // cerr << "tTable contains " << tTable.getHash().bucket_count()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ // cerr << "tTable contains " << tTable.getHash().bucket_count()
// << " buckets and " << tTable.getHash().size() << " entries." ;
cerr << "\nTransfer took: " << difftime(fn, st) << " seconds\n";
- cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << endl;
+ cerr << "\nTransfer Finished at: "<< my_ctime(&fn) << endl;
cerr << "==========================================================\n";
-
+
}
}
diff --git a/mgizapp/src/model3.cpp b/mgizapp/src/model3.cpp
index f98e569..39d5514 100644
--- a/mgizapp/src/model3.cpp
+++ b/mgizapp/src/model3.cpp
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -45,164 +45,170 @@ GLOBAL_PARAMETER4(int,Model3_Dump_Freq,"MODEL 345 DUMP FREQUENCY","MODEL 3 DUMP
extern int Transfer_Dump_Freq;
model3::model3(model2& m2, amodel<PROB>& d, nmodel<PROB>& n) :
- model2(m2), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
- nCountTable(m2.getNoEnglishWords()+1, MAX_FERTILITY), h(0) {
- ewordclasses = fwordclasses = NULL;
+ model2(m2), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
+ nCountTable(m2.getNoEnglishWords()+1, MAX_FERTILITY), h(0)
+{
+ ewordclasses = fwordclasses = NULL;
}
model3::model3(model3& m3, amodel<PROB>& d, nmodel<PROB>& n, amodel<COUNT>& a) :
- model2(*(&m3), m3.aTable, a), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
- nCountTable(m3.getNoEnglishWords()+1, MAX_FERTILITY), h(0) {
- ewordclasses = fwordclasses = NULL;
+ model2(*(&m3), m3.aTable, a), dTable(d), dCountTable(true), nTable(n),//m2.getNoEnglishWords()+1, MAX_FERTILITY),
+ nCountTable(m3.getNoEnglishWords()+1, MAX_FERTILITY), h(0)
+{
+ ewordclasses = fwordclasses = NULL;
}
void model3::load_tables(const char *nfile, const char *dfile,
- const char *p0file) {
- cout << "Model3: loading n, d, p0 tables \n";
-
- nTable.readNTable(nfile);
- dTable.readTable(dfile);
- ifstream inf(p0file);
- if ( !inf)
- cerr << "Can not open: " << p0file << '\n';
- else {
- cout << "Reading p0 value from " << p0file << "\n";
- inf >> p0;
- inf.close();
- p1 = 1 - p0;
- }
- cout << "p0 is: " << p0 << " p1:" << p1 << '\n';
+ const char *p0file)
+{
+ cout << "Model3: loading n, d, p0 tables \n";
+
+ nTable.readNTable(nfile);
+ dTable.readTable(dfile);
+ ifstream inf(p0file);
+ if ( !inf)
+ cerr << "Can not open: " << p0file << '\n';
+ else {
+ cout << "Reading p0 value from " << p0file << "\n";
+ inf >> p0;
+ inf.close();
+ p1 = 1 - p0;
+ }
+ cout << "p0 is: " << p0 << " p1:" << p1 << '\n';
}
-model3::~model3() {
- dTable.clear();
- dCountTable.clear();
- nTable.clear();
- nCountTable.clear();
- if(h==NULL && ewordclasses!=NULL && fwordclasses!=NULL){
- delete ewordclasses;
- delete fwordclasses;
- }
+model3::~model3()
+{
+ dTable.clear();
+ dCountTable.clear();
+ nTable.clear();
+ nCountTable.clear();
+ if(h==NULL && ewordclasses!=NULL && fwordclasses!=NULL) {
+ delete ewordclasses;
+ delete fwordclasses;
+ }
}
-void model3::em(int noIterations, sentenceHandler& sHandler1) {
-
- LogProb all_prob, aprob, temp;
- WordIndex i, j, l, m;
- time_t it_st, st, it_fn, fn;
- string tfile, dfile, nfile, p0file, afile, number;
-
- st = time(NULL) ;
- cout << "\n" << "Starting Model3: Training";
- // sentenceHandler sHandler1(efFilename.c_str());
- sHandler1.rewind();
- for (int it=1; it <= noIterations; it++) {
- it_st = time(NULL) ;
- cout << "\n" << "Model3: Iteration " << it;
-
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- tfile = Prefix + ".t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- dfile = Prefix + ".d3." + number;
- p0file = Prefix + ".p0_3." + number;
- // tCountTable.clear();
- dCountTable.clear();
- nCountTable.clear();
- p0_count = 0.0;
- p1_count = 0.0;
- all_prob = 0;
- sentPair sent;
- while (sHandler1.getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cout <<sent.sentenceNo << '\n';
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- LogProb lcount=(LogProb)count;
- l = es.size()-1;
- m = fs.size()-1;
- WordIndex x, y;
- all_prob = prob_of_target_given_source(tTable, fs, es);
- if (all_prob == 0)
- cout << "\n" <<"all_prob = 0";
-
- for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
- y = x;
- for (j = 1; j <= m; j++) {
- A[j] = y % (l+1);
- y /= (l+1);
- }
- for (i = 0; i <= l; i++)
- Fert[i] = 0;
- for (j = 1; j <= m; j++)
- Fert[A[j]]++;
- if (2 * Fert[0] <= m) { /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- aprob = prob_of_target_and_alignment_given_source(A, Fert,
- tTable, fs, es);
- temp = aprob/all_prob;
- LogProb templcount = temp*lcount;
-
- for (j = 1; j <= m; j++) {
- tTable.incCount(es[A[j]], fs[j], templcount);
- if (0 != A[j])
- dCountTable.addValue(j, A[j], l, m, templcount);
- }
- for (i = 0; i <= l; i++) {
- nCountTable.addValue(es[i], Fert[i], templcount);
- //cout << "AFTER INC2: " << templcount << " " << nCountTable.getRef(es[i], Fert[i]) << '\n';
- }
- p1_count += double(temp) * (Fert[0] * count);
- p0_count += double(temp) * ((m - 2 * Fert[0]) * count);
- }
- } /* of looping over all alignments */
- } /* of sentence pair E, F */
- sHandler1.rewind();
-
- // normalize tables
- if (OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), 1);
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable, &Elist.getVocabList());
-
- // normalize p1 & p0
-
- if (p1_count + p0_count != 0) {
- p1 = p1_count / (p1_count + p0_count );
- p0 = 1 - p1;
- } else {
- p1 = p0 = 0;
- }
- // print tables
- if (OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), OutputInAachenFormat);
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
- Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- it_fn = time(NULL) ;
- cout << "\n" << "Model3 Iteration "<<it<<" took: " << difftime(it_fn,
- it_st) << " seconds\n";
-
- } /* of iterations */
- fn = time(NULL) ;
- cout << "\n" << "Entire Model3 Training took: " << difftime(fn, st)
- << " seconds\n";
+void model3::em(int noIterations, sentenceHandler& sHandler1)
+{
+
+ LogProb all_prob, aprob, temp;
+ WordIndex i, j, l, m;
+ time_t it_st, st, it_fn, fn;
+ string tfile, dfile, nfile, p0file, afile, number;
+
+ st = time(NULL) ;
+ cout << "\n" << "Starting Model3: Training";
+ // sentenceHandler sHandler1(efFilename.c_str());
+ sHandler1.rewind();
+ for (int it=1; it <= noIterations; it++) {
+ it_st = time(NULL) ;
+ cout << "\n" << "Model3: Iteration " << it;
+
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ tfile = Prefix + ".t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ dfile = Prefix + ".d3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ // tCountTable.clear();
+ dCountTable.clear();
+ nCountTable.clear();
+ p0_count = 0.0;
+ p1_count = 0.0;
+ all_prob = 0;
+ sentPair sent;
+ while (sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float count = sent.getCount();
+ if ((sent.sentenceNo % 1000) == 0)
+ cout <<sent.sentenceNo << '\n';
+ Vector<WordIndex> A(fs.size(),/*-1*/0);
+ Vector<WordIndex> Fert(es.size(),0);
+ LogProb lcount=(LogProb)count;
+ l = es.size()-1;
+ m = fs.size()-1;
+ WordIndex x, y;
+ all_prob = prob_of_target_given_source(tTable, fs, es);
+ if (all_prob == 0)
+ cout << "\n" <<"all_prob = 0";
+
+ for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
+ y = x;
+ for (j = 1; j <= m; j++) {
+ A[j] = y % (l+1);
+ y /= (l+1);
+ }
+ for (i = 0; i <= l; i++)
+ Fert[i] = 0;
+ for (j = 1; j <= m; j++)
+ Fert[A[j]]++;
+ if (2 * Fert[0] <= m) {
+ /* consider alignments that has Fert[0] less than
+ half the number of words in French sentence */
+ aprob = prob_of_target_and_alignment_given_source(A, Fert,
+ tTable, fs, es);
+ temp = aprob/all_prob;
+ LogProb templcount = temp*lcount;
+
+ for (j = 1; j <= m; j++) {
+ tTable.incCount(es[A[j]], fs[j], templcount);
+ if (0 != A[j])
+ dCountTable.addValue(j, A[j], l, m, templcount);
+ }
+ for (i = 0; i <= l; i++) {
+ nCountTable.addValue(es[i], Fert[i], templcount);
+ //cout << "AFTER INC2: " << templcount << " " << nCountTable.getRef(es[i], Fert[i]) << '\n';
+ }
+ p1_count += double(temp) * (Fert[0] * count);
+ p0_count += double(temp) * ((m - 2 * Fert[0]) * count);
+ }
+ } /* of looping over all alignments */
+ } /* of sentence pair E, F */
+ sHandler1.rewind();
+
+ // normalize tables
+ if (OutputInAachenFormat==1)
+ tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
+ Flist.getVocabList(), 1);
+ tTable.normalizeTable(Elist, Flist);
+ aCountTable.normalize(aTable);
+ dCountTable.normalize(dTable);
+ nCountTable.normalize(nTable, &Elist.getVocabList());
+
+ // normalize p1 & p0
+
+ if (p1_count + p0_count != 0) {
+ p1 = p1_count / (p1_count + p0_count );
+ p0 = 1 - p1;
+ } else {
+ p1 = p0 = 0;
+ }
+ // print tables
+ if (OutputInAachenFormat==0)
+ tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
+ Flist.getVocabList(), OutputInAachenFormat);
+ dTable.printTable(dfile.c_str());
+ nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
+ Elist.getVocabList(), OutputInAachenFormat);
+ ofstream of(p0file.c_str());
+ of << p0;
+ of.close();
+ it_fn = time(NULL) ;
+ cout << "\n" << "Model3 Iteration "<<it<<" took: " << difftime(it_fn,
+ it_st) << " seconds\n";
+
+ } /* of iterations */
+ fn = time(NULL) ;
+ cout << "\n" << "Entire Model3 Training took: " << difftime(fn, st)
+ << " seconds\n";
}
//-----------------------------------------------------------------------
@@ -227,7 +233,7 @@ void model3::em(int noIterations, sentenceHandler& sHandler1) {
Vector<char> vac(m+1,0);
for(PositionIndex i=1;i<=l;i++)
{
- PositionIndex cur_j=al.als_i[i];
+ PositionIndex cur_j=al.als_i[i];
cout << "LOOP: " << i << " " << cur_j << '\n';
PositionIndex prev_j=0;
PositionIndex k=0;
@@ -237,11 +243,11 @@ void model3::em(int noIterations, sentenceHandler& sHandler1) {
assert(vac[cur_j]==0);
vac[cur_j]=1;
for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
+ cout << '\n';
cout << i << " " << cur_j << ": d1(" << vacancies(vac,cur_j) << "|" << vacancies(vac,al.get_center(prev_cept)) << "," << vac_all << "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
prev_j=cur_j;
cur_j=al.als_j[cur_j].next;
- }
+ }
while(cur_j) { // process following words of cept
k++;
vac_all--;
@@ -249,7 +255,7 @@ void model3::em(int noIterations, sentenceHandler& sHandler1) {
int vprev=vacancies(vac,prev_j);
cout << "PREV: " << prev_j << '\n';
for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
+ cout << '\n';
cout << i << " " << cur_j << ": d>1(" << vacancies(vac,cur_j) << "-" << vprev << "|" << vac_all<< "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
prev_j=cur_j;
cur_j=al.als_j[cur_j].next;
@@ -265,1097 +271,1109 @@ void model3::em(int noIterations, sentenceHandler& sHandler1) {
extern short DoViterbiTraining;
struct m3_em_loop_t {
- model3 *m;
- int done;
- int valid;
- string alignfile;
- string modelName;
- int it;
- bool dump_files;
- char toModel, fromModel;
- pthread_t thread;
- d4model* d4;
- d5model* d5;
- bool final;
- m3_em_loop_t() :
- m(0), done(0), valid(0),d4(0),d5(0) {
- }
- ;
+ model3 *m;
+ int done;
+ int valid;
+ string alignfile;
+ string modelName;
+ int it;
+ bool dump_files;
+ char toModel, fromModel;
+ pthread_t thread;
+ d4model* d4;
+ d5model* d5;
+ bool final;
+ m3_em_loop_t() :
+ m(0), done(0), valid(0),d4(0),d5(0) {
+ }
+ ;
};
-void* m3_exe_emloop(void *arg) {
- m3_em_loop_t* em =(m3_em_loop_t *) arg;
- em->m->viterbi_thread(em->it, em->alignfile, em->dump_files, *(em->d4),*(em->d5),em->final,em->fromModel,em->toModel,em->modelName);
- em->done = -1;
- return arg;
+void* m3_exe_emloop(void *arg)
+{
+ m3_em_loop_t* em =(m3_em_loop_t *) arg;
+ em->m->viterbi_thread(em->it, em->alignfile, em->dump_files, *(em->d4),*(em->d5),em->final,em->fromModel,em->toModel,em->modelName);
+ em->done = -1;
+ return arg;
}
-void model3::viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName) {
+void model3::viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName)
+{
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, dump_files, alignfile.c_str(), true, modelName,final
- switch (toModel) {
- case '3':{
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- break;
- default:
- abort();
- }
- break;
- }
- case '4': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
- break;
- default:
- abort();
- }
- }
- break;
- case '5': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
- break;
- case '5':
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
- break;
- default:
- abort();
- }
- }
- break;
- default:
- abort();
- }
+ switch (toModel) {
+ case '3': {
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
+ break;
+ default:
+ abort();
+ }
+ break;
+ }
+ case '4': {
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
+ break;
+ case '4':
+ viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
+ break;
+ default:
+ abort();
+ }
+ }
+ break;
+ case '5': {
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
+ break;
+ case '4':
+ viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
+ break;
+ case '5':
+ viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
+ break;
+ default:
+ abort();
+ }
+ }
+ break;
+ default:
+ abort();
+ }
}
extern short NCPUS;
int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
- int noIterationsModel5, int noIterationsModel6, const char* prev_d4,const char* prev_d4_2,bool dumpCount,
- const char* dumpCountName, bool useString) {
- double minErrors=1.0;
- int minIter=0;
- if(ewordclasses==NULL)
- ewordclasses = new WordClasses;
- if(fwordclasses==NULL)
- fwordclasses = new WordClasses;
- d4model d4m(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
- if(prev_d4){
- string previous_d4model = prev_d4;
-
- string previous_d4model_1 = prev_d4_2;
- cerr << "We are going to read d4 table from " << previous_d4model << "," << previous_d4model_1 << endl;
- d4m.readProbTable(previous_d4model.c_str(),previous_d4model_1.c_str());
- }
- if(h==NULL)
- d4m.makeWordClasses(Elist, Flist, SourceVocabClassesFilename,
- TargetVocabClassesFilename,Elist,Flist);
-
- d5model d5m(d4m);
- //d5m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
- time_t it_st, st, it_fn, fn;
- bool dump_files;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- st = time(NULL);
- sHandler1.rewind();
- if (testPerp && testHandler)
- (*testHandler).rewind();
- string trainingString;
-
- trainingString+=(prev_d4 ? '4' : (h ? 'H' : '3'));
- for (int i=0; i<noIterationsModel3; ++i)
- trainingString+='3';
- for (int i=0; i<noIterationsModel4; ++i)
- trainingString+='4';
- for (int i=0; i<noIterationsModel5; ++i)
- trainingString+='5';
- for (int i=0; i<noIterationsModel6; ++i)
- trainingString+='6';
- cout << "\n==========================================================\n";
- cout << "Starting "<<trainingString<<": Viterbi Training";
- cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
- << '\n';
-
-
- vector<m3_em_loop_t> th;
- th.resize(NCPUS);
-
- int k;
-
- for(k = 1; k< NCPUS; k++){
- th[k].m = this;
- th[k].d4 = &d4m;
- th[k].d5 = &d5m;
+ int noIterationsModel5, int noIterationsModel6, const char* prev_d4,const char* prev_d4_2,bool dumpCount,
+ const char* dumpCountName, bool useString)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ if(ewordclasses==NULL)
+ ewordclasses = new WordClasses;
+ if(fwordclasses==NULL)
+ fwordclasses = new WordClasses;
+ d4model d4m(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
+ if(prev_d4) {
+ string previous_d4model = prev_d4;
+
+ string previous_d4model_1 = prev_d4_2;
+ cerr << "We are going to read d4 table from " << previous_d4model << "," << previous_d4model_1 << endl;
+ d4m.readProbTable(previous_d4model.c_str(),previous_d4model_1.c_str());
+ }
+ if(h==NULL)
+ d4m.makeWordClasses(Elist, Flist, SourceVocabClassesFilename,
+ TargetVocabClassesFilename,Elist,Flist);
+
+ d5model d5m(d4m);
+ //d5m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
+ // TargetVocabFilename+".classes");
+ time_t it_st, st, it_fn, fn;
+ bool dump_files;
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+ st = time(NULL);
+ sHandler1.rewind();
+ if (testPerp && testHandler)
+ (*testHandler).rewind();
+ string trainingString;
+
+ trainingString+=(prev_d4 ? '4' : (h ? 'H' : '3'));
+ for (int i=0; i<noIterationsModel3; ++i)
+ trainingString+='3';
+ for (int i=0; i<noIterationsModel4; ++i)
+ trainingString+='4';
+ for (int i=0; i<noIterationsModel5; ++i)
+ trainingString+='5';
+ for (int i=0; i<noIterationsModel6; ++i)
+ trainingString+='6';
+ cout << "\n==========================================================\n";
+ cout << "Starting "<<trainingString<<": Viterbi Training";
+ cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
+ << '\n';
+
+
+ vector<m3_em_loop_t> th;
+ th.resize(NCPUS);
+
+ int k;
+
+ for(k = 1; k< NCPUS; k++) {
+ th[k].m = this;
+ th[k].d4 = &d4m;
+ th[k].d5 = &d5m;
+ }
+
+ for (unsigned int it=1; it < trainingString.length(); it++) {
+ bool final=0;
+ if (it==trainingString.length()-1)
+ final=1;
+ string modelName;
+ char fromModel=trainingString[it-1], toModel=trainingString[it];
+ if (fromModel==toModel)
+ modelName=string("Model")+fromModel;
+ else
+ modelName=string("T")+fromModel+"To"+toModel;
+ it_st = time(NULL);
+ cout <<"\n---------------------\n"<<modelName<<": Iteration " << it
+ <<'\n';
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((it
+ % Model3_Dump_Freq) == 0))) && !NODUMPS;
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ if (final)
+ number="final";
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".A3." + number;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
}
-
- for (unsigned int it=1; it < trainingString.length(); it++) {
- bool final=0;
- if (it==trainingString.length()-1)
- final=1;
- string modelName;
- char fromModel=trainingString[it-1], toModel=trainingString[it];
- if (fromModel==toModel)
- modelName=string("Model")+fromModel;
- else
- modelName=string("T")+fromModel+"To"+toModel;
- it_st = time(NULL);
- cout <<"\n---------------------\n"<<modelName<<": Iteration " << it
- <<'\n';
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- d4m.clear();
- p0_count = p1_count = 0;
- //dump_files=true;
-
- sHandler1.rewind();
- if (testPerp && testHandler)
- (*testHandler).rewind();
-
- char node[2] ;
- node[1] = '\0';
- for (k=1 ; k< NCPUS ; k++){
- th[k].m = this;
- th[k].done = 0;
- th[k].valid = 0;
- th[k].it = it;
- th[k].final = final;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
- th[k].dump_files = dump_files;
- th[k].fromModel = fromModel;
- th[k].toModel = toModel;
- th[k].modelName = modelName;
- th[k].valid = pthread_create(&(th[k].thread),NULL,m3_exe_emloop,&(th[k]));
- if(th[k].valid){
- cerr << "Error starting thread " << k << endl;
- }
- }
- node[0] = '0';
- alignfile = alignfile + ".part";
- alignfile += node;
-
+ // clear count tables
+ // tCountTable.clear();
+ dCountTable.clear();
+ aCountTable.clear();
+ initAL();
+ nCountTable.clear();
+ d4m.clear();
+ p0_count = p1_count = 0;
+ //dump_files=true;
+
+ sHandler1.rewind();
+ if (testPerp && testHandler)
+ (*testHandler).rewind();
+
+ char node[2] ;
+ node[1] = '\0';
+ for (k=1 ; k< NCPUS ; k++) {
+ th[k].m = this;
+ th[k].done = 0;
+ th[k].valid = 0;
+ th[k].it = it;
+ th[k].final = final;
+ th[k].alignfile = alignfile + ".part";
+ node[0] = '0' + k;
+ th[k].alignfile += node;
+ th[k].dump_files = dump_files;
+ th[k].fromModel = fromModel;
+ th[k].toModel = toModel;
+ th[k].modelName = modelName;
+ th[k].valid = pthread_create(&(th[k].thread),NULL,m3_exe_emloop,&(th[k]));
+ if(th[k].valid) {
+ cerr << "Error starting thread " << k << endl;
+ }
+ }
+ node[0] = '0';
+ alignfile = alignfile + ".part";
+ alignfile += node;
+
#ifdef TRICKY_IBM3_TRAINING
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, dump_files, alignfile.c_str(), true, modelName,final
#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- switch (toModel) {
- case '3':
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
- break;
- default:
- abort();
- }
- break;
- case '4': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
- d4model>(TEST_ARGS, h,&d4m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
- break;
- default:
- abort();
- }
- if(dumpCount && it == trainingString.length()-1){
- string realD4TableName = dumpCountName;
- realD4TableName += ".d4.count";
- string realD4bTableName = realD4TableName+".b";
- if(!d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
- cerr <<"Error writing count file to" << realD4TableName << endl;
- }
- d4m.normalizeTable();
- if (dump_files)
- d4m.printProbTable(d4file.c_str(), d4file2.c_str());
- }
- break;
- case '5': {
- switch (fromModel) {
- case 'H':
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
- d5model>(TEST_ARGS, h,&d5m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d5model>( TEST_ARGS , (void*)0,&d5m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d5model>( TEST_ARGS, &d4m,&d5m);
- break;
- case '5':
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>( TEST_ARGS, &d5m,&d5m);
- break;
- default:
- abort();
- }
- if(dumpCount && it == trainingString.length()-1){
- string realD4TableName = dumpCountName;
- realD4TableName += ".d4";
- string realD4bTableName = realD4TableName+".b";
- if(!d5m.d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
- cerr <<"Error writing count file to" << realD4TableName << endl;
- }
- d5m.d4m.normalizeTable();
- if (dump_files)
- d5m.d4m.printProbTable(d4file.c_str(), d4file2.c_str());
- d5m.normalizeTable();
- if (dump_files) {
- ofstream d5output(d5file.c_str());
- d5output << d5m;
- }
- }
- break;
- default:
- abort();
- }
+ switch (toModel) {
+ case '3':
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
+ break;
+ default:
+ abort();
+ }
+ break;
+ case '4': {
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d4model>(TRAIN_ARGS,h,&d4m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
+ d4model>(TEST_ARGS, h,&d4m);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
+ break;
+ case '4':
+ viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
+ break;
+ default:
+ abort();
+ }
+ if(dumpCount && it == trainingString.length()-1) {
+ string realD4TableName = dumpCountName;
+ realD4TableName += ".d4.count";
+ string realD4bTableName = realD4TableName+".b";
+ if(!d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
+ cerr <<"Error writing count file to" << realD4TableName << endl;
+ }
+ d4m.normalizeTable();
+ if (dump_files)
+ d4m.printProbTable(d4file.c_str(), d4file2.c_str());
+ }
+ break;
+ case '5': {
+ switch (fromModel) {
+ case 'H':
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm, d5model>(TRAIN_ARGS,h,&d5m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm,
+ d5model>(TEST_ARGS, h,&d5m);
+ break;
+ case '3':
+ viterbi_loop_with_tricks<transpair_model3, void, d5model>(TRAIN_ARGS, (void*)0,&d5m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model3, void, d5model>( TEST_ARGS , (void*)0,&d5m);
+ break;
+ case '4':
+ viterbi_loop_with_tricks<transpair_model4, d4model, d5model>(TRAIN_ARGS, &d4m,&d5m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model4, d4model, d5model>( TEST_ARGS, &d4m,&d5m);
+ break;
+ case '5':
+ viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model5, d5model, d5model>( TEST_ARGS, &d5m,&d5m);
+ break;
+ default:
+ abort();
+ }
+ if(dumpCount && it == trainingString.length()-1) {
+ string realD4TableName = dumpCountName;
+ realD4TableName += ".d4";
+ string realD4bTableName = realD4TableName+".b";
+ if(!d5m.d4m.dumpCount(realD4TableName.c_str(),realD4bTableName.c_str()))
+ cerr <<"Error writing count file to" << realD4TableName << endl;
+ }
+ d5m.d4m.normalizeTable();
+ if (dump_files)
+ d5m.d4m.printProbTable(d4file.c_str(), d4file2.c_str());
+ d5m.normalizeTable();
+ if (dump_files) {
+ ofstream d5output(d5file.c_str());
+ d5output << d5m;
+ }
+ }
+ break;
+ default:
+ abort();
+ }
#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- for (k=1;k<NCPUS;k++){
- pthread_join((th[k].thread),NULL);
- cerr << "Thread " << k << "done" << endl;
- }
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- // now normalize count tables
+ viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
+ alignfile.c_str(), true, model);
+ if (testPerp && testHandler)
+ viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
+ dump_files, test_alignfile.c_str(), false, model);
+#endif
+ for (k=1; k<NCPUS; k++) {
+ pthread_join((th[k].thread),NULL);
+ cerr << "Thread " << k << "done" << endl;
+ }
+ if (errorsAL()<minErrors) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ // now normalize count tables
// dump_files = true;
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), 1);
- perp.record(modelName);
- errorReportAL(cerr, modelName);
- trainViterbiPerp.record(modelName);
-
- if(dumpCount && it == trainingString.length()-1){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- string realDTableName = dumpCountName;
- realDTableName += ".d.count";
- dCountTable.printRealTable(realDTableName.c_str());
- string realNTableName = dumpCountName;
- realNTableName += ".n.count";
- nCountTable.printRealNTable(Elist.uniqTokens(),realNTableName.c_str(),Elist.getVocabList(),useString);
- }
-
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable, &Elist.getVocabList());
- sHandler1.rewind();
- //testHandler->rewind();
- // cout << "tTable contains " <<
- // tTable.getHash().bucket_count() << " buckets and "<<
- //tTable.getHash().size() << " entries.\n";
-
- // normalize p1 & p0
-
- cout << "p0_count is " << p0_count << " and p1 is " << p1_count << "; ";
- if (P0!=-1.0) {
- p0 = P0;
- p1 = 1-P0;
- } else {
- if (p1_count + p0_count != 0) {
- p1 = p1_count / (p1_count + p0_count );
- p0 = 1 - p1;
- } else {
- p1 = p0 = 0;
- cerr << "ERROR: p0_count+p1_count is zero!!!\n";
- }
- }
-
- cout << "p0 is " << p0 << " p1: " << p1 << '\n';
-
- cout << modelName<<": TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ":("<<it<<" TEST CROSS-ENTROPY " << (*testPerp).cross_entropy() << " PERPLEXITY " << (*testPerp).perplexity() << " sum: " << (*testPerp).getSum()<< " wc: " << (*testPerp).word_count() << '\n';
- cout << modelName << ": ("<<it<<") TRAIN VITERBI CROSS-ENTROPY "
- << trainViterbiPerp.cross_entropy() << " PERPLEXITY "
- << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<")TEST VITERBI CROSS-ENTROPY "
- << (*testViterbiPerp).cross_entropy() << " PERPLEXITY "
- << (*testViterbiPerp).perplexity() << " Sum: " << (*testViterbiPerp).getSum() << " wc: " << (*testViterbiPerp).word_count() << '\n';
- //dump_files = true;
- if (dump_files) {
- if (OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
- Flist.getVocabList(), OutputInAachenFormat);
- aTable.printTable(afile.c_str());
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
- Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- }
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Viterbi Iteration : "<<it<< " took: "
- << difftime(it_fn, it_st) << " seconds\n";
- } /* of iterations */
- fn = time(NULL);
- cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
- cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
- << difftime(fn, st) << " seconds\n";
- cout << "==========================================================\n";
- if (noIterationsModel4||noIterationsModel5)
- minIter-=noIterationsModel3;
- if (noIterationsModel5)
- minIter-=noIterationsModel4;
- return minIter;
+ if (dump_files&&OutputInAachenFormat==1)
+ tTable.printCountTable(tfile.c_str(), Elist.getVocabList(),
+ Flist.getVocabList(), 1);
+ perp.record(modelName);
+ errorReportAL(cerr, modelName);
+ trainViterbiPerp.record(modelName);
+
+ if(dumpCount && it == trainingString.length()-1) {
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ string realATableName = dumpCountName;
+ realATableName += ".a.count";
+ aCountTable.printRealTable(realATableName.c_str());
+ string realDTableName = dumpCountName;
+ realDTableName += ".d.count";
+ dCountTable.printRealTable(realDTableName.c_str());
+ string realNTableName = dumpCountName;
+ realNTableName += ".n.count";
+ nCountTable.printRealNTable(Elist.uniqTokens(),realNTableName.c_str(),Elist.getVocabList(),useString);
+ }
+
+ tTable.normalizeTable(Elist, Flist);
+ aCountTable.normalize(aTable);
+ dCountTable.normalize(dTable);
+ nCountTable.normalize(nTable, &Elist.getVocabList());
+ sHandler1.rewind();
+ //testHandler->rewind();
+ // cout << "tTable contains " <<
+ // tTable.getHash().bucket_count() << " buckets and "<<
+ //tTable.getHash().size() << " entries.\n";
+
+ // normalize p1 & p0
+
+ cout << "p0_count is " << p0_count << " and p1 is " << p1_count << "; ";
+ if (P0!=-1.0) {
+ p0 = P0;
+ p1 = 1-P0;
+ } else {
+ if (p1_count + p0_count != 0) {
+ p1 = p1_count / (p1_count + p0_count );
+ p0 = 1 - p1;
+ } else {
+ p1 = p0 = 0;
+ cerr << "ERROR: p0_count+p1_count is zero!!!\n";
+ }
+ }
+
+ cout << "p0 is " << p0 << " p1: " << p1 << '\n';
+
+ cout << modelName<<": TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ":("<<it<<" TEST CROSS-ENTROPY " << (*testPerp).cross_entropy() << " PERPLEXITY " << (*testPerp).perplexity() << " sum: " << (*testPerp).getSum()<< " wc: " << (*testPerp).word_count() << '\n';
+ cout << modelName << ": ("<<it<<") TRAIN VITERBI CROSS-ENTROPY "
+ << trainViterbiPerp.cross_entropy() << " PERPLEXITY "
+ << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<")TEST VITERBI CROSS-ENTROPY "
+ << (*testViterbiPerp).cross_entropy() << " PERPLEXITY "
+ << (*testViterbiPerp).perplexity() << " Sum: " << (*testViterbiPerp).getSum() << " wc: " << (*testViterbiPerp).word_count() << '\n';
+ //dump_files = true;
+ if (dump_files) {
+ if (OutputInAachenFormat==0)
+ tTable.printProbTable(tfile.c_str(), Elist.getVocabList(),
+ Flist.getVocabList(), OutputInAachenFormat);
+ aTable.printTable(afile.c_str());
+ dTable.printTable(dfile.c_str());
+ nTable.printNTable(Elist.uniqTokens(), nfile.c_str(),
+ Elist.getVocabList(), OutputInAachenFormat);
+ ofstream of(p0file.c_str());
+ of << p0;
+ of.close();
+ }
+ it_fn = time(NULL) ;
+ cout << "\n" << modelName << " Viterbi Iteration : "<<it<< " took: "
+ << difftime(it_fn, it_st) << " seconds\n";
+ } /* of iterations */
+ fn = time(NULL);
+ cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
+ cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
+ << difftime(fn, st) << " seconds\n";
+ cout << "==========================================================\n";
+ if (noIterationsModel4||noIterationsModel5)
+ minIter-=noIterationsModel3;
+ if (noIterationsModel5)
+ minIter-=noIterationsModel4;
+ return minIter;
}
-int model3::viterbi_hto3() {
-
- double minErrors=1.0;
- int minIter=0;
- bool dump_files = false;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- time_t st = time(NULL);
- cout << "Starting HMM To Model 3 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="H23";
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- int it = 1;
- bool final =false;
- ///ump_files = true;
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".AH3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
+int model3::viterbi_hto3()
+{
+
+ double minErrors=1.0;
+ int minIter=0;
+ bool dump_files = false;
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+ time_t st = time(NULL);
+ cout << "Starting HMM To Model 3 Viterbi Training";
+ cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
+ string modelName="H23";
+ //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
+ int it = 1;
+ bool final =false;
+ ///ump_files = true;
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
+ == 0))) && !NODUMPS;
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ if (final)
+ number="final";
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".AH3_";
+ char _p[2];
+ _p[1] = 0;
+ _p[0] = iter + '0';
+ alignfile += _p;
+ alignfile += ".part";
+ _p[1] = 0;
+ _p[0] = part + '0';
+ alignfile += _p;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ }
+ // clear count tables
+ // tCountTable.clear();
+ dCountTable.clear();
+ aCountTable.clear();
+ initAL();
+ nCountTable.clear();
+ p0_count = p1_count = 0;
#ifdef TRICKY_IBM3_TRAINING
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TRAIN_ARGS,h,(void*)0);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_modelhmm, const hmm>(TEST_ARGS, h,(void*)0);
#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
+ viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
+ alignfile.c_str(), true, model);
+ if (testPerp && testHandler)
+ viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
+ dump_files, test_alignfile.c_str(), false, model);
+#endif
+ if (errorsAL()<minErrors) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ return minIter;
}
-int model3::viterbi_3to3() {
- bool final = false;
- double minErrors=1.0;
- int minIter=0;
- bool dump_files = false;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- time_t st = time(NULL);
- cout << "Starting HMM To Model 3 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="H23";
- int it = 1;
-
- // cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
-
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
+int model3::viterbi_3to3()
+{
+ bool final = false;
+ double minErrors=1.0;
+ int minIter=0;
+ bool dump_files = false;
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+ time_t st = time(NULL);
+ cout << "Starting HMM To Model 3 Viterbi Training";
+ cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
+ string modelName="H23";
+ int it = 1;
+
+ // cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
+
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
+ == 0))) && !NODUMPS;
+ dump_files = true;
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ if (final)
+ number="final";
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".A3_";
+ char _p[2];
+ _p[1] = 0;
+ _p[0] = iter + '0';
+ alignfile += _p;
+ alignfile += ".part";
+ _p[1] = 0;
+ _p[0] = part + '0';
+ alignfile += _p;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ }
+ // clear count tables
+ // tCountTable.clear();
+ dCountTable.clear();
+ aCountTable.clear();
+ initAL();
+ nCountTable.clear();
+ p0_count = p1_count = 0;
#ifdef TRICKY_IBM3_TRAINING
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
+ viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
+ viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
+ alignfile.c_str(), true, model);
+ if (testPerp && testHandler)
+ viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
+ dump_files, test_alignfile.c_str(), false, model);
+#endif
+ if (errorsAL()<minErrors) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ return minIter;
}
-d4model* model3::viterbi_3to4() {
- double minErrors=1.0;
- bool final = false;
- bool dump_files = false;
- if(ewordclasses==NULL)
- ewordclasses = new WordClasses;
- if(fwordclasses==NULL)
- fwordclasses = new WordClasses;
-
- d4model *dm1 = new d4model(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
- d4model& d4m = *dm1;
- //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
-
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- time_t st = time(NULL);
- cout << "Starting Model 3 To Model 4 Viterbi Training";
- cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
- string modelName="34";
- int it = 1;
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
-
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A34_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
+d4model* model3::viterbi_3to4()
+{
+ double minErrors=1.0;
+ bool final = false;
+ bool dump_files = false;
+ if(ewordclasses==NULL)
+ ewordclasses = new WordClasses;
+ if(fwordclasses==NULL)
+ fwordclasses = new WordClasses;
+
+ d4model *dm1 = new d4model(MAX_SENTENCE_LENGTH,*ewordclasses,*fwordclasses);
+ d4model& d4m = *dm1;
+ //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
+ // TargetVocabFilename+".classes");
+
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+ time_t st = time(NULL);
+ cout << "Starting Model 3 To Model 4 Viterbi Training";
+ cout << "\n hto3 Training Started at: "<< my_ctime(&st) << '\n';
+ string modelName="34";
+ int it = 1;
+ //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
+
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
+ == 0))) && !NODUMPS;
+ dump_files = true;
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ if (final)
+ number="final";
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".A34_";
+ char _p[2];
+ _p[1] = 0;
+ _p[0] = iter + '0';
+ alignfile += _p;
+ alignfile += ".part";
+ _p[1] = 0;
+ _p[0] = part + '0';
+ alignfile += _p;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ }
+ // clear count tables
+ // tCountTable.clear();
+ dCountTable.clear();
+ aCountTable.clear();
+ initAL();
+ nCountTable.clear();
+ p0_count = p1_count = 0;
#ifdef TRICKY_IBM3_TRAINING
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
+ viterbi_loop_with_tricks<transpair_model3, void, d4model>(TRAIN_ARGS, (void*)0,&d4m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model3, void, d4model>( TEST_ARGS , (void*)0,&d4m);
#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) minErrors=errorsAL();
- return dm1;
+ viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
+ alignfile.c_str(), true, model);
+ if (testPerp && testHandler)
+ viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
+ dump_files, test_alignfile.c_str(), false, model);
+#endif
+ if (errorsAL()<minErrors) minErrors=errorsAL();
+ return dm1;
}
-int model3::viterbi_4to4(d4model& d4m) {
- double minErrors=1.0;
- int minIter=0;
- bool dump_files = false;
-
- //d4model d4m(MAX_SENTENCE_LENGTH);
- //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- // TargetVocabFilename+".classes");
-
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
-
- cout << "Starting Model4 To Model 4 Viterbi Training";
- int it = 1;
- bool final = false;
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
- == 0))) && !NODUMPS;
- dump_files = true;
-
- string modelName="H23";
- //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A4_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- p0_count = p1_count = 0;
+int model3::viterbi_4to4(d4model& d4m)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ bool dump_files = false;
+
+ //d4model d4m(MAX_SENTENCE_LENGTH);
+ //d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
+ // TargetVocabFilename+".classes");
+
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+
+ cout << "Starting Model4 To Model 4 Viterbi Training";
+ int it = 1;
+ bool final = false;
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq)
+ == 0))) && !NODUMPS;
+ dump_files = true;
+
+ string modelName="H23";
+ //cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = it;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".A4_";
+ char _p[2];
+ _p[1] = 0;
+ _p[0] = iter + '0';
+ alignfile += _p;
+ alignfile += ".part";
+ _p[1] = 0;
+ _p[0] = part + '0';
+ alignfile += _p;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ }
+ // clear count tables
+ // tCountTable.clear();
+ dCountTable.clear();
+ aCountTable.clear();
+ initAL();
+ nCountTable.clear();
+ p0_count = p1_count = 0;
#ifdef TRICKY_IBM3_TRAINING
#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, true, alignfile.c_str(), true, modelName,final
#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
+ viterbi_loop_with_tricks<transpair_model4, d4model, d4model>(TRAIN_ARGS , &d4m,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
+ if (testPerp && testHandler)
+ viterbi_loop_with_tricks<transpair_model4, d4model, d4model>( TEST_ARGS, &d4m,&d4m);
#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-#endif
- if (errorsAL()<minErrors) {
- minErrors=errorsAL();
- minIter=it;
- }
- return minIter;
+ viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
+ alignfile.c_str(), true, model);
+ if (testPerp && testHandler)
+ viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
+ dump_files, test_alignfile.c_str(), false, model);
+#endif
+ if (errorsAL()<minErrors) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ return minIter;
}
struct model3_align_struct {
- model3 *m;
- int part;
- int iter;
- int valid;
- pthread_t thread;
- int done;
- d4model *d4;
- int result;
- model3_align_struct() :
- m(NULL), part(0), iter(0), valid(0), done(0), d4(NULL) {
- }
+ model3 *m;
+ int part;
+ int iter;
+ int valid;
+ pthread_t thread;
+ int done;
+ d4model *d4;
+ int result;
+ model3_align_struct() :
+ m(NULL), part(0), iter(0), valid(0), done(0), d4(NULL) {
+ }
};
-void* em_thread_h23(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_hto3();
- m3->done = 1;
- return m3;
+void* em_thread_h23(void *arg)
+{
+ model3_align_struct * m3 = (model3_align_struct*) arg;
+ m3->m->initAL();
+ m3->result = m3->m->viterbi_hto3();
+ m3->done = 1;
+ return m3;
}
-void* em_thread_323(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_3to3();
- m3->done = 1;
- return m3;
+void* em_thread_323(void *arg)
+{
+ model3_align_struct * m3 = (model3_align_struct*) arg;
+ m3->m->initAL();
+ m3->result = m3->m->viterbi_3to3();
+ m3->done = 1;
+ return m3;
}
-void* em_thread_324(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->d4 = m3->m->viterbi_3to4();
- m3->done = 1;
- return m3;
+void* em_thread_324(void *arg)
+{
+ model3_align_struct * m3 = (model3_align_struct*) arg;
+ m3->m->initAL();
+ m3->d4 = m3->m->viterbi_3to4();
+ m3->done = 1;
+ return m3;
}
-void* em_thread_424(void *arg) {
- model3_align_struct * m3 = (model3_align_struct*) arg;
- m3->m->initAL();
- m3->result = m3->m->viterbi_4to4(*(m3->d4));
- m3->done = 1;
- return m3;
+void* em_thread_424(void *arg)
+{
+ model3_align_struct * m3 = (model3_align_struct*) arg;
+ m3->m->initAL();
+ m3->result = m3->m->viterbi_4to4(*(m3->d4));
+ m3->done = 1;
+ return m3;
}
void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations,
- int Model4_Iterations) {
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
- alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
- vector<model3_align_struct> threads;
- bool dump_files;
- threads.resize(ncpu);
- time_t it_st, st, it_fn, fn;
- int i, j;
- ncpu=1;
- vector<amodel<COUNT> > counts;
- counts.resize(ncpu);
- m3.part=0;
- for (i=1; i<ncpu; i++) {
- threads[i].m = new model3(m3,m3.dTable,m3.nTable,counts[i]);
- threads[i].m->setHMM(m3.h);
- threads[i].m->part = i;
- }
- d4model *d4m= NULL;
- st = time(NULL);
-
- string trainingString;
- trainingString+=(m3.h ? 'H' : '3');
- for (int i=0; i<Model3_Iterations; ++i)
- trainingString+='3';
- for (int i=0; i<Model4_Iterations; ++i)
- trainingString+='4';
- cout << "\n==========================================================\n";
- cout << "Starting "<<trainingString<<": Viterbi Training";
- cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
- << '\n';
-
- for (i=0; i<Model3_Iterations+Model4_Iterations; i++) {
- m3.perp.clear();
- m3.trainViterbiPerp.clear();
- m3.iter = i;
- bool final = (i==Model3_Iterations-1 || i == Model4_Iterations
- +Model3_Iterations-1);
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- m3.sHandler1.rewind();
- m3.perp.clear() ; // clears cross_entrop & perplexity
- m3.trainViterbiPerp.clear() ; // clears cross_entrop & perplexity
- string modelName;
- it_st = time(NULL);
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
- % Model3_Dump_Freq) == 0))) && !NODUMPS;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = i;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
- tfile = Prefix + ".t3." + number;
- tfile_actual = Prefix + ".actual.t3." + number;
- afile = Prefix + ".a3." + number;
- nfile = Prefix + ".n3." + number;
- nfile_actual = Prefix + ".actual.n3." + number;
- dfile = Prefix + ".d3." + number;
- d4file = Prefix + ".d4." + number;
- d4file2 = Prefix + ".D4." + number;
- d5file = Prefix + ".d5." + number;
- alignfile = Prefix + ".A3." + number;
- test_alignfile = Prefix + ".tst.A3." + number;
- p0file = Prefix + ".p0_3." + number;
- }
- if (m3.testPerp && m3.testHandler) {
- m3.testHandler->rewind();
- m3.testPerp->clear();
- m3.testViterbiPerp->clear();
- }
-
- for (j=1; j<ncpu; j++) {
- threads[j].m->p0 = m3.p0;
- threads[j].m->p1 = m3.p1;
- threads[j].m->p0_count = 0;
- threads[j].m->p1_count = 0;
- threads[j].m->nCountTable.clear();
- threads[j].m->dCountTable.clear();
- threads[j].m->aCountTable.clear();
- threads[j].m->iter = i;
- if (threads[j].d4) {
- *(threads[j].d4) = *d4m;
- threads[j].d4->clear();
- }
- }
- if (i==0) { // H23
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_h23, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- modelName = "HTO3";
- m3.viterbi_hto3();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i>0 && i< Model3_Iterations) {
- modelName = "3TO3";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_323, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- m3.viterbi_3to3();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i==Model3_Iterations) {
- modelName = "3TO4";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_324, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- d4m = m3.viterbi_3to4();
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- d4m->merge(*threads[j].d4);
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- } else if (i>Model3_Iterations) {
- modelName = "4TO4";
- for (j=1; j<ncpu; j++) {
- threads[j].valid = pthread_create(&(threads[j].thread), NULL,
- em_thread_424, &(threads[j]));
- if (threads[j].valid) {
- cerr << "Error Starting Thread " << j << endl;
- }
- }
- m3.viterbi_4to4(*d4m);
- while (1) {
- bool done = true;
- for (j=1; j<ncpu; j++) {
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if (threads[j].done==1) {
- threads[j].done = 2;
- m3.aCountTable.merge(threads[j].m->aCountTable);
- m3.dCountTable.merge(threads[j].m->dCountTable);
- m3.nCountTable.merge(threads[j].m->nCountTable,
- m3.Elist.uniqTokens(), m3.Elist.getVocabList());
- m3.p0_count += threads[j].m->p0_count;
- m3.p1_count += threads[j].m->p1_count;
- d4m->merge(*(threads[j].d4));
- } else if (threads[j].done==2) {
- // Nothing
- } else if (threads[j].done==0) {
- done = false;
- }
- }
- if (done)
- break;
- }
- }
- m3.perp.record(modelName);
- m3.errorReportAL(cerr, modelName);
- m3.trainViterbiPerp.record(modelName);
-
- m3.tTable.normalizeTable(m3.Elist, m3.Flist);
- m3.aCountTable.normalize(m3.aTable);
- m3.aCountTable.clear();
- m3.dCountTable.normalize(m3.dTable);
- m3.dCountTable.clear();
- m3.nCountTable.normalize(m3.nTable, &(m3.Elist.getVocabList()));
- m3.nCountTable.clear();
- cout << "p0_count is " << m3.p0_count << " and p1 is " << m3.p1_count
- << "; ";
- if (P0!=-1.0) {
- m3.p0 = P0;
- m3.p1 = 1-P0;
- } else {
- if (m3.p1_count + m3.p0_count != 0) {
- m3.p1 = m3.p1_count / (m3.p1_count + m3.p0_count );
- m3.p0 = 1 - m3.p1;
- } else {
- m3.p1 = m3.p0 = 0;
- cerr << "ERROR: p0_count+p1_count is zero!!!\n";
- }
- }
- m3.p0_count = m3.p1_count = 0;
- cout << "p0 is " << m3.p0 << " p1: " << m3.p1 << '\n';
- if (d4m) {
- d4m->normalizeTable();
- d4m->clear();
- }
-
- cout << modelName<<": TRAIN CROSS-ENTROPY " << m3.perp.cross_entropy()
- << " PERPLEXITY " << m3.perp.perplexity() << '\n';
- if (m3.testPerp && m3.testHandler)
- cout << modelName << ":("<<i<<" TEST CROSS-ENTROPY "
- << m3.testPerp->cross_entropy() << " PERPLEXITY "
- << m3.testPerp->perplexity() << " sum: "
- << m3.testPerp->getSum()<< " wc: "
- << m3.testPerp->word_count() << '\n';
- cout << modelName << ": ("<<i<<") TRAIN VITERBI CROSS-ENTROPY "
- << m3.trainViterbiPerp.cross_entropy() << " PERPLEXITY "
- << m3.trainViterbiPerp.perplexity() << '\n';
- bool dump_files = true;
- if (dump_files) {
- if (OutputInAachenFormat==0)
- m3.tTable.printProbTable(tfile.c_str(),
- m3.Elist.getVocabList(), m3.Flist.getVocabList(),
- OutputInAachenFormat);
- m3.aTable.printTable(afile.c_str());
- m3.dTable.printTable(dfile.c_str());
- m3.nTable.printNTable(m3.Elist.uniqTokens(), nfile.c_str(),
- m3.Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << m3.p0;
- of.close();
- }
- it_fn = time(NULL);
- cout << "\n" << modelName << " Viterbi Iteration : "<<i<< " took: "
- << difftime(it_fn, it_st) << " seconds\n";
- }
- fn = time(NULL);
- cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
- cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
- << difftime(fn, st) << " seconds\n";
- cout << "==========================================================\n";
+ int Model4_Iterations)
+{
+ string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file,
+ alignfile, number, test_alignfile, d4file, d5file, zeroFertFile;
+ vector<model3_align_struct> threads;
+ bool dump_files;
+ threads.resize(ncpu);
+ time_t it_st, st, it_fn, fn;
+ int i, j;
+ ncpu=1;
+ vector<amodel<COUNT> > counts;
+ counts.resize(ncpu);
+ m3.part=0;
+ for (i=1; i<ncpu; i++) {
+ threads[i].m = new model3(m3,m3.dTable,m3.nTable,counts[i]);
+ threads[i].m->setHMM(m3.h);
+ threads[i].m->part = i;
+ }
+ d4model *d4m= NULL;
+ st = time(NULL);
+
+ string trainingString;
+ trainingString+=(m3.h ? 'H' : '3');
+ for (int i=0; i<Model3_Iterations; ++i)
+ trainingString+='3';
+ for (int i=0; i<Model4_Iterations; ++i)
+ trainingString+='4';
+ cout << "\n==========================================================\n";
+ cout << "Starting "<<trainingString<<": Viterbi Training";
+ cout << "\n "<<trainingString<<" Training Started at: "<< my_ctime(&st)
+ << '\n';
+
+ for (i=0; i<Model3_Iterations+Model4_Iterations; i++) {
+ m3.perp.clear();
+ m3.trainViterbiPerp.clear();
+ m3.iter = i;
+ bool final = (i==Model3_Iterations-1 || i == Model4_Iterations
+ +Model3_Iterations-1);
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
+ % Model3_Dump_Freq) == 0))) && !NODUMPS;
+ m3.sHandler1.rewind();
+ m3.perp.clear() ; // clears cross_entrop & perplexity
+ m3.trainViterbiPerp.clear() ; // clears cross_entrop & perplexity
+ string modelName;
+ it_st = time(NULL);
+ dump_files = (final || ((Model3_Dump_Freq != 0) && ((i
+ % Model3_Dump_Freq) == 0))) && !NODUMPS;
+ string d4file2;
+ {
+ // set up the names of the files where the tables will be printed
+ int n = i;
+ number = "";
+ do {
+ //mj changed next line
+ number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
+ } while ((n /= 10) > 0);
+ if (final)
+ number="final";
+ tfile = Prefix + ".t3." + number;
+ tfile_actual = Prefix + ".actual.t3." + number;
+ afile = Prefix + ".a3." + number;
+ nfile = Prefix + ".n3." + number;
+ nfile_actual = Prefix + ".actual.n3." + number;
+ dfile = Prefix + ".d3." + number;
+ d4file = Prefix + ".d4." + number;
+ d4file2 = Prefix + ".D4." + number;
+ d5file = Prefix + ".d5." + number;
+ alignfile = Prefix + ".A3." + number;
+ test_alignfile = Prefix + ".tst.A3." + number;
+ p0file = Prefix + ".p0_3." + number;
+ }
+ if (m3.testPerp && m3.testHandler) {
+ m3.testHandler->rewind();
+ m3.testPerp->clear();
+ m3.testViterbiPerp->clear();
+ }
+
+ for (j=1; j<ncpu; j++) {
+ threads[j].m->p0 = m3.p0;
+ threads[j].m->p1 = m3.p1;
+ threads[j].m->p0_count = 0;
+ threads[j].m->p1_count = 0;
+ threads[j].m->nCountTable.clear();
+ threads[j].m->dCountTable.clear();
+ threads[j].m->aCountTable.clear();
+ threads[j].m->iter = i;
+ if (threads[j].d4) {
+ *(threads[j].d4) = *d4m;
+ threads[j].d4->clear();
+ }
+ }
+ if (i==0) { // H23
+ for (j=1; j<ncpu; j++) {
+ threads[j].valid = pthread_create(&(threads[j].thread), NULL,
+ em_thread_h23, &(threads[j]));
+ if (threads[j].valid) {
+ cerr << "Error Starting Thread " << j << endl;
+ }
+ }
+ modelName = "HTO3";
+ m3.viterbi_hto3();
+ while (1) {
+ bool done = true;
+ for (j=1; j<ncpu; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if (threads[j].done==1) {
+ threads[j].done = 2;
+ m3.aCountTable.merge(threads[j].m->aCountTable);
+ m3.dCountTable.merge(threads[j].m->dCountTable);
+ m3.nCountTable.merge(threads[j].m->nCountTable,
+ m3.Elist.uniqTokens(), m3.Elist.getVocabList());
+ m3.p0_count += threads[j].m->p0_count;
+ m3.p1_count += threads[j].m->p1_count;
+ } else if (threads[j].done==2) {
+ // Nothing
+ } else if (threads[j].done==0) {
+ done = false;
+ }
+ }
+ if (done)
+ break;
+ }
+ } else if (i>0 && i< Model3_Iterations) {
+ modelName = "3TO3";
+ for (j=1; j<ncpu; j++) {
+ threads[j].valid = pthread_create(&(threads[j].thread), NULL,
+ em_thread_323, &(threads[j]));
+ if (threads[j].valid) {
+ cerr << "Error Starting Thread " << j << endl;
+ }
+ }
+ m3.viterbi_3to3();
+ while (1) {
+ bool done = true;
+ for (j=1; j<ncpu; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if (threads[j].done==1) {
+ threads[j].done = 2;
+ m3.aCountTable.merge(threads[j].m->aCountTable);
+ m3.dCountTable.merge(threads[j].m->dCountTable);
+ m3.nCountTable.merge(threads[j].m->nCountTable,
+ m3.Elist.uniqTokens(), m3.Elist.getVocabList());
+ m3.p0_count += threads[j].m->p0_count;
+ m3.p1_count += threads[j].m->p1_count;
+ } else if (threads[j].done==2) {
+ // Nothing
+ } else if (threads[j].done==0) {
+ done = false;
+ }
+ }
+ if (done)
+ break;
+ }
+ } else if (i==Model3_Iterations) {
+ modelName = "3TO4";
+ for (j=1; j<ncpu; j++) {
+ threads[j].valid = pthread_create(&(threads[j].thread), NULL,
+ em_thread_324, &(threads[j]));
+ if (threads[j].valid) {
+ cerr << "Error Starting Thread " << j << endl;
+ }
+ }
+ d4m = m3.viterbi_3to4();
+ while (1) {
+ bool done = true;
+ for (j=1; j<ncpu; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if (threads[j].done==1) {
+ threads[j].done = 2;
+ m3.aCountTable.merge(threads[j].m->aCountTable);
+ m3.dCountTable.merge(threads[j].m->dCountTable);
+ m3.nCountTable.merge(threads[j].m->nCountTable,
+ m3.Elist.uniqTokens(), m3.Elist.getVocabList());
+ m3.p0_count += threads[j].m->p0_count;
+ m3.p1_count += threads[j].m->p1_count;
+ d4m->merge(*threads[j].d4);
+ } else if (threads[j].done==2) {
+ // Nothing
+ } else if (threads[j].done==0) {
+ done = false;
+ }
+ }
+ if (done)
+ break;
+ }
+ } else if (i>Model3_Iterations) {
+ modelName = "4TO4";
+ for (j=1; j<ncpu; j++) {
+ threads[j].valid = pthread_create(&(threads[j].thread), NULL,
+ em_thread_424, &(threads[j]));
+ if (threads[j].valid) {
+ cerr << "Error Starting Thread " << j << endl;
+ }
+ }
+ m3.viterbi_4to4(*d4m);
+ while (1) {
+ bool done = true;
+ for (j=1; j<ncpu; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if (threads[j].done==1) {
+ threads[j].done = 2;
+ m3.aCountTable.merge(threads[j].m->aCountTable);
+ m3.dCountTable.merge(threads[j].m->dCountTable);
+ m3.nCountTable.merge(threads[j].m->nCountTable,
+ m3.Elist.uniqTokens(), m3.Elist.getVocabList());
+ m3.p0_count += threads[j].m->p0_count;
+ m3.p1_count += threads[j].m->p1_count;
+ d4m->merge(*(threads[j].d4));
+ } else if (threads[j].done==2) {
+ // Nothing
+ } else if (threads[j].done==0) {
+ done = false;
+ }
+ }
+ if (done)
+ break;
+ }
+ }
+ m3.perp.record(modelName);
+ m3.errorReportAL(cerr, modelName);
+ m3.trainViterbiPerp.record(modelName);
+
+ m3.tTable.normalizeTable(m3.Elist, m3.Flist);
+ m3.aCountTable.normalize(m3.aTable);
+ m3.aCountTable.clear();
+ m3.dCountTable.normalize(m3.dTable);
+ m3.dCountTable.clear();
+ m3.nCountTable.normalize(m3.nTable, &(m3.Elist.getVocabList()));
+ m3.nCountTable.clear();
+ cout << "p0_count is " << m3.p0_count << " and p1 is " << m3.p1_count
+ << "; ";
+ if (P0!=-1.0) {
+ m3.p0 = P0;
+ m3.p1 = 1-P0;
+ } else {
+ if (m3.p1_count + m3.p0_count != 0) {
+ m3.p1 = m3.p1_count / (m3.p1_count + m3.p0_count );
+ m3.p0 = 1 - m3.p1;
+ } else {
+ m3.p1 = m3.p0 = 0;
+ cerr << "ERROR: p0_count+p1_count is zero!!!\n";
+ }
+ }
+ m3.p0_count = m3.p1_count = 0;
+ cout << "p0 is " << m3.p0 << " p1: " << m3.p1 << '\n';
+ if (d4m) {
+ d4m->normalizeTable();
+ d4m->clear();
+ }
+
+ cout << modelName<<": TRAIN CROSS-ENTROPY " << m3.perp.cross_entropy()
+ << " PERPLEXITY " << m3.perp.perplexity() << '\n';
+ if (m3.testPerp && m3.testHandler)
+ cout << modelName << ":("<<i<<" TEST CROSS-ENTROPY "
+ << m3.testPerp->cross_entropy() << " PERPLEXITY "
+ << m3.testPerp->perplexity() << " sum: "
+ << m3.testPerp->getSum()<< " wc: "
+ << m3.testPerp->word_count() << '\n';
+ cout << modelName << ": ("<<i<<") TRAIN VITERBI CROSS-ENTROPY "
+ << m3.trainViterbiPerp.cross_entropy() << " PERPLEXITY "
+ << m3.trainViterbiPerp.perplexity() << '\n';
+ bool dump_files = true;
+ if (dump_files) {
+ if (OutputInAachenFormat==0)
+ m3.tTable.printProbTable(tfile.c_str(),
+ m3.Elist.getVocabList(), m3.Flist.getVocabList(),
+ OutputInAachenFormat);
+ m3.aTable.printTable(afile.c_str());
+ m3.dTable.printTable(dfile.c_str());
+ m3.nTable.printNTable(m3.Elist.uniqTokens(), nfile.c_str(),
+ m3.Elist.getVocabList(), OutputInAachenFormat);
+ ofstream of(p0file.c_str());
+ of << m3.p0;
+ of.close();
+ }
+ it_fn = time(NULL);
+ cout << "\n" << modelName << " Viterbi Iteration : "<<i<< " took: "
+ << difftime(it_fn, it_st) << " seconds\n";
+ }
+ fn = time(NULL);
+ cout << trainingString <<" Training Finished at: " << my_ctime(&fn) << "\n";
+ cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: "
+ << difftime(fn, st) << " seconds\n";
+ cout << "==========================================================\n";
}
diff --git a/mgizapp/src/model3.h b/mgizapp/src/model3.h
index 33e7dd1..5e95f7d 100644
--- a/mgizapp/src/model3.h
+++ b/mgizapp/src/model3.h
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -55,100 +55,101 @@ using __gnu_cxx::hash_map;
#include "D5Tables.h"
#include "AlignTables.h"
#include "syncObj.h"
-class model3 : public model2 {
- Mutex plock;
+class model3 : public model2
+{
+ Mutex plock;
public:
- amodel<PROB>& dTable;
- amodel<COUNT> dCountTable;
+ amodel<PROB>& dTable;
+ amodel<COUNT> dCountTable;
- PROB p0, p1;
- SyncDouble p0_count, p1_count;
+ PROB p0, p1;
+ SyncDouble p0_count, p1_count;
- nmodel<PROB>& nTable;
- nmodel<COUNT> nCountTable;
- hmm*h;
- int part;
- int iter;
+ nmodel<PROB>& nTable;
+ nmodel<COUNT> nCountTable;
+ hmm*h;
+ int part;
+ int iter;
private:
- WordClasses* ewordclasses;
- WordClasses* fwordclasses;
+ WordClasses* ewordclasses;
+ WordClasses* fwordclasses;
public:
- model3(model2& m2);
- void setHMM(hmm*_h) {
- ewordclasses = &(_h->ewordclasses);
- fwordclasses = &(_h->fwordclasses);
- h=_h;
- }
- model3(model2& m2, amodel<PROB>& d, nmodel<PROB>& n);
- model3(model3& m3, amodel<PROB>& d, nmodel<PROB>& n, amodel<COUNT>&);
- ~model3();
- // methods
- void transfer(sentenceHandler&, bool, Perplexity&, Perplexity&,
- bool updateT=1);
- void transferSimple(sentenceHandler&, bool, Perplexity&, Perplexity&,
- bool updateT=1);
- void load_tables(const char *nfile, const char *dfile, const char *p0file);
-
- void em(int, sentenceHandler&);
- int viterbi(int, int, int, int,const char* prev_d4 = NULL,const char* prev_d4_2=NULL,bool dumpCount = false,
- const char* dumpCountName = NULL, bool useString = false);
- int viterbi_hto3();
- d4model* viterbi_3to4();
- int viterbi_3to3();
- int viterbi_4to4(d4model& d4m);
- void viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName);
+ model3(model2& m2);
+ void setHMM(hmm*_h) {
+ ewordclasses = &(_h->ewordclasses);
+ fwordclasses = &(_h->fwordclasses);
+ h=_h;
+ }
+ model3(model2& m2, amodel<PROB>& d, nmodel<PROB>& n);
+ model3(model3& m3, amodel<PROB>& d, nmodel<PROB>& n, amodel<COUNT>&);
+ ~model3();
+ // methods
+ void transfer(sentenceHandler&, bool, Perplexity&, Perplexity&,
+ bool updateT=1);
+ void transferSimple(sentenceHandler&, bool, Perplexity&, Perplexity&,
+ bool updateT=1);
+ void load_tables(const char *nfile, const char *dfile, const char *p0file);
+
+ void em(int, sentenceHandler&);
+ int viterbi(int, int, int, int,const char* prev_d4 = NULL,const char* prev_d4_2=NULL,bool dumpCount = false,
+ const char* dumpCountName = NULL, bool useString = false);
+ int viterbi_hto3();
+ d4model* viterbi_3to4();
+ int viterbi_3to3();
+ int viterbi_4to4(d4model& d4m);
+ void viterbi_thread(int it, string alignfile, bool dump_files,d4model& d4m,d5model& d5m,bool final,char fromModel,char toModel,string& modelName);
private:
-
-
- LogProb prob_of_special(Vector<WordIndex>&, Vector<WordIndex>&,
- tmodel<COUNT, PROB>&, Vector<WordIndex>&, Vector<WordIndex>&);
-
- LogProb prob_of_target_and_alignment_given_source(Vector<WordIndex>&,
- Vector<WordIndex>&, tmodel<COUNT, PROB>&, Vector<WordIndex>&,
- Vector<WordIndex>&);
- LogProb prob_of_target_given_source(tmodel<COUNT, PROB>&,
- Vector<WordIndex>&, Vector<WordIndex>&);
-
- LogProb scoreOfMove(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&, tmodel<COUNT, PROB>&,
- WordIndex, WordIndex);
-
- LogProb scoreOfSwap(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, tmodel<COUNT, PROB>&, int, int);
-
- void hillClimb(Vector<WordIndex>&, Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, LogProb&, tmodel<COUNT, PROB>&, int, int);
-
- void findBestAlignment(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&, LogProb&, int, int);
-
- void findAlignmentsNeighborhood(Vector<WordIndex>&, Vector<WordIndex>&,
- LogProb&align_total_count, alignmodel&neighborhood, int, int);
- void collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs, const Vector<WordIndex>&, LogProb,
- float count);
- LogProb viterbi_model2(const transpair_model3&ef, alignment&output,
- int pair_no, int i_peg = -1, int j_peg = -1) const;
- LogProb _viterbi_model2(const transpair_model2&ef, alignment&output,
- int i_peg = -1, int j_peg = -1) const;
- LogProb viterbi_model2(const transpair_modelhmm&ef, alignment&output,
- int pair_no, int i_peg = -1, int j_peg = -1) const;
+
+
+ LogProb prob_of_special(Vector<WordIndex>&, Vector<WordIndex>&,
+ tmodel<COUNT, PROB>&, Vector<WordIndex>&, Vector<WordIndex>&);
+
+ LogProb prob_of_target_and_alignment_given_source(Vector<WordIndex>&,
+ Vector<WordIndex>&, tmodel<COUNT, PROB>&, Vector<WordIndex>&,
+ Vector<WordIndex>&);
+ LogProb prob_of_target_given_source(tmodel<COUNT, PROB>&,
+ Vector<WordIndex>&, Vector<WordIndex>&);
+
+ LogProb scoreOfMove(Vector<WordIndex>&, Vector<WordIndex>&,
+ Vector<WordIndex>&, Vector<WordIndex>&, tmodel<COUNT, PROB>&,
+ WordIndex, WordIndex);
+
+ LogProb scoreOfSwap(Vector<WordIndex>&, Vector<WordIndex>&,
+ Vector<WordIndex>&, tmodel<COUNT, PROB>&, int, int);
+
+ void hillClimb(Vector<WordIndex>&, Vector<WordIndex>&, Vector<WordIndex>&,
+ Vector<WordIndex>&, LogProb&, tmodel<COUNT, PROB>&, int, int);
+
+ void findBestAlignment(Vector<WordIndex>&, Vector<WordIndex>&,
+ Vector<WordIndex>&, Vector<WordIndex>&, LogProb&, int, int);
+
+ void findAlignmentsNeighborhood(Vector<WordIndex>&, Vector<WordIndex>&,
+ LogProb&align_total_count, alignmodel&neighborhood, int, int);
+ void collectCountsOverAlignement(const Vector<WordIndex>& es,
+ const Vector<WordIndex>& fs, const Vector<WordIndex>&, LogProb,
+ float count);
+ LogProb viterbi_model2(const transpair_model3&ef, alignment&output,
+ int pair_no, int i_peg = -1, int j_peg = -1) const;
+ LogProb _viterbi_model2(const transpair_model2&ef, alignment&output,
+ int i_peg = -1, int j_peg = -1) const;
+ LogProb viterbi_model2(const transpair_modelhmm&ef, alignment&output,
+ int pair_no, int i_peg = -1, int j_peg = -1) const;
private:
- void estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& perp1,
- bool simple, bool dump_files, bool updateT);
- void viterbi_loop(Perplexity&, Perplexity&, sentenceHandler&, bool,
- const char*, bool, string model);
+ void estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp, Perplexity& perp1,
+ bool simple, bool dump_files, bool updateT);
+ void viterbi_loop(Perplexity&, Perplexity&, sentenceHandler&, bool,
+ const char*, bool, string model);
- template<class MODEL_TYPE, class A, class B> void viterbi_loop_with_tricks(
- Perplexity&, Perplexity&, sentenceHandler&, bool, const char*,
- bool, string model, bool final, A*d4m, B*d5m);
+ template<class MODEL_TYPE, class A, class B> void viterbi_loop_with_tricks(
+ Perplexity&, Perplexity&, sentenceHandler&, bool, const char*,
+ bool, string model, bool final, A*d4m, B*d5m);
};
void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations,
- int Model4_Iterations);
+ int Model4_Iterations);
#endif
diff --git a/mgizapp/src/model345-peg.cpp b/mgizapp/src/model345-peg.cpp
index 8c1bde6..b31c977 100644
--- a/mgizapp/src/model345-peg.cpp
+++ b/mgizapp/src/model345-peg.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,162 +30,170 @@ bool makeOneMoveSwap(const alignment&x,const alignment&y,set<OneMoveSwap>&soms)
int count=0;
Vector<int> positions(4);
assert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- if(x(j)!=y(j))
- {
- if(count==4)
- return 0;
- positions[count]=j;
- count++;
- }
+ for(PositionIndex j=1; j<=x.get_m(); j++)
+ if(x(j)!=y(j)) {
+ if(count==4)
+ return 0;
+ positions[count]=j;
+ count++;
+ }
assert(count>0);
- if(count==1)
- {
- oms.type=1;
+ if(count==1) {
+ oms.type=1;
+ oms.a=positions[0];
+ oms.b=y(positions[0]);
+ soms.insert(oms);
+ for(unsigned int j=1; j<=x.get_m(); ++j) {
+ if( int(j)!=positions[0]&&y(j)==y(positions[0])) {
+ oms.type=3;
+ oms.a=j;
+ oms.b=x(positions[0]);
+ soms.insert(oms);
+ }
+ }
+ for(unsigned int j=1; j<=x.get_m(); ++j) {
+ if( int(j)!=positions[0]&&x(j)==x(positions[0])) {
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=j;
+ if( oms.b<oms.a)swap(oms.b,oms.a);
+ soms.insert(oms);
+ }
+ }
+ return 1;
+ } else if(count==2) {
+ if(x(positions[0])==y(positions[1]) && x(positions[1])==y(positions[0])) {
+ oms.type=4;
oms.a=positions[0];
- oms.b=y(positions[0]);
+ oms.b=positions[1];
soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=3;
- oms.a=j;
- oms.b=x(positions[0]);
- soms.insert(oms);
- }
- }
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&x(j)==x(positions[0]))
- {
- oms.type=2;
- oms.a=positions[0];
- oms.b=j;
- if( oms.b<oms.a)swap(oms.b,oms.a);
- soms.insert(oms);
- }
- }
- return 1;
- }
- else if(count==2)
- {
- if(x(positions[0])==y(positions[1]) && x(positions[1])==y(positions[0]))
- {
- oms.type=4;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=2;oms.a=j;oms.b=positions[1];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- if( int(j)!=positions[1]&&y(j)==y(positions[1]))
- {
- oms.type=2;oms.a=j;oms.b=positions[0];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- }
- }
- else if(x(positions[0])==y(positions[1]) )
- {
- oms.type=3;
- oms.a=positions[0];
- oms.b=x(positions[1]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
- else if( x(positions[1])==y(positions[0]) )
- {
- oms.type=3;
- oms.a=positions[1];
- oms.b=x(positions[0]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
+ for(unsigned int j=1; j<=x.get_m(); ++j) {
+ if( int(j)!=positions[0]&&y(j)==y(positions[0])) {
+ oms.type=2;
+ oms.a=j;
+ oms.b=positions[1];
+ if( oms.b<oms.a)swap(oms.b,oms.a);
+ soms.insert(oms);
+ }
+ if( int(j)!=positions[1]&&y(j)==y(positions[1])) {
+ oms.type=2;
+ oms.a=j;
+ oms.b=positions[0];
+ if( oms.b<oms.a)swap(oms.b,oms.a);
+ soms.insert(oms);
+ }
+ }
+ } else if(x(positions[0])==y(positions[1]) ) {
oms.type=3;
oms.a=positions[0];
+ oms.b=x(positions[1]);
+ soms.insert(oms);
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[1];
+ soms.insert(oms);
+ } else if( x(positions[1])==y(positions[0]) ) {
+ oms.type=3;
+ oms.a=positions[1];
oms.b=x(positions[0]);
soms.insert(oms);
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[1];
+ soms.insert(oms);
+ }
+ oms.type=3;
+ oms.a=positions[0];
+ oms.b=x(positions[0]);
+ soms.insert(oms);
+ oms.a=positions[1];
+ oms.b=x(positions[1]);
+ soms.insert(oms);
+ return 1;
+ } else if( count==3 ) {
+ // three differences and three different numbers
+ Vector<int> xx(3),yy(3);
+ xx[0]=x(positions[0]);
+ xx[1]=x(positions[1]);
+ xx[2]=x(positions[2]);
+ yy[0]=y(positions[0]);
+ yy[1]=y(positions[1]);
+ yy[2]=y(positions[2]);
+ sort(xx.begin(),xx.end());
+ sort(yy.begin(),yy.end());
+ if(xx==yy) {
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[1];
+ soms.insert(oms);
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[2];
+ soms.insert(oms);
+ oms.type=2;
oms.a=positions[1];
- oms.b=x(positions[1]);
+ oms.b=positions[2];
soms.insert(oms);
- return 1;
+ } else {
+ //cout << "HERE.\n";
+ if( x(positions[0])==y(positions[1])&&x(positions[1])==y(positions[0]) ) {
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[1];
+ if( oms.b<oms.a) swap(oms.b,oms.a);
+ soms.insert(oms);
+ oms.type=3;
+ oms.a=positions[2];
+ oms.b=x(positions[2]);
+ soms.insert(oms);
+ }
+ if( x(positions[2])==y(positions[1])&&x(positions[1])==y(positions[2]) ) {
+ oms.type=2;
+ oms.a=positions[2];
+ oms.b=positions[1];
+ if( oms.b<oms.a) swap(oms.b,oms.a);
+ soms.insert(oms);
+ oms.type=3;
+ oms.a=positions[0];
+ oms.b=x(positions[0]);
+ soms.insert(oms);
+ }
+ if( x(positions[0])==y(positions[2])&&x(positions[2])==y(positions[0]) ) {
+ oms.type=2;
+ oms.a=positions[0];
+ oms.b=positions[2];
+ if( oms.b<oms.a) swap(oms.b,oms.a);
+ soms.insert(oms);
+ oms.type=3;
+ oms.a=positions[1];
+ oms.b=x(positions[1]);
+ soms.insert(oms);
+ }
}
- else if( count==3 )
- { // three differences and three different numbers
- Vector<int> xx(3),yy(3);
- xx[0]=x(positions[0]);xx[1]=x(positions[1]);xx[2]=x(positions[2]);
- yy[0]=y(positions[0]);yy[1]=y(positions[1]);yy[2]=y(positions[2]);
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];soms.insert(oms);
- oms.type=2;oms.a=positions[0];oms.b=positions[2];soms.insert(oms);
- oms.type=2;oms.a=positions[1];oms.b=positions[2];soms.insert(oms);
- }
- else
- {
- //cout << "HERE.\n";
- if( x(positions[0])==y(positions[1])&&x(positions[1])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[2];oms.b=x(positions[2]);soms.insert(oms);
- }
- if( x(positions[2])==y(positions[1])&&x(positions[1])==y(positions[2]) )
- {
- oms.type=2;oms.a=positions[2];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[0];oms.b=x(positions[0]);soms.insert(oms);
- }
- if( x(positions[0])==y(positions[2])&&x(positions[2])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[2];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[1];oms.b=x(positions[1]);soms.insert(oms);
- }
- }
- return 1;
+ return 1;
+ } else if(count==4) {
+ Vector<int> xx(4),yy(4);
+ for(int i=0; i<4; ++i) {
+ xx[i]=x(positions[i]);
+ yy[i]=y(positions[i]);
}
- else if(count==4)
- {
- Vector<int> xx(4),yy(4);
- for(int i=0;i<4;++i)
- {
- xx[i]=x(positions[i]);
- yy[i]=y(positions[i]);
- }
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;
- for(int j1=0;j1<4;j1++)
- for(int j2=j1+1;j2<4;j2++)
- {
- if(x(positions[j1])!=x(positions[j2])&&
- x(positions[j1])==y(positions[j2])&&
- x(positions[j2])==y(positions[j1]))
- {
- oms.type=2;oms.a=positions[j1];oms.b=positions[j2];
- soms.insert(oms);
- }
- }
- }
- return 1;
+ sort(xx.begin(),xx.end());
+ sort(yy.begin(),yy.end());
+ if(xx==yy) {
+ oms.type=2;
+ for(int j1=0; j1<4; j1++)
+ for(int j2=j1+1; j2<4; j2++) {
+ if(x(positions[j1])!=x(positions[j2])&&
+ x(positions[j1])==y(positions[j2])&&
+ x(positions[j2])==y(positions[j1])) {
+ oms.type=2;
+ oms.a=positions[j1];
+ oms.b=positions[j2];
+ soms.insert(oms);
+ }
+ }
}
- else
+ return 1;
+ } else
return 0;
}
diff --git a/mgizapp/src/model3_viterbi.cpp b/mgizapp/src/model3_viterbi.cpp
index 9398116..1ec154d 100644
--- a/mgizapp/src/model3_viterbi.cpp
+++ b/mgizapp/src/model3_viterbi.cpp
@@ -8,14 +8,14 @@
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -31,79 +31,82 @@ typedef hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignme
#endif
LogProb model3::prob_of_target_and_alignment_given_source(Vector<WordIndex>& A,
- Vector<WordIndex>& Fert, tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs, Vector<WordIndex>& es) {
- LogProb total = 1.0;
- LogProb temp = 0.0;
- const LogProb zero = 0.0;
- WordIndex l = es.size()-1, m = fs.size()-1;
- WordIndex i, j;
-
- total *= pow(double(1-p1), m-2.0 * Fert[0]) * pow(double(p1), double(Fert[0]));
- if (total == 0)
- return (zero);
- for (i = 1; i <= Fert[0]; i++) { // loop caculates m-fert[0] choose fert[0]
- total *= double(m - Fert[0] - i + 1) / i;
- if (total == 0)
- return (zero);
- }
- for (i = 1; i <= l; i++) { // this loop calculates fertilities term
- total *= double(nTable.getValue(es[i], Fert[i])) * (LogProb) factorial(Fert[i]);
- if (total == 0)
- return (zero);
- }
- for (j = 1; j <= m; j++) {
- // temp = tTable.getValue(es[A[j]], fs[j]) ;
- temp = double(tTable.getProb(es[A[j]], fs[j]));
- total *= temp;
- if (0 != A[j])
- total *= double(dTable.getValue(j, A[j], l, m));
- if (total == 0)
- return (zero);
- }
- return (total);
+ Vector<WordIndex>& Fert, tmodel<COUNT, PROB>& tTable,
+ Vector<WordIndex>& fs, Vector<WordIndex>& es)
+{
+ LogProb total = 1.0;
+ LogProb temp = 0.0;
+ const LogProb zero = 0.0;
+ WordIndex l = es.size()-1, m = fs.size()-1;
+ WordIndex i, j;
+
+ total *= pow(double(1-p1), m-2.0 * Fert[0]) * pow(double(p1), double(Fert[0]));
+ if (total == 0)
+ return (zero);
+ for (i = 1; i <= Fert[0]; i++) { // loop caculates m-fert[0] choose fert[0]
+ total *= double(m - Fert[0] - i + 1) / i;
+ if (total == 0)
+ return (zero);
+ }
+ for (i = 1; i <= l; i++) { // this loop calculates fertilities term
+ total *= double(nTable.getValue(es[i], Fert[i])) * (LogProb) factorial(Fert[i]);
+ if (total == 0)
+ return (zero);
+ }
+ for (j = 1; j <= m; j++) {
+ // temp = tTable.getValue(es[A[j]], fs[j]) ;
+ temp = double(tTable.getProb(es[A[j]], fs[j]));
+ total *= temp;
+ if (0 != A[j])
+ total *= double(dTable.getValue(j, A[j], l, m));
+ if (total == 0)
+ return (zero);
+ }
+ return (total);
}
LogProb model3::prob_of_target_given_source(tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs, Vector<WordIndex>& es) {
-
- WordIndex x, y;
- LogProb total = 0;
- // WordIndex l = es.size(), m = fs.size();
- WordIndex l = es.size()-1, m = fs.size()-1;
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- WordIndex i, j;
-
- for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
- y = x;
- // for (j = 1 ; j < m ; j++){
- for (j = 1; j <= m; j++) {
- A[j] = y % (l+1);
- y /= (l+1);
- }
- // for(i = 0 ; i < l ; i++)
- for (i = 0; i <= l; i++)
- Fert[i] = 0;
- // for (j = 1 ; j < m ; j++)
- for (j = 1; j <= m; j++)
- Fert[A[j]]++;
- // if (2 * Fert[0] < m){
- if (2 * Fert[0] <= m) { /* consider alignments that has Fert[0] less than
- half the length of french sentence */
- total += prob_of_target_and_alignment_given_source(A, Fert, tTable,
- fs, es);
- }
- }
- return (total);
+ Vector<WordIndex>& fs, Vector<WordIndex>& es)
+{
+
+ WordIndex x, y;
+ LogProb total = 0;
+ // WordIndex l = es.size(), m = fs.size();
+ WordIndex l = es.size()-1, m = fs.size()-1;
+ Vector<WordIndex> A(fs.size(),/*-1*/0);
+ Vector<WordIndex> Fert(es.size(),0);
+ WordIndex i, j;
+
+ for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A
+ y = x;
+ // for (j = 1 ; j < m ; j++){
+ for (j = 1; j <= m; j++) {
+ A[j] = y % (l+1);
+ y /= (l+1);
+ }
+ // for(i = 0 ; i < l ; i++)
+ for (i = 0; i <= l; i++)
+ Fert[i] = 0;
+ // for (j = 1 ; j < m ; j++)
+ for (j = 1; j <= m; j++)
+ Fert[A[j]]++;
+ // if (2 * Fert[0] < m){
+ if (2 * Fert[0] <= m) {
+ /* consider alignments that has Fert[0] less than
+ half the length of french sentence */
+ total += prob_of_target_and_alignment_given_source(A, Fert, tTable,
+ fs, es);
+ }
+ }
+ return (total);
}
LogProb model3::scoreOfMove(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert,
- tmodel<COUNT, PROB>& tTable, WordIndex j, WordIndex i)
+ Vector<WordIndex>& A, Vector<WordIndex>& Fert,
+ tmodel<COUNT, PROB>& tTable, WordIndex j, WordIndex i)
// returns the scaling factor of the original score if A[j] is linked to
// i, no change is really made to A
-// but the score is calculated if the move is to be taken (i.e.
+// but the score is calculated if the move is to be taken (i.e.
// no side effects on Alignment A nor its Fertility Fert
// If the value of the scaling factor is:
// 1: then the score of the new alignment if the move is taken will
@@ -112,31 +115,31 @@ LogProb model3::scoreOfMove(Vector<WordIndex>& es, Vector<WordIndex>& fs,
// 2.0: the new score will be twice as much.
//
{
- // LogProb score;
- LogProb change;
- WordIndex m, l;
-
- m = fs.size() - 1;
- l = es.size() - 1;
-
- if (A[j] == i)
- // return(original_score);
- return (1);
- else if (A[j] == 0) { // a move from position zero to something else
- change = double(p0*p0)/p1 * (double((Fert[0]*(m-Fert[0]+1))) / ((m-2*Fert[0]+1)*(m-2*Fert[0]
- +2))) * (Fert[i]+1) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m));
- } else if (i == 0) { // a move to position zero
- change= ((double(p1) / (p0*p0)) * (double((m-2*Fert[0])*(m-2*Fert[0]-1))/((Fert[0]+1)*(m-Fert[0]))) * (double(1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]]))* double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * 1.0 / double(dTable.getValue(j, A[j], l, m)));
- } else { // a move that does not involve position zero
- change = ((double(Fert[i]+1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]])) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j]))/ double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m))/ double(dTable.getValue(j, A[j], l, m)));
- }
- return (change);
+ // LogProb score;
+ LogProb change;
+ WordIndex m, l;
+
+ m = fs.size() - 1;
+ l = es.size() - 1;
+
+ if (A[j] == i)
+ // return(original_score);
+ return (1);
+ else if (A[j] == 0) { // a move from position zero to something else
+ change = double(p0*p0)/p1 * (double((Fert[0]*(m-Fert[0]+1))) / ((m-2*Fert[0]+1)*(m-2*Fert[0]
+ +2))) * (Fert[i]+1) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m));
+ } else if (i == 0) { // a move to position zero
+ change= ((double(p1) / (p0*p0)) * (double((m-2*Fert[0])*(m-2*Fert[0]-1))/((Fert[0]+1)*(m-Fert[0]))) * (double(1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]]))* double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * 1.0 / double(dTable.getValue(j, A[j], l, m)));
+ } else { // a move that does not involve position zero
+ change = ((double(Fert[i]+1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]])) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j]))/ double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m))/ double(dTable.getValue(j, A[j], l, m)));
+ }
+ return (change);
}
LogProb model3::scoreOfSwap(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, tmodel<COUNT, PROB>& tTable, int j1, int j2)
-// returns the scaling factor of the original score if the swap to
-// take place,
+ Vector<WordIndex>& A, tmodel<COUNT, PROB>& tTable, int j1, int j2)
+// returns the scaling factor of the original score if the swap to
+// take place,
// No side effects here (none of the parameters passed is changed!
// (i.e. the alignment A is not really changed)
// If the value of the scaling factor is:
@@ -146,421 +149,423 @@ LogProb model3::scoreOfSwap(Vector<WordIndex>& es, Vector<WordIndex>& fs,
// 2.0: the new score will be twice as much.
//
{
- LogProb score;
- WordIndex i1, i2, m, l;
-
- m = fs.size() - 1;
- l = es.size() - 1;
- if (j1 == j2 || A[j1] == A[j2]) // if swapping same position return ratio 1
- return (1);
- else {
- i1 = A[j1];
- i2 = A[j2];
- score = double(tTable.getProb(es[i2], fs[j1]))/double(tTable.getProb(es[i1], fs[j1])) * double(tTable.getProb(es[i1], fs[j2]))/double(tTable.getProb(es[i2], fs[j2]));
- if (i1 != 0) {
- score *= double(dTable.getValue(j2, i1, l, m))/double(dTable.getValue(j1, i1, l, m));
- }
- if (i2 != 0) {
- score *= double(dTable.getValue(j1, i2, l, m))/double(dTable.getValue(j2, i2, l, m));
- }
- return (score);
- }
+ LogProb score;
+ WordIndex i1, i2, m, l;
+
+ m = fs.size() - 1;
+ l = es.size() - 1;
+ if (j1 == j2 || A[j1] == A[j2]) // if swapping same position return ratio 1
+ return (1);
+ else {
+ i1 = A[j1];
+ i2 = A[j2];
+ score = double(tTable.getProb(es[i2], fs[j1]))/double(tTable.getProb(es[i1], fs[j1])) * double(tTable.getProb(es[i1], fs[j2]))/double(tTable.getProb(es[i2], fs[j2]));
+ if (i1 != 0) {
+ score *= double(dTable.getValue(j2, i1, l, m))/double(dTable.getValue(j1, i1, l, m));
+ }
+ if (i2 != 0) {
+ score *= double(dTable.getValue(j1, i2, l, m))/double(dTable.getValue(j2, i2, l, m));
+ }
+ return (score);
+ }
}
void model3::hillClimb(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
- tmodel<COUNT, PROB>& tTable, int = -1, int j_peg = -1)
+ Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
+ tmodel<COUNT, PROB>& tTable, int = -1, int j_peg = -1)
// Hill climbing given alignment A .
// Alignment A will be updated and also best_score
// if no pegging is needed i_peg == -1, and j_peg == -1
{
- WordIndex i, j, l, m, j1, old_i;
- LogProb change;
- bool local_minima;
- int level = 0;
- LogProb best_change_so_far, best_change;
- Vector<WordIndex> A_so_far;
- Vector<WordIndex> Fert_so_far;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- best_change = 1; // overall scaling factor (i.e. from the begining of climb
- do {
- best_change_so_far = 1; // best scaling factor of this level of hill climb
- local_minima = true;
- for (j = 1; j <= m; j++) {
- if (int(j) != j_peg) { // make sure not to change the pegged link
- for (j1 = j + 1; j1 <= m; j1++) {
- // for all possible swaps
- // make sure you are not swapping at same position
- if ((A[j] != A[j1]) && (int(j1) != j_peg)) {
- // change = scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- change = scoreOfSwap(es, fs, A, tTable, j, j1);
- if (change > best_change_so_far) { // if better alignment found, keep it
- local_minima = false;
- best_change_so_far = change;
- A_so_far = A;
- Fert_so_far = Fert;
- old_i = A_so_far[j];
- A_so_far[j] = A_so_far[j1];
- A_so_far[j1] = old_i;
- } // end of if (change > best_change_so_far)
- } // end of if (A[j] != A[j1] ..)
- } // of for (j1 = j+1 ....)
- // for (i = 0 ; i < l ; i++){ // all possible moves
- for (i = 0; i <= l; i++) { // all possible moves
- if (i != A[j]) { // make sure not to move to same position
- if (i != 0 || (m >= 2 * (Fert[0]+1))) { // if moving to NULL word
- // (pos 0), make sure not to violate the fertility restriction
- // i.e. NULL can not take more than half the target words
- // change = scoreOfMove(es, fs, A, Fert, best_score, tTable, j, i);
- change = scoreOfMove(es, fs, A, Fert, tTable, j, i);
- if (change > best_change_so_far) { // if better alignment found, keep it
- best_change_so_far = change;
- local_minima = false;
- A_so_far = A;
- Fert_so_far = Fert;
- old_i = A_so_far[j];
- A_so_far[j] = i;
- Fert_so_far[old_i]--;
- Fert_so_far[i]++;
- } // end of if (change > best_change_so_far)
- } // end of if ((i!=0) ...
- } // end of if (i != A[j] )
- } // end of for (i = 0 ; ....)
- } // end of if(j != j_peg)
- } // end of for (j = 1 ; ...)
- level++;
- if (!local_minima) {
- if (best_change_so_far > 1) { // if current chage is improving
- A = A_so_far;
- Fert = Fert_so_far;
- best_change *= best_change_so_far;
- } else {
- local_minima = true;
- }
- } // end of if(!local_minima)
- if (level> 15)
- cerr << ".";
- } while (local_minima == false);
- if (level > 15)
- cerr << "\nHill Climb Level: " << level << " score: scaling old: "
- <<(best_score*best_change);
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
- es);
- if (level>15)
- cerr << " using new calc: " << best_score << '\n';
+ WordIndex i, j, l, m, j1, old_i;
+ LogProb change;
+ bool local_minima;
+ int level = 0;
+ LogProb best_change_so_far, best_change;
+ Vector<WordIndex> A_so_far;
+ Vector<WordIndex> Fert_so_far;
+
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ best_change = 1; // overall scaling factor (i.e. from the begining of climb
+ do {
+ best_change_so_far = 1; // best scaling factor of this level of hill climb
+ local_minima = true;
+ for (j = 1; j <= m; j++) {
+ if (int(j) != j_peg) { // make sure not to change the pegged link
+ for (j1 = j + 1; j1 <= m; j1++) {
+ // for all possible swaps
+ // make sure you are not swapping at same position
+ if ((A[j] != A[j1]) && (int(j1) != j_peg)) {
+ // change = scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
+ change = scoreOfSwap(es, fs, A, tTable, j, j1);
+ if (change > best_change_so_far) { // if better alignment found, keep it
+ local_minima = false;
+ best_change_so_far = change;
+ A_so_far = A;
+ Fert_so_far = Fert;
+ old_i = A_so_far[j];
+ A_so_far[j] = A_so_far[j1];
+ A_so_far[j1] = old_i;
+ } // end of if (change > best_change_so_far)
+ } // end of if (A[j] != A[j1] ..)
+ } // of for (j1 = j+1 ....)
+ // for (i = 0 ; i < l ; i++){ // all possible moves
+ for (i = 0; i <= l; i++) { // all possible moves
+ if (i != A[j]) { // make sure not to move to same position
+ if (i != 0 || (m >= 2 * (Fert[0]+1))) { // if moving to NULL word
+ // (pos 0), make sure not to violate the fertility restriction
+ // i.e. NULL can not take more than half the target words
+ // change = scoreOfMove(es, fs, A, Fert, best_score, tTable, j, i);
+ change = scoreOfMove(es, fs, A, Fert, tTable, j, i);
+ if (change > best_change_so_far) { // if better alignment found, keep it
+ best_change_so_far = change;
+ local_minima = false;
+ A_so_far = A;
+ Fert_so_far = Fert;
+ old_i = A_so_far[j];
+ A_so_far[j] = i;
+ Fert_so_far[old_i]--;
+ Fert_so_far[i]++;
+ } // end of if (change > best_change_so_far)
+ } // end of if ((i!=0) ...
+ } // end of if (i != A[j] )
+ } // end of for (i = 0 ; ....)
+ } // end of if(j != j_peg)
+ } // end of for (j = 1 ; ...)
+ level++;
+ if (!local_minima) {
+ if (best_change_so_far > 1) { // if current chage is improving
+ A = A_so_far;
+ Fert = Fert_so_far;
+ best_change *= best_change_so_far;
+ } else {
+ local_minima = true;
+ }
+ } // end of if(!local_minima)
+ if (level> 15)
+ cerr << ".";
+ } while (local_minima == false);
+ if (level > 15)
+ cerr << "\nHill Climb Level: " << level << " score: scaling old: "
+ <<(best_score*best_change);
+ best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
+ es);
+ if (level>15)
+ cerr << " using new calc: " << best_score << '\n';
}
void model3::findBestAlignment(Vector<WordIndex>& es, Vector<WordIndex>& fs,
- Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
- /*tmodel<COUNT, PROB>& tTable,
- amodel<PROB>& aTable, */
- int i_peg = -1, int j_peg = -1)
+ Vector<WordIndex>& A, Vector<WordIndex>& Fert, LogProb& best_score,
+ /*tmodel<COUNT, PROB>& tTable,
+ amodel<PROB>& aTable, */
+ int i_peg = -1, int j_peg = -1)
// This finds the best Model2 alignment (i.e. no fertilities stuff) in A
// for the given sentence pair. Its score is returned in A. Its fertility
-// info in Fert.
+// info in Fert.
// if j_peg == -1 && i_peg == -1 then No pegging is performed.
{
- WordIndex i, j, l, m, best_i=0;
- LogProb temp, score, ss;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- for (i=0; i <= l; i++)
- Fert[i] = 0;
- ss = 1;
- if ((j_peg != -1) && (i_peg != -1)) { // if you're doing pegging
- A[j_peg] = i_peg;
- Fert[i_peg] = 1;
- ss *= double(tTable.getProb(es[i_peg], fs[j_peg])) * double(aTable.getValue(i_peg, j_peg, l, m));
- }
- for (j = 1; j <= m; j++) {
- if (int(j) != j_peg) {
- score = 0;
- for (i = 0; i <= l; i++) {
- // first make sure that connecting target word at pos j to source word
- // at pos i will not lead to a violation on Fertility restrictions
- // (e.g. maximum fertility for a word, max fertility for NULL word, etc)
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]
- +1))) || (i != 0))) {
- temp = double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m));
- if (temp > score) {
- best_i = i;
- score = temp;
- } // end of if (temp > score)
- } // end of if (((i == 0 ...)
- } // end of for (i= 0 ...)
- if (score == 0) {
- cerr << "WARNING: In searching for model2 best alignment\n ";
- cerr << "Nothing was set for target token " << fs[j]
- << "at position j: " << j << "\n";
- for (i = 0; i <= l; i++) {
- cerr << "i: " << i << "ttable("<<es[i]<<", "<<fs[j]<<") = "
- << tTable.getProb(es[i], fs[j]) << " atable(" << i
- <<", "<<j<<", "<< l<<", "<<m<<") = "
- << aTable.getValue(i, j, l, m) << " product "
- << double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m)) << '\n';
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
- *(Fert[0]+1))) || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
-
- } // end of if (score == 0)
- else {
- Fert[best_i]++;
- A[j] = best_i;
- }
- ss *= score;
- } // end of if (j != j_peg)
- } // end of for (j == 1 ; ...)
- if (ss <= 0) {
- cerr
- << "WARNING: Model2 viterbi alignment has zero score for sentence pair:\n";
- printSentencePair(es, fs, cerr);
- }
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
- es);
+ WordIndex i, j, l, m, best_i=0;
+ LogProb temp, score, ss;
+
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ for (i=0; i <= l; i++)
+ Fert[i] = 0;
+ ss = 1;
+ if ((j_peg != -1) && (i_peg != -1)) { // if you're doing pegging
+ A[j_peg] = i_peg;
+ Fert[i_peg] = 1;
+ ss *= double(tTable.getProb(es[i_peg], fs[j_peg])) * double(aTable.getValue(i_peg, j_peg, l, m));
+ }
+ for (j = 1; j <= m; j++) {
+ if (int(j) != j_peg) {
+ score = 0;
+ for (i = 0; i <= l; i++) {
+ // first make sure that connecting target word at pos j to source word
+ // at pos i will not lead to a violation on Fertility restrictions
+ // (e.g. maximum fertility for a word, max fertility for NULL word, etc)
+ if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]
+ +1))) || (i != 0))) {
+ temp = double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m));
+ if (temp > score) {
+ best_i = i;
+ score = temp;
+ } // end of if (temp > score)
+ } // end of if (((i == 0 ...)
+ } // end of for (i= 0 ...)
+ if (score == 0) {
+ cerr << "WARNING: In searching for model2 best alignment\n ";
+ cerr << "Nothing was set for target token " << fs[j]
+ << "at position j: " << j << "\n";
+ for (i = 0; i <= l; i++) {
+ cerr << "i: " << i << "ttable("<<es[i]<<", "<<fs[j]<<") = "
+ << tTable.getProb(es[i], fs[j]) << " atable(" << i
+ <<", "<<j<<", "<< l<<", "<<m<<") = "
+ << aTable.getValue(i, j, l, m) << " product "
+ << double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m)) << '\n';
+ if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
+ *(Fert[0]+1))) || (i != 0)))
+ cerr <<"Passed fertility condition \n";
+ else
+ cerr <<"Failed fertility condition \n";
+ }
+
+ } // end of if (score == 0)
+ else {
+ Fert[best_i]++;
+ A[j] = best_i;
+ }
+ ss *= score;
+ } // end of if (j != j_peg)
+ } // end of for (j == 1 ; ...)
+ if (ss <= 0) {
+ cerr
+ << "WARNING: Model2 viterbi alignment has zero score for sentence pair:\n";
+ printSentencePair(es, fs, cerr);
+ }
+ best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs,
+ es);
}
void model3::collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs, const Vector<WordIndex>& A, LogProb score,
- float count) {
- WordIndex j, i, l, m;
- Vector<WordIndex> Fert(es.size(),0);
- l = es.size() - 1;
- m = fs.size() - 1;
- score *= LogProb(count);
- COUNT temp = COUNT(score) ;
- for (i=0; i <= l; i++)
- Fert[i] = 0;
- for (j = 1; j <= m; j++) {
- Fert[A[j]]++;
- tTable.incCount(es[A[j]], fs[j], temp);
- // tCountTable.getRef(es[A[j]], fs[j])+=score;
- if (A[j])
- dCountTable.addValue(j, A[j], l, m, temp);
- aCountTable.addValue(A[j], j, l, m, temp);
- }
- for (i = 0; i <= l; i++)
- nCountTable.addValue(es[i], Fert[i], temp);
- // p1_count += score * (LogProb) (Fert[0]) ;
- // p0_count += score * (LogProb) ((m - 2 * Fert[0])) ;
- p1_count += temp * (Fert[0]);
- p0_count += temp * ((m - 2 * Fert[0]));
+ const Vector<WordIndex>& fs, const Vector<WordIndex>& A, LogProb score,
+ float count)
+{
+ WordIndex j, i, l, m;
+ Vector<WordIndex> Fert(es.size(),0);
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ score *= LogProb(count);
+ COUNT temp = COUNT(score) ;
+ for (i=0; i <= l; i++)
+ Fert[i] = 0;
+ for (j = 1; j <= m; j++) {
+ Fert[A[j]]++;
+ tTable.incCount(es[A[j]], fs[j], temp);
+ // tCountTable.getRef(es[A[j]], fs[j])+=score;
+ if (A[j])
+ dCountTable.addValue(j, A[j], l, m, temp);
+ aCountTable.addValue(A[j], j, l, m, temp);
+ }
+ for (i = 0; i <= l; i++)
+ nCountTable.addValue(es[i], Fert[i], temp);
+ // p1_count += score * (LogProb) (Fert[0]) ;
+ // p0_count += score * (LogProb) ((m - 2 * Fert[0])) ;
+ p1_count += temp * (Fert[0]);
+ p0_count += temp * ((m - 2 * Fert[0]));
}
void model3::findAlignmentsNeighborhood(Vector<WordIndex>& es,
- Vector<WordIndex>& fs, LogProb&align_total_count,
- alignmodel&neighborhood, int i_peg = -1, int j_peg = -1)
+ Vector<WordIndex>& fs, LogProb&align_total_count,
+ alignmodel&neighborhood, int i_peg = -1, int j_peg = -1)
// Finding the Neigborhood of a best viterbi alignment after hill climbing
// if (i_peg == -1 and j_peg == -1, then No Pegging is done.
{
- LogProb best_score, score;
- WordIndex i, j, l, m, old_i, j1;
- Vector<WordIndex> A(fs.size(),0);
- Vector<WordIndex> Fert(es.size(),0);
- time_t it_st;
-
- best_score = 0;
- l = es.size() - 1;
- m = fs.size() - 1;
- findBestAlignment(es, fs, A, Fert, best_score, /*tTable, aTable,*/i_peg,
- j_peg);
- if (best_score == 0) {
- cerr
- << "WARNING: viterbi alignment score is zero for the following pair\n";
- printSentencePair(es, fs, cerr);
- }
- hillClimb(es, fs, A, Fert, best_score, tTable, i_peg, j_peg);
- if (best_score <= 0) {
- cerr
- << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, cerr);
- } else { // best_score > 0
- // if (2 * Fert[0] < m ){
- if (2*Fert[0] <= m) {
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- if (neighborhood.insert(A, best_score)) {
- align_total_count += best_score;
- }
- } else { // else part is added for debugging / Yaser
- cerr
- << "WARNING:Best Alignment found violates Fertility requiremnets !!\n";
- for (i = 0; i <= l; i++)
- cerr << "Fert["<<i<<"] = "<< Fert[i] << "\n";
- for (j = 1; j <= m; j++) {
- cerr << "A["<<j<<"] = "<< A[j] <<"\n";
- }
- cerr << "Condition violated : 2 * Fert[0] <= m " << 2*Fert[0] <<"?"
- << m << "\n";
- } // end of added code for debugging // Yaser
- it_st = time(NULL) ;
-
- // Now find add all neighbors of the best alignmet to the collection
- for (j = 1; j <= m; j++) {
- for (j1 = j + 1; j1 <= m; j1++) { // all possible swaps
- if (A[j] != A[j1]) {// make sure you are not swapping at same position
- // score = best_score * scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- score = best_score * scoreOfSwap(es, fs, A, tTable, j, j1);
- // ADD A and its score to list of alig. to collect counts over
- if (2 * Fert[0] <= m && score > 0) {
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- old_i = A[j];
- A[j] = A[j1];
- A[j1] = old_i;
- if (neighborhood.insert(A, score)) {
- align_total_count += score;
- }
- // restore original alignment
- old_i = A[j];
- A[j] = A[j1];
- A[j1] = old_i;
- }
- }
- }
- for (i = 0; i <= l; i++) { // all possible moves
- if (i != A[j]) { // make sure not to move to same position
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
- *(Fert[0]+1))) || (i != 0))) {
- // consider legal alignments only
- score = best_score * scoreOfMove(es, fs, A, Fert,
- tTable, j, i);
- // ADD A and its score to list of alig. to collect counts over
- if (score > 0) {
- old_i = A[j];
- A[j] = i;
- Fert[old_i]--;
- Fert[i]++;
- // add to list of alignemts here ******************
- if (neighborhood.insert(A, score)) {
- align_total_count += score;
- }
- // now resotre alignment and fertilities to previoud values
- A[j] = old_i;
- Fert[old_i]++;
- Fert[i]--;
- } // end of if (score > 0)
- } // end of if (i == 0 ...)
- } // end of if (i != A[j])
- }// end of for(i = 0 ; ...)
- }// end of for (j = 1 ; ...)
- } // of else best_score <= 0
+ LogProb best_score, score;
+ WordIndex i, j, l, m, old_i, j1;
+ Vector<WordIndex> A(fs.size(),0);
+ Vector<WordIndex> Fert(es.size(),0);
+ time_t it_st;
+
+ best_score = 0;
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ findBestAlignment(es, fs, A, Fert, best_score, /*tTable, aTable,*/i_peg,
+ j_peg);
+ if (best_score == 0) {
+ cerr
+ << "WARNING: viterbi alignment score is zero for the following pair\n";
+ printSentencePair(es, fs, cerr);
+ }
+ hillClimb(es, fs, A, Fert, best_score, tTable, i_peg, j_peg);
+ if (best_score <= 0) {
+ cerr
+ << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
+ printSentencePair(es, fs, cerr);
+ } else { // best_score > 0
+ // if (2 * Fert[0] < m ){
+ if (2*Fert[0] <= m) {
+ /* consider alignments that has Fert[0] less than
+ half the number of words in French sentence */
+ if (neighborhood.insert(A, best_score)) {
+ align_total_count += best_score;
+ }
+ } else { // else part is added for debugging / Yaser
+ cerr
+ << "WARNING:Best Alignment found violates Fertility requiremnets !!\n";
+ for (i = 0; i <= l; i++)
+ cerr << "Fert["<<i<<"] = "<< Fert[i] << "\n";
+ for (j = 1; j <= m; j++) {
+ cerr << "A["<<j<<"] = "<< A[j] <<"\n";
+ }
+ cerr << "Condition violated : 2 * Fert[0] <= m " << 2*Fert[0] <<"?"
+ << m << "\n";
+ } // end of added code for debugging // Yaser
+ it_st = time(NULL) ;
+
+ // Now find add all neighbors of the best alignmet to the collection
+ for (j = 1; j <= m; j++) {
+ for (j1 = j + 1; j1 <= m; j1++) { // all possible swaps
+ if (A[j] != A[j1]) {// make sure you are not swapping at same position
+ // score = best_score * scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
+ score = best_score * scoreOfSwap(es, fs, A, tTable, j, j1);
+ // ADD A and its score to list of alig. to collect counts over
+ if (2 * Fert[0] <= m && score > 0) {
+ /* consider alignments that has Fert[0] less than
+ half the number of words in French sentence */
+ old_i = A[j];
+ A[j] = A[j1];
+ A[j1] = old_i;
+ if (neighborhood.insert(A, score)) {
+ align_total_count += score;
+ }
+ // restore original alignment
+ old_i = A[j];
+ A[j] = A[j1];
+ A[j1] = old_i;
+ }
+ }
+ }
+ for (i = 0; i <= l; i++) { // all possible moves
+ if (i != A[j]) { // make sure not to move to same position
+ if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2
+ *(Fert[0]+1))) || (i != 0))) {
+ // consider legal alignments only
+ score = best_score * scoreOfMove(es, fs, A, Fert,
+ tTable, j, i);
+ // ADD A and its score to list of alig. to collect counts over
+ if (score > 0) {
+ old_i = A[j];
+ A[j] = i;
+ Fert[old_i]--;
+ Fert[i]++;
+ // add to list of alignemts here ******************
+ if (neighborhood.insert(A, score)) {
+ align_total_count += score;
+ }
+ // now resotre alignment and fertilities to previoud values
+ A[j] = old_i;
+ Fert[old_i]++;
+ Fert[i]--;
+ } // end of if (score > 0)
+ } // end of if (i == 0 ...)
+ } // end of if (i != A[j])
+ }// end of for(i = 0 ; ...)
+ }// end of for (j = 1 ; ...)
+ } // of else best_score <= 0
}
void model3::viterbi_loop(Perplexity& perp, Perplexity& viterbiPerp,
- sentenceHandler& sHandler1, bool dump_files, const char* alignfile,
- bool collect_counts, string model) {
- WordIndex i, j, l, m;
- ofstream of2;
- int pair_no;
- LogProb temp;
-
- if (dump_files)
- of2.open(alignfile);
- pair_no = 0; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear();
- sentPair sent;
- while (sHandler1.getNextSentence(sent)) {
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++;
- l = es.size() - 1;
- m = fs.size() - 1;
-
- LogProb align_total_count=0;
- // LogProb best_score;
-
- Vector<WordIndex> viterbi_alignment;
- LogProb viterbi_score;
- alignmodel neighborhood;
- neighborhood.clear();
- align_total_count = 0;
- findAlignmentsNeighborhood(
- /*tTable, aTable,*//*p1_count, p0_count,*/es, fs,
- align_total_count, neighborhood) ;
- if (Peg) {
- for (i = 0; i <= l; i++)
- for (j = 1; j <= m; j++) {
- if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH)
- && (aTable.getValue(i, j, l, m) > PROB_SMOOTH)
- && (dTable.getValue(j, i, l, m) > PROB_SMOOTH))
- findAlignmentsNeighborhood(/*tTable, aTable,*//*p1_count,
+ sentenceHandler& sHandler1, bool dump_files, const char* alignfile,
+ bool collect_counts, string model)
+{
+ WordIndex i, j, l, m;
+ ofstream of2;
+ int pair_no;
+ LogProb temp;
+
+ if (dump_files)
+ of2.open(alignfile);
+ pair_no = 0; // sentence pair number
+ // for each sentence pair in the corpus
+ perp.clear() ; // clears cross_entrop & perplexity
+ viterbiPerp.clear();
+ sentPair sent;
+ while (sHandler1.getNextSentence(sent)) {
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float count = sent.getCount();
+ if ((sent.sentenceNo % 1000) == 0)
+ cerr <<sent.sentenceNo << '\n';
+ time_t sent_s = time(NULL) ;
+ pair_no++;
+ l = es.size() - 1;
+ m = fs.size() - 1;
+
+ LogProb align_total_count=0;
+ // LogProb best_score;
+
+ Vector<WordIndex> viterbi_alignment;
+ LogProb viterbi_score;
+ alignmodel neighborhood;
+ neighborhood.clear();
+ align_total_count = 0;
+ findAlignmentsNeighborhood(
+ /*tTable, aTable,*//*p1_count, p0_count,*/es, fs,
+ align_total_count, neighborhood) ;
+ if (Peg) {
+ for (i = 0; i <= l; i++)
+ for (j = 1; j <= m; j++) {
+ if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH)
+ && (aTable.getValue(i, j, l, m) > PROB_SMOOTH)
+ && (dTable.getValue(j, i, l, m) > PROB_SMOOTH))
+ findAlignmentsNeighborhood(/*tTable, aTable,*//*p1_count,
p0_count, */es, fs, align_total_count, neighborhood, i,
- j);
- }
- }
- // Now Collect counts over saved neighborhoods
- viterbi_score = 0;
- if (Verbose)
- cerr << "\nCollecting counts over found alignments, total prob: "
- << align_total_count << "\n";
- alignment_hash::iterator align;
- int acount = 0;
- if (align_total_count == 0) {
- cerr << " WARNINIG: For the following sentence pair : \n";
- printSentencePair(es, fs, cerr);
- cerr << "The collection of alignments found have 0 probability!!\n";
- cerr << "No counts will be collected of it \n";
- } else {
- if (collect_counts) {
- for (align = neighborhood.begin(); align != neighborhood.end(); align++) {
- temp = (*align).second/align_total_count;
- collectCountsOverAlignement(/*tTable, aCountTable, */es,
- fs, /*p1_count,
+ j);
+ }
+ }
+ // Now Collect counts over saved neighborhoods
+ viterbi_score = 0;
+ if (Verbose)
+ cerr << "\nCollecting counts over found alignments, total prob: "
+ << align_total_count << "\n";
+ alignment_hash::iterator align;
+ int acount = 0;
+ if (align_total_count == 0) {
+ cerr << " WARNINIG: For the following sentence pair : \n";
+ printSentencePair(es, fs, cerr);
+ cerr << "The collection of alignments found have 0 probability!!\n";
+ cerr << "No counts will be collected of it \n";
+ } else {
+ if (collect_counts) {
+ for (align = neighborhood.begin(); align != neighborhood.end(); align++) {
+ temp = (*align).second/align_total_count;
+ collectCountsOverAlignement(/*tTable, aCountTable, */es,
+ fs, /*p1_count,
p0_count ,*/((*align).first), temp, count);
- acount++;
- if (viterbi_score < temp) {
- viterbi_alignment = ((*align).first);
- viterbi_score = temp;
- }
- }
- } // end of if (collect_counts)
- perp.addFactor(log(double(align_total_count)), count, l, m, 0);
- viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m, 0);
-
- if (Verbose) {
- cerr << "Collected counts over "<<acount <<" (of " << pow(
- double(m), double(l+1)) <<") differnet alignments\n";
- cerr << "Bucket count of alignments hash: "
- << neighborhood.getHash().bucket_count()<< ", size "
- << neighborhood.getHash().size() << "\n";
- }
- } // end of else
- // write best alignment (viterbi) for this sentence pair to alignment file
- if (collect_counts) {
- if (viterbi_score <= 0) {
- cerr << "Viterbi Alignment for this pair have score zero!!\n";
- of2 << "\n\n";
- } else {
- if (dump_files)
- printAlignToFile(es, fs, Elist.getVocabList(),
- Flist.getVocabList(), of2, viterbi_alignment,
- pair_no, viterbi_score);
- addAL(viterbi_alignment, sent.sentenceNo, l);
- }
- } // end of if (collect_counts)
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- sHandler1.rewind();
- errorReportAL(cerr, model);
- perp.record(model);
- viterbiPerp.record(model);
- if (dump_files)
- of2.close();
+ acount++;
+ if (viterbi_score < temp) {
+ viterbi_alignment = ((*align).first);
+ viterbi_score = temp;
+ }
+ }
+ } // end of if (collect_counts)
+ perp.addFactor(log(double(align_total_count)), count, l, m, 0);
+ viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m, 0);
+
+ if (Verbose) {
+ cerr << "Collected counts over "<<acount <<" (of " << pow(
+ double(m), double(l+1)) <<") differnet alignments\n";
+ cerr << "Bucket count of alignments hash: "
+ << neighborhood.getHash().bucket_count()<< ", size "
+ << neighborhood.getHash().size() << "\n";
+ }
+ } // end of else
+ // write best alignment (viterbi) for this sentence pair to alignment file
+ if (collect_counts) {
+ if (viterbi_score <= 0) {
+ cerr << "Viterbi Alignment for this pair have score zero!!\n";
+ of2 << "\n\n";
+ } else {
+ if (dump_files)
+ printAlignToFile(es, fs, Elist.getVocabList(),
+ Flist.getVocabList(), of2, viterbi_alignment,
+ pair_no, viterbi_score);
+ addAL(viterbi_alignment, sent.sentenceNo, l);
+ }
+ } // end of if (collect_counts)
+ double period = difftime(time(NULL), sent_s);
+ if (Verbose)
+ cerr << "processing this sentence pair took : " << period
+ << " seconds\n";
+
+ } /* of sentence pair E, F */
+ sHandler1.rewind();
+ errorReportAL(cerr, model);
+ perp.record(model);
+ viterbiPerp.record(model);
+ if (dump_files)
+ of2.close();
}
diff --git a/mgizapp/src/model3_viterbi_with_tricks.cpp b/mgizapp/src/model3_viterbi_with_tricks.cpp
index 1596643..021a3d3 100644
--- a/mgizapp/src/model3_viterbi_with_tricks.cpp
+++ b/mgizapp/src/model3_viterbi_with_tricks.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -50,140 +50,141 @@ int PrintHillClimbWarning=0;
int PrintZeroScoreWarning=0;
-LogProb model3::viterbi_model2(const transpair_modelhmm&ef, alignment&output, int
+LogProb model3::viterbi_model2(const transpair_modelhmm&ef, alignment&output, int
#ifdef STORE_HMM_ALIGNMENTS
-pair_no
+ pair_no
#endif
-, int i_peg , int j_peg )const
+ , int i_peg , int j_peg )const
{
- static Vector<pair<alignment,LogProb> > viterbis;
- Vector<int>vit;
- int m=ef.get_m();
- int l=ef.get_l();
- double ret=0.0;
- //#define STORE_HMM_ALIGNMENTS
+ static Vector<pair<alignment,LogProb> > viterbis;
+ Vector<int>vit;
+ int m=ef.get_m();
+ int l=ef.get_l();
+ double ret=0.0;
+ //#define STORE_HMM_ALIGNMENTS
#ifdef STORE_HMM_ALIGNMENTS
- if( i_peg==-1 && j_peg==-1 && viterbis.size()>pair_no ){
- output=viterbis[pair_no].first;
- ret=viterbis[pair_no].second;
- massert( ret==HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply );
- } else{
- ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++){
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
- }
- if( i_peg==-1 && j_peg==-1 ){
- iassert(viterbis.size()==pair_no);
- viterbis.push_back(make_pair(output,ret));
- }
- }
-#else
+ if( i_peg==-1 && j_peg==-1 && viterbis.size()>pair_no ) {
+ output=viterbis[pair_no].first;
+ ret=viterbis[pair_no].second;
+ massert( ret==HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply );
+ } else {
ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++){
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
+ for(int j=1; j<=m; j++) {
+ if( vit[j-1]+1>l )
+ output.set(j,0);
+ else
+ output.set(j,vit[j-1]+1);
+ massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
}
-#endif
- massert( j_peg==-1 || int(output(j_peg))==i_peg );
- if( j_peg!=-1 )
- massert(int(output(j_peg))==i_peg);
- if( output.valid() )
- return ret;
- else{
- return _viterbi_model2(ef,output,i_peg,j_peg);
+ if( i_peg==-1 && j_peg==-1 ) {
+ iassert(viterbis.size()==pair_no);
+ viterbis.push_back(make_pair(output,ret));
}
+ }
+#else
+ ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
+ for(int j=1; j<=m; j++) {
+ if( vit[j-1]+1>l )
+ output.set(j,0);
+ else
+ output.set(j,vit[j-1]+1);
+ massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
+ }
+#endif
+ massert( j_peg==-1 || int(output(j_peg))==i_peg );
+ if( j_peg!=-1 )
+ massert(int(output(j_peg))==i_peg);
+ if( output.valid() )
+ return ret;
+ else {
+ return _viterbi_model2(ef,output,i_peg,j_peg);
+ }
}
-LogProb model3::_viterbi_model2(const transpair_model2&ef, alignment&output, int i_peg, int j_peg)const{
- WordIndex best_i=0;
- LogProb ss=1;
- PositionIndex l = ef.get_l(), m=ef.get_m();
- Vector<WordIndex> Fert(l+1, (WordIndex)0);
- if ((j_peg != -1) && (i_peg != -1)){
- output.set(j_peg, i_peg);
- ss *= ef.get_t(i_peg, j_peg) * ef.get_a(i_peg, j_peg);
- if( ss==0 )
- cerr << "WARNING: already starting is zero: " << ef.get_t(i_peg, j_peg) << " " << ef.get_a(i_peg, j_peg) << '\n';
- }else
- ss=1;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg){
- LogProb score = 0 ;
- for (PositionIndex i = 0 ; i <= l ; i++){
- if( Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Fert[0] + 1)))){
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score ){
- best_i = i ;
- score = temp ;
- }
- }
- }
- if (score == 0){
- cerr << "WARNING: In searching for model2 best alignment\n";
- cerr << "Nothing was set for target token at position j: " << j << "\n";
- for (PositionIndex i = 0 ; i <= l ; i++){
- cerr << "i: " << i << "ttable("<<i<<", "<<j<<") = " <<
- ef.get_t(i, j) << " atable(" << i<<", "<<j<<", "<<
- l<<", "<<m<<") = "<< ef.get_a(i, j) << " product " <<
- ef.get_t(i, j) * ef.get_a(i, j) ;
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
- || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
- }else{
- output.set(j, best_i);
- Fert[best_i]++;
+LogProb model3::_viterbi_model2(const transpair_model2&ef, alignment&output, int i_peg, int j_peg)const
+{
+ WordIndex best_i=0;
+ LogProb ss=1;
+ PositionIndex l = ef.get_l(), m=ef.get_m();
+ Vector<WordIndex> Fert(l+1, (WordIndex)0);
+ if ((j_peg != -1) && (i_peg != -1)) {
+ output.set(j_peg, i_peg);
+ ss *= ef.get_t(i_peg, j_peg) * ef.get_a(i_peg, j_peg);
+ if( ss==0 )
+ cerr << "WARNING: already starting is zero: " << ef.get_t(i_peg, j_peg) << " " << ef.get_a(i_peg, j_peg) << '\n';
+ } else
+ ss=1;
+ for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg) {
+ LogProb score = 0 ;
+ for (PositionIndex i = 0 ; i <= l ; i++) {
+ if( Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Fert[0] + 1)))) {
+ LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
+ if (temp > score ) {
+ best_i = i ;
+ score = temp ;
+ }
}
- ss *= score;
- }
- if (ss <= 0){
- //cerr << ef;
- cerr << "WARNING: Model2 viterbi alignment has zero score.\n" ;
- cerr << "Here are the different elements that made this alignment probability zero \n";
- cerr << "Source length " << l << " target length " << m << '\n';
- LogProb gg=1 ; // for debugging only .....
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg){
- LogProb score = 0 ;
- LogProb a = 0, t =0 ;
- for (PositionIndex i = 0 ; i <= l ; i++){
- // if( Debug_Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Debug_Fert[0] + 1)))){
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score ){
- score = temp ;
- best_i = i ;
- a = ef.get_a(i, j);
- t = ef.get_t(i, j) ;
- }
- // }
- }
- gg *= score ;
- cerr << "best: fs[" << j << "] "<< j <<" : es[" << best_i << "] " <<
- best_i << " , a: " << ef.get_a(best_i, j) << " t: " << t << " score " << score << " product : " << gg << " ss " <<
- ss << '\n';
+ }
+ if (score == 0) {
+ cerr << "WARNING: In searching for model2 best alignment\n";
+ cerr << "Nothing was set for target token at position j: " << j << "\n";
+ for (PositionIndex i = 0 ; i <= l ; i++) {
+ cerr << "i: " << i << "ttable("<<i<<", "<<j<<") = " <<
+ ef.get_t(i, j) << " atable(" << i<<", "<<j<<", "<<
+ l<<", "<<m<<") = "<< ef.get_a(i, j) << " product " <<
+ ef.get_t(i, j) * ef.get_a(i, j) ;
+ if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
+ || (i != 0)))
+ cerr <<"Passed fertility condition \n";
+ else
+ cerr <<"Failed fertility condition \n";
}
- for(PositionIndex i = 0 ; i <= l ; i++)
- cerr << "Fert["<<i<<"] selected " << Fert[i] << '\n';
+ } else {
+ output.set(j, best_i);
+ Fert[best_i]++;
+ }
+ ss *= score;
}
- massert(output.valid());
- return ss;
+ if (ss <= 0) {
+ //cerr << ef;
+ cerr << "WARNING: Model2 viterbi alignment has zero score.\n" ;
+ cerr << "Here are the different elements that made this alignment probability zero \n";
+ cerr << "Source length " << l << " target length " << m << '\n';
+ LogProb gg=1 ; // for debugging only .....
+ for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg) {
+ LogProb score = 0 ;
+ LogProb a = 0, t =0 ;
+ for (PositionIndex i = 0 ; i <= l ; i++) {
+ // if( Debug_Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Debug_Fert[0] + 1)))){
+ LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
+ if (temp > score ) {
+ score = temp ;
+ best_i = i ;
+ a = ef.get_a(i, j);
+ t = ef.get_t(i, j) ;
+ }
+ // }
+ }
+ gg *= score ;
+ cerr << "best: fs[" << j << "] "<< j <<" : es[" << best_i << "] " <<
+ best_i << " , a: " << ef.get_a(best_i, j) << " t: " << t << " score " << score << " product : " << gg << " ss " <<
+ ss << '\n';
+ }
+ for(PositionIndex i = 0 ; i <= l ; i++)
+ cerr << "Fert["<<i<<"] selected " << Fert[i] << '\n';
+ }
+ massert(output.valid());
+ return ss;
}
LogProb model3::viterbi_model2(const transpair_model3&ef, alignment&output, int pair_no,int i_peg , int j_peg )const
{
- if( h&&UseHMMViterbiAlignmentIfPossible ){
- transpair_modelhmm efhmm(ef.E,ef.F,tTable,aTable,dTable,nTable,0.0,0.0,h);
- LogProb ret=viterbi_model2(efhmm,output,pair_no,i_peg,j_peg);
- massert(output.valid());
- return ret;
- }
- return _viterbi_model2(ef,output,i_peg,j_peg);
+ if( h&&UseHMMViterbiAlignmentIfPossible ) {
+ transpair_modelhmm efhmm(ef.E,ef.F,tTable,aTable,dTable,nTable,0.0,0.0,h);
+ LogProb ret=viterbi_model2(efhmm,output,pair_no,i_peg,j_peg);
+ massert(output.valid());
+ return ret;
+ }
+ return _viterbi_model2(ef,output,i_peg,j_peg);
}
//int HillClimbingSteps=0;
@@ -195,62 +196,56 @@ LogProb greedyClimb_WithIBM3Scoring(MoveSwapMatrix<TRANSPAIR>&msc2,int& HillClim
int changed=0;
int iter=0;
bool hereVERB=0;
- do
- {
- MoveSwapMatrix<typename TRANSPAIR::simpler_transpair_model> msc_IBM3(msc2.get_ef(),alignment(msc2));
- vector<pair<double,OneMoveSwap> > msvec;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)
- if((aj != msc2(j1)) && (int(j1) != j_peg))
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cswap(j,j1),OneMoveSwap(1,j,j1)));
- for (PositionIndex i = 0 ; i <= l ; i++)
- if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cmove(i,j),OneMoveSwap(2,i,j)));
- }
- sort(msvec.begin(),msvec.end());
- HillClimbingSteps++;
- int iused=-1;
- changed=0;
- for(unsigned int i=0;i<msvec.size()&&changed==0;++i)
- {
- LogProb csts;
- const OneMoveSwap &oms=msvec[i].second;
- if( oms.type==1&&(csts=msc2.cswap(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "SWAP: " << csts << '\n';
- msc2.doSwap(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- if( oms.type==2&&(csts=msc2.cmove(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "MOVE: " << csts << '\n';
- msc2.doMove(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- }
- if( ++iter>30 )
- {
- //msc2.ef.verboseTP=1;
- hereVERB=1;
- cerr << "ERROR: more than 30 iterations in hill-climbing: " << iused
- << " improvement: " << msvec[iused].first << " value:" << msvec[iused].second
- << '\n' << msc2 << '\n';
- for(int a=0;a<20;++a)
- cout << a << ' ' << msvec[a].first << ' ' << msvec[a].second << '\n';
- //cerr << msvec << '\n';
- }
- if( iter>50 )
- break;
- } while(changed);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
+ do {
+ MoveSwapMatrix<typename TRANSPAIR::simpler_transpair_model> msc_IBM3(msc2.get_ef(),alignment(msc2));
+ vector<pair<double,OneMoveSwap> > msvec;
+ for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg) {
+ WordIndex aj=msc2(j);
+ for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)
+ if((aj != msc2(j1)) && (int(j1) != j_peg))
+ msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cswap(j,j1),OneMoveSwap(1,j,j1)));
+ for (PositionIndex i = 0 ; i <= l ; i++)
+ if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
+ msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cmove(i,j),OneMoveSwap(2,i,j)));
+ }
+ sort(msvec.begin(),msvec.end());
+ HillClimbingSteps++;
+ int iused=-1;
+ changed=0;
+ for(unsigned int i=0; i<msvec.size()&&changed==0; ++i) {
+ LogProb csts;
+ const OneMoveSwap &oms=msvec[i].second;
+ if( oms.type==1&&(csts=msc2.cswap(oms.a,oms.b))>1.0001 ) {
+ if( hereVERB==1 )
+ cerr << "SWAP: " << csts << '\n';
+ msc2.doSwap(oms.a,oms.b);
+ changed=1;
+ iused=i;
+ break;
+ }
+ if( oms.type==2&&(csts=msc2.cmove(oms.a,oms.b))>1.0001 ) {
+ if( hereVERB==1 )
+ cerr << "MOVE: " << csts << '\n';
+ msc2.doMove(oms.a,oms.b);
+ changed=1;
+ iused=i;
+ break;
+ }
+ }
+ if( ++iter>30 ) {
+ //msc2.ef.verboseTP=1;
+ hereVERB=1;
+ cerr << "ERROR: more than 30 iterations in hill-climbing: " << iused
+ << " improvement: " << msvec[iused].first << " value:" << msvec[iused].second
+ << '\n' << msc2 << '\n';
+ for(int a=0; a<20; ++a)
+ cout << a << ' ' << msvec[a].first << ' ' << msvec[a].second << '\n';
+ //cerr << msvec << '\n';
+ }
+ if( iter>50 )
+ break;
+ } while(changed);
+ return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
}
template<class TRANSPAIR>
@@ -260,19 +255,17 @@ LogProb greedyClimb(MoveSwapMatrix<TRANSPAIR>&msc2, int& HillClimbingSteps, int
return greedyClimb_WithIBM3Scoring(msc2,HillClimbingSteps,j_peg);
PositionIndex l = msc2.get_l(), m=msc2.get_m();
int changed=0;
- do
- {
- HillClimbingSteps++;
- changed=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg)&&msc2.cswap(j, j1) > 1.0)
- msc2.doSwap(j, j1), changed=1;
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY && msc2.cmove(i, j)>1.0)
- msc2.doMove(i, j), changed=1;
- }
- } while (changed);
+ do {
+ HillClimbingSteps++;
+ changed=0;
+ for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg) {
+ WordIndex aj=msc2(j);
+ for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg)&&msc2.cswap(j, j1) > 1.0)
+ msc2.doSwap(j, j1), changed=1;
+ for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY && msc2.cmove(i, j)>1.0)
+ msc2.doMove(i, j), changed=1;
+ }
+ } while (changed);
return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
}
@@ -280,75 +273,65 @@ template<class TRANSPAIR>
LogProb hillClimb_std(MoveSwapMatrix<TRANSPAIR>&msc2, int &HillClimbingSteps,int= -1,int j_peg = -1)
{
if( msc2.isLazy() )
- return greedyClimb_WithIBM3Scoring(msc2,HillClimbingSteps,j_peg);
+ return greedyClimb_WithIBM3Scoring(msc2,HillClimbingSteps,j_peg);
if( LogHillClimb>1 )
cout << msc2 << '\n';
PositionIndex l = msc2.get_l(), m=msc2.get_m();
int changes=0;
int best_change_type=-1, best_change_v1=-1, best_change_v2=-1;
- do
- {
- HillClimbingSteps++;
- LogProb best_change_so_far = 1.00001 ;
- best_change_type=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg))
- {
- LogProb change = msc2.cswap(j, j1);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=1;
- best_change_v1=j;
- best_change_v2=j1;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- {
- LogProb change = msc2.cmove(i, j);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=2;
- best_change_v1=j;
- best_change_v2=i;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- }
- if (best_change_type==1)
- {
- msc2.doSwap(best_change_v1, best_change_v2);
- if( LogHillClimb )
- cerr << "SW-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- if (best_change_type==2)
- {
- msc2.doMove(best_change_v2, best_change_v1);
- if( LogHillClimb )
- cerr << "MO-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- changes++;
- if( changes>40 )
- {
- if( PrintHillClimbWarning++<1000 )
- cerr << "WARNING: already " << changes << " iterations in hillclimb: " << best_change_so_far << " " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << '\n';
- else if (PrintHillClimbWarning==1000)
- cerr << "ERROR: too many hill climbing warnings => I do not print more.\n";
- }
- if(changes>60 )
- {
- cerr << msc2 << '\n';
- break;
- }
- } while (best_change_type);
+ do {
+ HillClimbingSteps++;
+ LogProb best_change_so_far = 1.00001 ;
+ best_change_type=0;
+ for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg) {
+ WordIndex aj=msc2(j);
+ for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg)) {
+ LogProb change = msc2.cswap(j, j1);
+ if (change > best_change_so_far) {
+ best_change_so_far = change ;
+ best_change_type=1;
+ best_change_v1=j;
+ best_change_v2=j1;
+ if( LogHillClimb )
+ cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
+ massert(msc2.get_ef().isSubOptimal()==1);
+ }
+ }
+ for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY) {
+ LogProb change = msc2.cmove(i, j);
+ if (change > best_change_so_far) {
+ best_change_so_far = change ;
+ best_change_type=2;
+ best_change_v1=j;
+ best_change_v2=i;
+ if( LogHillClimb )
+ cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
+ massert(msc2.get_ef().isSubOptimal()==1);
+ }
+ }
+ }
+ if (best_change_type==1) {
+ msc2.doSwap(best_change_v1, best_change_v2);
+ if( LogHillClimb )
+ cerr << "SW-CLIMB-DONE: " << j_peg << msc2 << '\n';
+ }
+ if (best_change_type==2) {
+ msc2.doMove(best_change_v2, best_change_v1);
+ if( LogHillClimb )
+ cerr << "MO-CLIMB-DONE: " << j_peg << msc2 << '\n';
+ }
+ changes++;
+ if( changes>40 ) {
+ if( PrintHillClimbWarning++<1000 )
+ cerr << "WARNING: already " << changes << " iterations in hillclimb: " << best_change_so_far << " " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << '\n';
+ else if (PrintHillClimbWarning==1000)
+ cerr << "ERROR: too many hill climbing warnings => I do not print more.\n";
+ }
+ if(changes>60 ) {
+ cerr << msc2 << '\n';
+ break;
+ }
+ } while (best_change_type);
return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
}
@@ -357,24 +340,21 @@ bool extendCenterList(Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >&setOfGo
{
unsigned int l=msc->get_ef().get_l();
set<OneMoveSwap> alreadyCovered;
- for(unsigned int nr=0;nr<setOfGoodCenters.size();nr++)
+ for(unsigned int nr=0; nr<setOfGoodCenters.size(); nr++)
makeOneMoveSwap(*setOfGoodCenters[nr].first,*msc,alreadyCovered);
- for(set<OneMoveSwap>::const_iterator i=alreadyCovered.begin();i!=alreadyCovered.end();++i)
- {
- if( i->type==1||i->type==4)
- msc->delCenter();
- if( i->type==1 )
- {
- for(unsigned int ii=0;ii<=l;++ii)
- if( (*msc)(i->a)!=ii )
- msc->delMove(ii,i->a);
- }
- else if( i->type==2||i->type==4 )
- msc->delSwap(i->a,i->b);
- else if( i->type==3 )
- msc->delMove(i->b,i->a);
- else abort();
- }
+ for(set<OneMoveSwap>::const_iterator i=alreadyCovered.begin(); i!=alreadyCovered.end(); ++i) {
+ if( i->type==1||i->type==4)
+ msc->delCenter();
+ if( i->type==1 ) {
+ for(unsigned int ii=0; ii<=l; ++ii)
+ if( (*msc)(i->a)!=ii )
+ msc->delMove(ii,i->a);
+ } else if( i->type==2||i->type==4 )
+ msc->delSwap(i->a,i->b);
+ else if( i->type==3 )
+ msc->delMove(i->b,i->a);
+ else abort();
+ }
setOfGoodCenters.push_back(make_pair(msc,peggedAlignmentScore));
return 1;
}
@@ -391,458 +371,466 @@ public:
};
inline bool operator<(const Als&x,const Als&y)
-{return x.v>y.v;}
+{
+ return x.v>y.v;
+}
template<class MODEL_TYPE, class ADDITIONAL_MODEL_DATA_IN,class ADDITIONAL_MODEL_DATA_OUT>
-void model3::viterbi_loop_with_tricks(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model, bool final,
- ADDITIONAL_MODEL_DATA_IN*dm_in,
- ADDITIONAL_MODEL_DATA_OUT*dm_out){
- ofstream *writeNBestErrorsFile=0;
- if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
- string x=alignfile+string("NBEST");
- writeNBestErrorsFile= new ofstream(x.c_str());
+void model3::viterbi_loop_with_tricks(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
+ bool dump_files, const char* alignfile,
+ bool collect_counts, string model, bool final,
+ ADDITIONAL_MODEL_DATA_IN*dm_in,
+ ADDITIONAL_MODEL_DATA_OUT*dm_out)
+{
+ ofstream *writeNBestErrorsFile=0;
+ if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
+ string x=alignfile+string("NBEST");
+ writeNBestErrorsFile= new ofstream(x.c_str());
+ }
+ ofstream *of3=0;
+ PositionIndex l, m ;
+ ofstream of2;
+ int pair_no;
+ int HillClimbingSteps=0;
+ NumberOfAlignmentsInSophisticatedCountCollection=0;
+ if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) ) {
+ of2.open(alignfile);
+ if(of2.is_open()) {
+ cout << "I will write alignment to " << alignfile << endl;
}
- ofstream *of3=0;
- PositionIndex l, m ;
- ofstream of2;
- int pair_no;
- int HillClimbingSteps=0;
- NumberOfAlignmentsInSophisticatedCountCollection=0;
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) ){
- of2.open(alignfile);
- if(of2.is_open()){
- cout << "I will write alignment to " << alignfile << endl;
- }
+ }
+ /* if(!of2.is_open()){
+ cerr << "I don't know why you do not let me dump file " << alignfile << endl;
+ }*/
+ if( dump_files&&PrintN&&final ) {
+ string x=alignfile+string("NBEST");
+ of3= new ofstream(x.c_str());
+ }
+ pair_no = 0 ; // sentence pair number
+ // for each sentence pair in the corpus
+ perp.clear() ; // clears cross_entrop & perplexity
+ viterbiPerp.clear() ; // clears cross_entrop & perplexity
+ sentPair sent ;
+ int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
+ while(sHandler1.getNextSentence(sent)) {
+ if( sent.eSent.size()==1||sent.fSent.size()==1 )
+ continue;
+// SentNr=sent.sentenceNo;
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float count = sent.getCount();
+ if ((sent.sentenceNo % 10000) == 0)
+ cerr <<sent.sentenceNo << '\n';
+ time_t sent_s = time(NULL) ;
+ pair_no++ ;
+ l = es.size() - 1 ;
+ m = fs.size() - 1 ;
+
+ LogProb align_total_count=0;
+ alignment viterbi2alignment(l,m);
+ MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
+ viterbi_model2(ef,viterbi2alignment,pair_no-1);
+ Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
+ set<alignment> alignments;
+ MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
+ MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
+ if( ef.isSubOptimal() )
+ setOfGoodCenters[0].second = hillClimb_std(*best,HillClimbingSteps);
+ else {
+ setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
+ if( setOfGoodCenters[0].second==0 ) {
+ cerr << "PROBLEM: alignment is 0.\n";
+ best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
+ }
}
-/* if(!of2.is_open()){
- cerr << "I don't know why you do not let me dump file " << alignfile << endl;
- }*/
- if( dump_files&&PrintN&&final ){
- string x=alignfile+string("NBEST");
- of3= new ofstream(x.c_str());
+ int bestAlignment=0;
+
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ alignments.insert(*best);
+ if (setOfGoodCenters[bestAlignment].second <= 0) {
+ if( PrintZeroScoreWarning++<100 ) {
+ cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
+ cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
+ printSentencePair(es, fs, cerr);
+ } else if(PrintZeroScoreWarning==100) {
+ cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
+ }
+ setOfGoodCenters[bestAlignment].second=1e-300;
+ continue;
}
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear() ; // clears cross_entrop & perplexity
- sentPair sent ;
- int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
- while(sHandler1.getNextSentence(sent)){
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- continue;
-// SentNr=sent.sentenceNo;
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 10000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
-
- LogProb align_total_count=0;
- alignment viterbi2alignment(l,m);
- MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
- viterbi_model2(ef,viterbi2alignment,pair_no-1);
- Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
- set<alignment> alignments;
- MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
- MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
- if( ef.isSubOptimal() )
- setOfGoodCenters[0].second = hillClimb_std(*best,HillClimbingSteps);
- else{
- setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
- if( setOfGoodCenters[0].second==0 ){
- cerr << "PROBLEM: alignment is 0.\n";
- best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
- }
- }
- int bestAlignment=0;
-
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- alignments.insert(*best);
- if (setOfGoodCenters[bestAlignment].second <= 0){
- if( PrintZeroScoreWarning++<100 ){
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
- printSentencePair(es, fs, cerr);
+ int nHillClimbed=1,nAlignment=1;
+ bool flagBetterByPegging=0;
+ if ( Peg ) {
+ const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
+ Array2<short, vector<short> > linkCache(l+1, m+1, false);
+ if(UseLinkCache)for(unsigned int j=1; j<=m; j++)linkCache((*useMatrix)(j), j)=1;
+ for(PositionIndex j=1; j<=m; j++)for(PositionIndex i=0; i<=l; i++) {
+ nAlignment++;
+ if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
+ ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
+ (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))) {
+ MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
+ LogProb peggedAlignmentScore;
+ nHillClimbed++;
+ if( ef.isSubOptimal() ) {
+ BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
+ BESTPEGGED->doMove(i, j);
+ peggedAlignmentScore= hillClimb_std(*BESTPEGGED,HillClimbingSteps, i,j);
+ } else {
+ alignment pegAlignment(l,m);
+ peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
+ BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
+ massert( pegAlignment(j)==i );
}
- else if(PrintZeroScoreWarning==100) {
- cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
- }
- setOfGoodCenters[bestAlignment].second=1e-300;
- continue;
- }
- int nHillClimbed=1,nAlignment=1;
- bool flagBetterByPegging=0;
- if ( Peg ){
- const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
- Array2<short, vector<short> > linkCache(l+1, m+1, false);
- if(UseLinkCache)for(unsigned int j=1;j<=m;j++)linkCache((*useMatrix)(j), j)=1;
- for(PositionIndex j=1;j<=m;j++)for(PositionIndex i=0;i<=l;i++){
- nAlignment++;
- if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
- ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
- (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))){
- MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
- LogProb peggedAlignmentScore;
- nHillClimbed++;
- if( ef.isSubOptimal() ){
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
- BESTPEGGED->doMove(i, j);
- peggedAlignmentScore= hillClimb_std(*BESTPEGGED,HillClimbingSteps, i,j);
- }else{
- alignment pegAlignment(l,m);
- peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
- massert( pegAlignment(j)==i );
- }
- if(UseLinkCache)
- for(unsigned int j=1;j<=m;j++)
- linkCache((*BESTPEGGED)(j), j)=1;
- if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ){
- if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)){
- alignments.insert(*BESTPEGGED);
- if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ){
- if( LogPeg ){
- cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
- cerr << "NEW BEST: " << alignment(*BESTPEGGED);
- cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
- }
- flagBetterByPegging=1;
- bestAlignment=alignments.size()-1;
- }
- }
- assert( differences(*BESTPEGGED, *best)!=0 );
- BESTPEGGED=0;
- }else
- delete BESTPEGGED;
+ if(UseLinkCache)
+ for(unsigned int j=1; j<=m; j++)
+ linkCache((*BESTPEGGED)(j), j)=1;
+ if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ) {
+ if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)) {
+ alignments.insert(*BESTPEGGED);
+ if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ) {
+ if( LogPeg ) {
+ cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
+ cerr << "NEW BEST: " << alignment(*BESTPEGGED);
+ cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
+ }
+ flagBetterByPegging=1;
+ bestAlignment=alignments.size()-1;
}
- }
- } // end of if(Peg)
- NBetterByPegging+=flagBetterByPegging;
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( LogPeg>1 )
- cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
- int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
- dCountTable, nCountTable, p1_count, p0_count,
- align_total_count, count, collect_counts, dm_out);
- if( LogPeg>1 ){
- cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
- massert(alTotal<=pow(double(l+1),double(m)));
+ }
+ assert( differences(*BESTPEGGED, *best)!=0 );
+ BESTPEGGED=0;
+ } else
+ delete BESTPEGGED;
+ }
}
- NCenter+=setOfGoodCenters.size();NHillClimbed+=nHillClimbed;NAlignment+=nAlignment;NTotal+=alTotal;
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
- massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
- if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
- setOfGoodCenters[bestAlignment].second);
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ){
- vector<Als> als;
- for(unsigned int s=0;s<setOfGoodCenters.size();++s){
- const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
- msc.check();
- double normalized_ascore=setOfGoodCenters[s].second;
- if( !msc.isCenterDeleted() )
- als.push_back( Als(s,0,0,normalized_ascore) );
-
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex i=0;i<=l;i++)
- if( i!=msc(j)&& !msc.isDelMove(i,j) )
- als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
- for(PositionIndex j1=1;j1<=m;j1++)
- for(PositionIndex j2=j1+1;j2<=m;j2++)
- if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
- als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
- }
- sort(als.begin(),als.end());
- double sum=0,sum2=0;
- for(unsigned int i=0;i<als.size();++i)
- sum+=als[i].v;
- for(unsigned int i=0;i<min((unsigned int)als.size(),(unsigned int)PrintN);++i){
- alignment x=*setOfGoodCenters[als[i].s].first;
- if( !(als[i].a==0 && als[i].b==0) ){
- if( als[i].a<=0&&als[i].b<=0 )
- x.doSwap(-als[i].a,-als[i].b);
- else
- x.doMove(als[i].a,als[i].b);
- }
- if( of3&&i<PrintN )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
- als[i].v/sum*count);
- sum2+=als[i].v;
- if( writeNBestErrorsFile ){
- if( pair_no<int(ReferenceAlignment.size()) ){
- int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
- vector<double> scores;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- ef.computeScores(x,scores);
- *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
- for(unsigned int i=0;i<scores.size();++i)
- *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
- *writeNBestErrorsFile << '\n';
- }
- }
- }
- if( writeNBestErrorsFile )
- *writeNBestErrorsFile << '\n';
+ } // end of if(Peg)
+ NBetterByPegging+=flagBetterByPegging;
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ if( LogPeg>1 )
+ cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
+ int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
+ dCountTable, nCountTable, p1_count, p0_count,
+ align_total_count, count, collect_counts, dm_out);
+ if( LogPeg>1 ) {
+ cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
+ massert(alTotal<=pow(double(l+1),double(m)));
+ }
+ NCenter+=setOfGoodCenters.size();
+ NHillClimbed+=nHillClimbed;
+ NAlignment+=nAlignment;
+ NTotal+=alTotal;
+ perp.addFactor(log(double(align_total_count)), count, l, m,0);
+ viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
+ massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
+ if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
+ setOfGoodCenters[bestAlignment].second);
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ) {
+ vector<Als> als;
+ for(unsigned int s=0; s<setOfGoodCenters.size(); ++s) {
+ const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
+ msc.check();
+ double normalized_ascore=setOfGoodCenters[s].second;
+ if( !msc.isCenterDeleted() )
+ als.push_back( Als(s,0,0,normalized_ascore) );
+
+ for(WordIndex j=1; j<=m; j++)
+ for(WordIndex i=0; i<=l; i++)
+ if( i!=msc(j)&& !msc.isDelMove(i,j) )
+ als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
+ for(PositionIndex j1=1; j1<=m; j1++)
+ for(PositionIndex j2=j1+1; j2<=m; j2++)
+ if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
+ als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
+ }
+ sort(als.begin(),als.end());
+ double sum=0,sum2=0;
+ for(unsigned int i=0; i<als.size(); ++i)
+ sum+=als[i].v;
+ for(unsigned int i=0; i<min((unsigned int)als.size(),(unsigned int)PrintN); ++i) {
+ alignment x=*setOfGoodCenters[als[i].s].first;
+ if( !(als[i].a==0 && als[i].b==0) ) {
+ if( als[i].a<=0&&als[i].b<=0 )
+ x.doSwap(-als[i].a,-als[i].b);
+ else
+ x.doMove(als[i].a,als[i].b);
}
- addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
- for(unsigned int i=0;i<setOfGoodCenters.size();i++)
- delete setOfGoodCenters[i].first;
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- //sHandler1.rewind();
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.close();
- delete of3;
- delete writeNBestErrorsFile;
- double FSent=pair_no;
- cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
- cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
+ if( of3&&i<PrintN )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
+ als[i].v/sum*count);
+ sum2+=als[i].v;
+ if( writeNBestErrorsFile ) {
+ if( pair_no<int(ReferenceAlignment.size()) ) {
+ int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
+ vector<double> scores;
+ ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
+ ef.computeScores(x,scores);
+ *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
+ for(unsigned int i=0; i<scores.size(); ++i)
+ *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
+ *writeNBestErrorsFile << '\n';
+ }
+ }
+ }
+ if( writeNBestErrorsFile )
+ *writeNBestErrorsFile << '\n';
+ }
+ addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
+ for(unsigned int i=0; i<setOfGoodCenters.size(); i++)
+ delete setOfGoodCenters[i].first;
+ double period = difftime(time(NULL), sent_s);
+ if (Verbose)
+ cerr << "processing this sentence pair took : " << period
+ << " seconds\n";
+
+ } /* of sentence pair E, F */
+ //sHandler1.rewind();
+ if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
+ of2.close();
+ delete of3;
+ delete writeNBestErrorsFile;
+ double FSent=pair_no;
+ cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
+ cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
}
/*Perform only one step of viterbi alignment*/
#if 0
template<class MODEL_TYPE, class ADDITIONAL_MODEL_DATA_IN,class ADDITIONAL_MODEL_DATA_OUT>
-void model3::viterbi_loop_with_tricks_1(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model, bool final,
- ADDITIONAL_MODEL_DATA_IN*dm_in,
- ADDITIONAL_MODEL_DATA_OUT*dm_out){
- ofstream *writeNBestErrorsFile=0;
- if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
- string x=alignfile+string("NBEST");
- writeNBestErrorsFile= new ofstream(x.c_str());
+void model3::viterbi_loop_with_tricks_1(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
+ bool dump_files, const char* alignfile,
+ bool collect_counts, string model, bool final,
+ ADDITIONAL_MODEL_DATA_IN*dm_in,
+ ADDITIONAL_MODEL_DATA_OUT*dm_out)
+{
+ ofstream *writeNBestErrorsFile=0;
+ if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 ) {
+ string x=alignfile+string("NBEST");
+ writeNBestErrorsFile= new ofstream(x.c_str());
+ }
+ ofstream *of3=0;
+ ofstream of2;
+ int pair_no;
+ HillClimbingSteps=0;
+ NumberOfAlignmentsInSophisticatedCountCollection=0;
+ if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
+ of2.open(alignfile);
+ if( dump_files&&PrintN&&final ) {
+ string x=alignfile+string("NBEST");
+ of3= new ofstream(x.c_str());
+ }
+ pair_no = 0 ; // sentence pair number
+ // for each sentence pair in the corpus
+ perp.clear() ; // clears cross_entrop & perplexity
+ viterbiPerp.clear() ; // clears cross_entrop & perplexity
+ sentPair sent ;
+ int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
+ while(sHandler1.getNextSentence(sent)) {
+ if( sent.eSent.size()==1||sent.fSent.size()==1 )
+ continue;
+ SentNr=sent.sentenceNo;
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float count = sent.getCount();
+ if ((sent.sentenceNo % 10000) == 0)
+ cerr <<sent.sentenceNo << '\n';
+ time_t sent_s = time(NULL) ;
+ pair_no++ ;
+ l = es.size() - 1 ;
+ m = fs.size() - 1 ;
+ if (Log) {
+ logmsg << "Processing sentence pair:\n\t";
+ printSentencePair(es, fs, logmsg);
+ for (i = 0 ; i <= l ; i++)
+ logmsg << Elist.getVocabList()[es[i]].word << " ";
+ logmsg << "\n\t";
+ for (j = 1 ; j <= m ; j++)
+ logmsg << Flist.getVocabList()[fs[j]].word << " ";
+ logmsg << "\n";
}
- ofstream *of3=0;
- ofstream of2;
- int pair_no;
- HillClimbingSteps=0;
- NumberOfAlignmentsInSophisticatedCountCollection=0;
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.open(alignfile);
- if( dump_files&&PrintN&&final ){
- string x=alignfile+string("NBEST");
- of3= new ofstream(x.c_str());
+
+ LogProb align_total_count=0;
+ alignment viterbi2alignment(l,m);
+ MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
+ viterbi_model2(ef,viterbi2alignment,pair_no-1);
+ Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
+ set<alignment> alignments;
+ MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
+ MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
+ if (Log)
+ logmsg << "VITERBI: " << alignment(_viterbi);
+ if( ef.isSubOptimal() )
+ setOfGoodCenters[0].second = hillClimb_std(*best);
+ else {
+ setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
+ if( setOfGoodCenters[0].second==0 ) {
+ cerr << "PROBLEM: alignment is 0.\n";
+ best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
+ }
}
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear() ; // clears cross_entrop & perplexity
- sentPair sent ;
- int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
- while(sHandler1.getNextSentence(sent)){
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- continue;
- SentNr=sent.sentenceNo;
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 10000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
- if (Log){
- logmsg << "Processing sentence pair:\n\t";
- printSentencePair(es, fs, logmsg);
- for (i = 0 ; i <= l ; i++)
- logmsg << Elist.getVocabList()[es[i]].word << " ";
- logmsg << "\n\t";
- for (j = 1 ; j <= m ; j++)
- logmsg << Flist.getVocabList()[fs[j]].word << " ";
- logmsg << "\n";
- }
-
- LogProb align_total_count=0;
- alignment viterbi2alignment(l,m);
- MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
- viterbi_model2(ef,viterbi2alignment,pair_no-1);
- Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
- set<alignment> alignments;
- MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
- MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
- if (Log)
- logmsg << "VITERBI: " << alignment(_viterbi);
- if( ef.isSubOptimal() )
- setOfGoodCenters[0].second = hillClimb_std(*best);
- else{
- setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
- if( setOfGoodCenters[0].second==0 ){
- cerr << "PROBLEM: alignment is 0.\n";
- best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
- }
+ int bestAlignment=0;
+
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ alignments.insert(*best);
+ if (setOfGoodCenters[bestAlignment].second <= 0) {
+ if( PrintZeroScoreWarning++<100 ) {
+ cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
+ cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
+ printSentencePair(es, fs, cerr);
+ if(Log) {
+ logmsg << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
+ printSentencePair(es, fs, logmsg);
}
- int bestAlignment=0;
-
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- alignments.insert(*best);
- if (setOfGoodCenters[bestAlignment].second <= 0){
- if( PrintZeroScoreWarning++<100 ){
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
- printSentencePair(es, fs, cerr);
- if(Log){
- logmsg << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, logmsg);
- }
- }
- else if(PrintZeroScoreWarning==100) {
- cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
+ } else if(PrintZeroScoreWarning==100) {
+ cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
+ }
+ setOfGoodCenters[bestAlignment].second=1e-300;
+ continue;
+ }
+ int nHillClimbed=1,nAlignment=1;
+ bool flagBetterByPegging=0;
+ if ( Peg ) {
+ const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
+ Array2<short, vector<short> > linkCache(l+1, m+1, false);
+ if(UseLinkCache)for(unsigned int j=1; j<=m; j++)linkCache((*useMatrix)(j), j)=1;
+ for(PositionIndex j=1; j<=m; j++)for(PositionIndex i=0; i<=l; i++) {
+ nAlignment++;
+ if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
+ ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
+ (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))) {
+ MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
+ LogProb peggedAlignmentScore;
+ nHillClimbed++;
+ if( ef.isSubOptimal() ) {
+ BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
+ BESTPEGGED->doMove(i, j);
+ peggedAlignmentScore= hillClimb_std(*BESTPEGGED, i,j);
+ } else {
+ alignment pegAlignment(l,m);
+ peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
+ BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
+ massert( pegAlignment(j)==i );
}
- setOfGoodCenters[bestAlignment].second=1e-300;
- continue;
- }
- int nHillClimbed=1,nAlignment=1;
- bool flagBetterByPegging=0;
- if ( Peg ){
- const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
- Array2<short, vector<short> > linkCache(l+1, m+1, false);
- if(UseLinkCache)for(unsigned int j=1;j<=m;j++)linkCache((*useMatrix)(j), j)=1;
- for(PositionIndex j=1;j<=m;j++)for(PositionIndex i=0;i<=l;i++){
- nAlignment++;
- if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
- ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
- (i != 0 || (m >= 2 * (useMatrix->fert(0)+1)))){
- MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
- LogProb peggedAlignmentScore;
- nHillClimbed++;
- if( ef.isSubOptimal() ){
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
- BESTPEGGED->doMove(i, j);
- peggedAlignmentScore= hillClimb_std(*BESTPEGGED, i,j);
- }else{
- alignment pegAlignment(l,m);
- peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
- massert( pegAlignment(j)==i );
- }
- if(UseLinkCache)
- for(unsigned int j=1;j<=m;j++)
- linkCache((*BESTPEGGED)(j), j)=1;
- if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ){
- if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)){
- alignments.insert(*BESTPEGGED);
- if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ){
- if( LogPeg ){
- cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
- cerr << "NEW BEST: " << alignment(*BESTPEGGED);
- cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
- }
- flagBetterByPegging=1;
- bestAlignment=alignments.size()-1;
- }
- }
- assert( differences(*BESTPEGGED, *best)!=0 );
- BESTPEGGED=0;
- }else
- delete BESTPEGGED;
+ if(UseLinkCache)
+ for(unsigned int j=1; j<=m; j++)
+ linkCache((*BESTPEGGED)(j), j)=1;
+ if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 ) {
+ if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore)) {
+ alignments.insert(*BESTPEGGED);
+ if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second ) {
+ if( LogPeg ) {
+ cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
+ cerr << "NEW BEST: " << alignment(*BESTPEGGED);
+ cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
+ }
+ flagBetterByPegging=1;
+ bestAlignment=alignments.size()-1;
}
- }
- } // end of if(Peg)
- NBetterByPegging+=flagBetterByPegging;
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( LogPeg>1 )
- cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
- int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
- dCountTable, nCountTable, p1_count, p0_count,
- align_total_count, count, collect_counts, dm_out);
- if( LogPeg>1 ){
- cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
- massert(alTotal<=pow(double(l+1),double(m)));
+ }
+ assert( differences(*BESTPEGGED, *best)!=0 );
+ BESTPEGGED=0;
+ } else
+ delete BESTPEGGED;
+ }
}
- NCenter+=setOfGoodCenters.size();NHillClimbed+=nHillClimbed;NAlignment+=nAlignment;NTotal+=alTotal;
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
- massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
- if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
- setOfGoodCenters[bestAlignment].second);
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ){
- vector<Als> als;
- for(unsigned int s=0;s<setOfGoodCenters.size();++s){
- const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
- msc.check();
- double normalized_ascore=setOfGoodCenters[s].second;
- if( !msc.isCenterDeleted() )
- als.push_back( Als(s,0,0,normalized_ascore) );
-
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex i=0;i<=l;i++)
- if( i!=msc(j)&& !msc.isDelMove(i,j) )
- als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
- for(PositionIndex j1=1;j1<=m;j1++)
- for(PositionIndex j2=j1+1;j2<=m;j2++)
- if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
- als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
- }
- sort(als.begin(),als.end());
- double sum=0,sum2=0;
- for(unsigned int i=0;i<als.size();++i)
- sum+=als[i].v;
- for(unsigned int i=0;i<min((unsigned int)als.size(),(unsigned int)PrintN);++i){
- alignment x=*setOfGoodCenters[als[i].s].first;
- if( !(als[i].a==0 && als[i].b==0) ){
- if( als[i].a<=0&&als[i].b<=0 )
- x.doSwap(-als[i].a,-als[i].b);
- else
- x.doMove(als[i].a,als[i].b);
- }
- if( of3&&i<PrintN )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
- als[i].v/sum*count);
- sum2+=als[i].v;
- if( writeNBestErrorsFile ){
- if( pair_no<int(ReferenceAlignment.size()) ){
- int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
- vector<double> scores;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- ef.computeScores(x,scores);
- *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
- for(unsigned int i=0;i<scores.size();++i)
- *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
- *writeNBestErrorsFile << '\n';
- }
- }
- }
- if( writeNBestErrorsFile )
- *writeNBestErrorsFile << '\n';
+ } // end of if(Peg)
+ NBetterByPegging+=flagBetterByPegging;
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ if( LogPeg>1 )
+ cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
+ int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
+ dCountTable, nCountTable, p1_count, p0_count,
+ align_total_count, count, collect_counts, dm_out);
+ if( LogPeg>1 ) {
+ cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
+ massert(alTotal<=pow(double(l+1),double(m)));
+ }
+ NCenter+=setOfGoodCenters.size();
+ NHillClimbed+=nHillClimbed;
+ NAlignment+=nAlignment;
+ NTotal+=alTotal;
+ perp.addFactor(log(double(align_total_count)), count, l, m,0);
+ viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
+ massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
+ if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), sent.sentenceNo,
+ setOfGoodCenters[bestAlignment].second);
+ for(unsigned int i=0; i<setOfGoodCenters.size(); ++i)
+ setOfGoodCenters[i].first->check();
+ if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) ) {
+ vector<Als> als;
+ for(unsigned int s=0; s<setOfGoodCenters.size(); ++s) {
+ const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
+ msc.check();
+ double normalized_ascore=setOfGoodCenters[s].second;
+ if( !msc.isCenterDeleted() )
+ als.push_back( Als(s,0,0,normalized_ascore) );
+
+ for(WordIndex j=1; j<=m; j++)
+ for(WordIndex i=0; i<=l; i++)
+ if( i!=msc(j)&& !msc.isDelMove(i,j) )
+ als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
+ for(PositionIndex j1=1; j1<=m; j1++)
+ for(PositionIndex j2=j1+1; j2<=m; j2++)
+ if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
+ als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
+ }
+ sort(als.begin(),als.end());
+ double sum=0,sum2=0;
+ for(unsigned int i=0; i<als.size(); ++i)
+ sum+=als[i].v;
+ for(unsigned int i=0; i<min((unsigned int)als.size(),(unsigned int)PrintN); ++i) {
+ alignment x=*setOfGoodCenters[als[i].s].first;
+ if( !(als[i].a==0 && als[i].b==0) ) {
+ if( als[i].a<=0&&als[i].b<=0 )
+ x.doSwap(-als[i].a,-als[i].b);
+ else
+ x.doMove(als[i].a,als[i].b);
+ }
+ if( of3&&i<PrintN )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), sent.sentenceNo,
+ als[i].v/sum*count);
+ sum2+=als[i].v;
+ if( writeNBestErrorsFile ) {
+ if( pair_no<int(ReferenceAlignment.size()) ) {
+ int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
+ vector<double> scores;
+ ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
+ ef.computeScores(x,scores);
+ *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
+ for(unsigned int i=0; i<scores.size(); ++i)
+ *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
+ *writeNBestErrorsFile << '\n';
+ }
}
- addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
- for(unsigned int i=0;i<setOfGoodCenters.size();i++)
- delete setOfGoodCenters[i].first;
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- //sHandler1.rewind();
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.close();
- delete of3;
- delete writeNBestErrorsFile;
- double FSent=pair_no;
- cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
- cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
+ }
+ if( writeNBestErrorsFile )
+ *writeNBestErrorsFile << '\n';
+ }
+ addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
+ for(unsigned int i=0; i<setOfGoodCenters.size(); i++)
+ delete setOfGoodCenters[i].first;
+ double period = difftime(time(NULL), sent_s);
+ if (Verbose)
+ cerr << "processing this sentence pair took : " << period
+ << " seconds\n";
+
+ } /* of sentence pair E, F */
+ //sHandler1.rewind();
+ if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
+ of2.close();
+ delete of3;
+ delete writeNBestErrorsFile;
+ double FSent=pair_no;
+ cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
+ cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
}
#endif
diff --git a/mgizapp/src/myassert.cpp b/mgizapp/src/myassert.cpp
index 2d49be8..26ef8f5 100644
--- a/mgizapp/src/myassert.cpp
+++ b/mgizapp/src/myassert.cpp
@@ -5,15 +5,15 @@
#ifndef STANDARD_ASSERT
void myerror(int line,const char *file,const char *expression)
{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
- cout << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
+ cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
+ << file << ":" << line << endl;
+ cout << "(general.h):Assertion failed: '" << expression << "' ::: b "
+ << file << ":" << line << endl;
}
void imyerror(int line,const char *file,const char *expression)
{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
+ cerr << "Error: '" << expression << "' ::: in Source " << file
+ << ":" << line << endl;
}
#endif
diff --git a/mgizapp/src/mymath.h b/mgizapp/src/mymath.h
index 47c5bfb..3e4142e 100644
--- a/mgizapp/src/mymath.h
+++ b/mgizapp/src/mymath.h
@@ -4,6 +4,9 @@
/* ---------------------------------------------------------------- */
#ifndef HEADER_MYMATH_DEFINED
#define HEADER_MYMATH_DEFINED
-inline double mfabs(double x){return (x<0)?(-x):x;}
+inline double mfabs(double x)
+{
+ return (x<0)?(-x):x;
+}
#include <cmath>
#endif
diff --git a/mgizapp/src/mystl.h b/mgizapp/src/mystl.h
index ae42159..aa915ef 100644
--- a/mgizapp/src/mystl.h
+++ b/mgizapp/src/mystl.h
@@ -52,15 +52,15 @@ inline double verfProb(int n1,int n2)
if( n1==1 )return prob*n1mult;
else if( n1==2 )return prob*n2mult;
else if( n1==3 )return prob*n3mult;
- else
- return prob;
+ else
+ return prob;
}
inline bool prefix(const string&x,const string&y)
{
if(y.size()>x.size() )
return 0;
- for(unsigned int i=0;i<y.size();++i)
+ for(unsigned int i=0; i<y.size(); ++i)
if( y[i]!=x[i] )
return 0;
return 1;
@@ -115,21 +115,23 @@ float rel_lev(const T&s1,const T&s2)
return min(1.0,lev(s1,s2)/(double)s1.size());
}*/
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+13001*Hash(a.second); }
+template<class V> int Hash(const pair<V,V>&a)
+{
+ return Hash(a.first)+13001*Hash(a.second);
+}
template<class T1,class T2>
ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
+{
out << "(" << ir.first << "," << ir.second << ")";
return out;
-}
+}
inline int Hash(const string& s)
{
int sum=0;
string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
+ for(; i!=end; i++)sum=5*sum+(*i);
return sum;
}
template<class A,class B,class C>
@@ -139,13 +141,15 @@ public:
A a;
B b;
C c;
- tri(){};
+ tri() {};
tri(const A&_a,const B&_b,const C&_c)
: a(_a),b(_b),c(_c) {}
};
template<class A,class B,class C>
bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
+{
+ return x.a==y.a&&x.b==y.b&&x.c==y.c;
+}
template<class A,class B,class C>
bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
@@ -166,29 +170,36 @@ template<class T ,class _Pr = less<T> >
class my_hash
{
public:
- int operator()(const T&t)const {return Hash(t);}
+ int operator()(const T&t)const {
+ return Hash(t);
+ }
#ifdef WIN32
- enum
- { // parameters for hash table
- bucket_size = 1 // 0 < bucket_size
- };
- my_hash()
- : comp()
- { // construct with default comparator
- }
-
- my_hash(_Pr _Pred)
- : comp(_Pred)
- { // construct with _Pred comparator
- }
-protected:
- _Pr comp;
+ enum {
+ // parameters for hash table
+ bucket_size = 1 // 0 < bucket_size
+ };
+ my_hash()
+ : comp() {
+ // construct with default comparator
+ }
+
+ my_hash(_Pr _Pred)
+ : comp(_Pred) {
+ // construct with _Pred comparator
+ }
+protected:
+ _Pr comp;
public:
- int operator()(const T&t , const T&t1)const {return comp(t,t1);}
+ int operator()(const T&t , const T&t1)const {
+ return comp(t,t1);
+ }
#endif
};
-inline int Hash(int value) { return value; }
+inline int Hash(int value)
+{
+ return value;
+}
#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
template<class A,class B>
@@ -200,29 +211,28 @@ public:
leda_h_array() : MY_HASH_BASE() {}
leda_h_array(const B&_init)
: MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=find(a);
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(MY_HASH_BASE::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
+ bool defined(const A&a) const {
+ return find(a)!=this->end();
+ }
+ const B&operator[](const A&a)const {
+ typename MY_HASH_BASE::const_iterator pos=find(a);
+ if( pos==this->end() )
+ return init;
+ else
return pos->second;
+ }
+ B&operator[](const A&a) {
+ typename MY_HASH_BASE::iterator pos=find(a);
+ if( pos==this->end() ) {
+ insert(MY_HASH_BASE::value_type(a,init));
+ pos=find(a);
+ iassert(pos!=this->end());
}
- const B&initValue()const
- {return init;}
+ return pos->second;
+ }
+ const B&initValue()const {
+ return init;
+ }
};
#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
@@ -232,13 +242,12 @@ ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
T t;
bool makeNl=0;
out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
+ forall_defined_h(T,U,t,w) {
+ if( makeNl )
+ out << "\n ";
+ out << "EL:" << t << " INH:" << w[t] << ".";
+ makeNl=1;
+ }
return out << "}\n";
}
@@ -253,21 +262,20 @@ bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
{
A v;
forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
+ if( !( p1[v]==p2[v]) ) return 0;
forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
+ if( !( p1[v]==p2[v]) ) return 0;
+ return 1;
}
template<class T>
int count_elements(T a,T b)
{
int c=0;
- while(a!=b)
- {
- a++;
- c++;
- }
+ while(a!=b) {
+ a++;
+ c++;
+ }
return c;
}
@@ -275,17 +283,16 @@ template<class T>
T normalize_if_possible_with_increment(T*a,T*b,int increment)
{
T sum=0;
- for(T*i=a;i!=b;i+=increment)
+ for(T*i=a; i!=b; i+=increment)
sum+=*i;
if( sum )
- for(T*i=a;i!=b;i+=increment)
+ for(T*i=a; i!=b; i+=increment)
*i/=sum;
- else
- {
- T factor=increment/(b-a);
- for(T*i=a;i!=b;i+=increment)
- *i=factor;
- }
+ else {
+ T factor=increment/(b-a);
+ for(T*i=a; i!=b; i+=increment)
+ *i=factor;
+ }
return sum;
}
@@ -293,15 +300,14 @@ template<class T>
inline int m_comp_3way(T a,T b,int n)
{
int _n=0;
- while((_n++<n) && a && b)
- {
- const typename T::value_type &aa=*a;
- const typename T::value_type &bb=*b;
- if( aa<bb )return 1;
- if( bb<aa )return -1;
- ++a;
- ++b;
- }
+ while((_n++<n) && a && b) {
+ const typename T::value_type &aa=*a;
+ const typename T::value_type &bb=*b;
+ if( aa<bb )return 1;
+ if( bb<aa )return -1;
+ ++a;
+ ++b;
+ }
return 0;
}
@@ -309,10 +315,10 @@ template<class T>
void smooth_standard(T*a,T*b,double p)
{
int n=b-a;
- if( n==0 )
+ if( n==0 )
return;
double pp=p/n;
- for(T*i=a;i!=b;++i)
+ for(T*i=a; i!=b; ++i)
*i = (1.0-p)*(*i)+pp;
}
diff --git a/mgizapp/src/parse.cpp b/mgizapp/src/parse.cpp
index c8eb570..a0162c1 100644
--- a/mgizapp/src/parse.cpp
+++ b/mgizapp/src/parse.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,82 +36,76 @@ USA.
extern bool ONLYALDUMPS;
void parseConfigFile (char * fname )
- // This functions reads in the configuration file to set up some run-time
- // parameters. The parameters are global variables that are defined in
- // main.cc and used all over the place in the program
- // The format of the configuration file can be explained in the following way
- // FORMAT:
- // the character '\n' separates lines ..
- // lines that start with "//" (skipping over white spaces are considered
- // as comments and will be ignored.
- // Any other line is considered as an attribute setting instruction and it
- // is divided into haves (separated by a colon ":"). The first half is the
- // attribute value which consists of the concatenation of all non-white space
- // tokens before the colon. These tokens will have spaces eseparating them.
- // The attribute vlue is the first token after the colon (any thing after
- // it will be ignored ;
- // For example :
- // if the configuration file has the following entry:
- //
- // NO. ITERATIONS MODEL 2 : 10
- //
- // then the attribute is "NO. ITERATIONS MODEL 2" , and the attribute value
- // is "10" (these do not include the quotation marks).
+// This functions reads in the configuration file to set up some run-time
+// parameters. The parameters are global variables that are defined in
+// main.cc and used all over the place in the program
+// The format of the configuration file can be explained in the following way
+// FORMAT:
+// the character '\n' separates lines ..
+// lines that start with "//" (skipping over white spaces are considered
+// as comments and will be ignored.
+// Any other line is considered as an attribute setting instruction and it
+// is divided into haves (separated by a colon ":"). The first half is the
+// attribute value which consists of the concatenation of all non-white space
+// tokens before the colon. These tokens will have spaces eseparating them.
+// The attribute vlue is the first token after the colon (any thing after
+// it will be ignored ;
+// For example :
+// if the configuration file has the following entry:
+//
+// NO. ITERATIONS MODEL 2 : 10
+//
+// then the attribute is "NO. ITERATIONS MODEL 2" , and the attribute value
+// is "10" (these do not include the quotation marks).
{
string line, word, attrib, attribval ;
ifstream Config_File(fname);
- if(!Config_File){
+ if(!Config_File) {
cerr << "ERROR: Cannot open configuration file " << fname << "!\n" ;
exit(1);
}
cout << "The following options are from the config file and will be overwritten by any command line options.\n";
-
- while(getline(Config_File, line)){
+
+ while(getline(Config_File, line)) {
istrstream buffer(line.c_str());
word = attrib = attribval = "" ;
buffer >> word ;
- if (word != "//"){ // if line does not start with "//" (i.e. not a comment)
+ if (word != "//") { // if line does not start with "//" (i.e. not a comment)
attrib = word ;
- while((buffer >> word) && (word != ":")){
- attrib += " " + word ;
- }
- if(!(buffer >> attribval))
- {
- istrstream buffer2(line.c_str());
- buffer2>>attrib;
- buffer2>>attribval;
- }
+ while((buffer >> word) && (word != ":")) {
+ attrib += " " + word ;
+ }
+ if(!(buffer >> attribval)) {
+ istrstream buffer2(line.c_str());
+ buffer2>>attrib;
+ buffer2>>attribval;
+ }
// This# is where (1) the configuration file is defined and
// (2) parsing of its attributes occurs.
-
- if(attrib == "t FILE"){
- t_Filename = attribval;
- cout << "\tt file: " << t_Filename << '\n';
- }
- else if(attrib == "a FILE"){
- a_Filename = attribval;
- cout << "\ta file: " << a_Filename << '\n';
- }
- else if(attrib == "d FILE"){
- d_Filename = attribval;
- cout << "\td file: " << d_Filename << '\n';
- }
- else if(attrib == "n FILE"){
- n_Filename = attribval;
- cout << "\tn file: " << n_Filename << '\n';
- }
- else if(attrib == "p0 FILE"){
- p0_Filename = attribval;
- cout << "\tp0 file: " << p0_Filename << '\n';
- }
- else if ( line == ""){}
+
+ if(attrib == "t FILE") {
+ t_Filename = attribval;
+ cout << "\tt file: " << t_Filename << '\n';
+ } else if(attrib == "a FILE") {
+ a_Filename = attribval;
+ cout << "\ta file: " << a_Filename << '\n';
+ } else if(attrib == "d FILE") {
+ d_Filename = attribval;
+ cout << "\td file: " << d_Filename << '\n';
+ } else if(attrib == "n FILE") {
+ n_Filename = attribval;
+ cout << "\tn file: " << n_Filename << '\n';
+ } else if(attrib == "p0 FILE") {
+ p0_Filename = attribval;
+ cout << "\tp0 file: " << p0_Filename << '\n';
+ } else if ( line == "") {}
else if( !makeSetCommand(attrib,attribval,getGlobalParSet(),2) )
- cerr << "ERROR: Unrecognized attribute :" << attrib << '\n';
+ cerr << "ERROR: Unrecognized attribute :" << attrib << '\n';
}
}
}
@@ -120,31 +114,29 @@ void parseConfigFile (char * fname )
void parseArguments(int argc, char *argv[])
{
int arg = 1;
-
- if(!strcmp(argv[1], "--h") || !strcmp(argv[1], "--help")){
- printHelp();
- exit(0);
- }
- if( argv[1][0]=='-' )
- arg=0;
- else
- parseConfigFile(argv[1]);
- while(++arg<argc){
- if( strlen(argv[arg])>2 && argv[arg][0]=='-' && argv[arg][1]=='-' ) {
- if( !makeSetCommand(argv[arg]+1,"1",getGlobalParSet(),2))
- cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
- }
- else if( arg+1<argc && !makeSetCommand(argv[arg],argv[arg+1],getGlobalParSet(),2))
- cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
- else
- {
- arg++;
- }
+
+ if(!strcmp(argv[1], "--h") || !strcmp(argv[1], "--help")) {
+ printHelp();
+ exit(0);
+ }
+ if( argv[1][0]=='-' )
+ arg=0;
+ else
+ parseConfigFile(argv[1]);
+ while(++arg<argc) {
+ if( strlen(argv[arg])>2 && argv[arg][0]=='-' && argv[arg][1]=='-' ) {
+ if( !makeSetCommand(argv[arg]+1,"1",getGlobalParSet(),2))
+ cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
+ } else if( arg+1<argc && !makeSetCommand(argv[arg],argv[arg+1],getGlobalParSet(),2))
+ cerr << "WARNING: ignoring unrecognized option: "<< argv[arg] << '\n' ;
+ else {
+ arg++;
}
- if( OPath.length() )
- OPath+="/";
- Prefix = (OPath + Prefix);
- LogFilename = (OPath + LogFilename);
- printGIZAPars(cout);
+ }
+ if( OPath.length() )
+ OPath+="/";
+ Prefix = (OPath + Prefix);
+ LogFilename = (OPath + LogFilename);
+ printGIZAPars(cout);
}
diff --git a/mgizapp/src/plain2snt.cpp b/mgizapp/src/plain2snt.cpp
index 663d376..b2c1df8 100644
--- a/mgizapp/src/plain2snt.cpp
+++ b/mgizapp/src/plain2snt.cpp
@@ -14,7 +14,7 @@ int main(int argc,char**argv)
string snt1(""), snt2(""), vcb1(""), vcb2("");
vector<double>weights;
vector<string>filenames;
- for(int i=1;i<argc;++i)
+ for(int i=1; i<argc; ++i)
if(string(argv[i])=="-weight")
weights.push_back(atof(argv[++i]));
else if(string(argv[i])=="-snt1")
@@ -27,28 +27,26 @@ int main(int argc,char**argv)
vcb2=argv[++i];
else
filenames.push_back(argv[i]);
-
- if((filenames.size()%2)==1||filenames.size()==0 )
- {
- cerr << argv[0] << " txt1 txt2 [txt3 txt4 -weight w -vcb1 output1.vcb -vcb2 output2.vcb -snt1 output1_output2.snt -snt2 output2_output1.snt]\n";
- cerr << " Converts plain text into GIZA++ snt-format.\n";
- exit(1);
- }
+
+ if((filenames.size()%2)==1||filenames.size()==0 ) {
+ cerr << argv[0] << " txt1 txt2 [txt3 txt4 -weight w -vcb1 output1.vcb -vcb2 output2.vcb -snt1 output1_output2.snt -snt2 output2_output1.snt]\n";
+ cerr << " Converts plain text into GIZA++ snt-format.\n";
+ exit(1);
+ }
string line1,line2,word;
map<string,int> v1,v2;
map<string,int> id1,id2;
vector<string> iid1(2),iid2(2);
-
+
string w1(filenames[0]);
string w2(filenames[1]);
-
+
if( w1.length()>4&&w2.length()>4&&((w1.substr(w1.length()-4,w1.length())==".tok" && w2.substr(w2.length()-4,w2.length())==".tok" )||
- (w1.substr(w1.length()-4,w1.length())==".txt" && w2.substr(w2.length()-4,w2.length())==".txt" ) ))
- {
- w1=w1.substr(0,w1.length()-4);
- w2=w2.substr(0,w2.length()-4);
- cerr << "w1:"<< w1 << " w2:" << w2 << endl;
- }
+ (w1.substr(w1.length()-4,w1.length())==".txt" && w2.substr(w2.length()-4,w2.length())==".txt" ) )) {
+ w1=w1.substr(0,w1.length()-4);
+ w2=w2.substr(0,w2.length()-4);
+ cerr << "w1:"<< w1 << " w2:" << w2 << endl;
+ }
string vocab1(w1),vocab2(w2);
unsigned int slashpos=vocab1.rfind('/')+1;
@@ -83,61 +81,53 @@ int main(int argc,char**argv)
}
ofstream ovocab1(vocab1.c_str()),ovocab2(vocab2.c_str()),osnt1(snt1.c_str()),osnt2(snt2.c_str());
- for(unsigned int i=0;i<filenames.size();i+=2)
- {
- ifstream i1(filenames[i].c_str()),i2(filenames[i+1].c_str());
- if(!i1)cerr << "WARNING: " << filenames[i] << " cannot be read.\n";
- if(!i2)cerr << "WARNING: " << filenames[i+1] << " cannot be read.\n";
- while(getline(i1,line1) && getline(i2,line2) )
- {
- vector<string> t1,t2;
- istrstream ii1(line1.c_str());
- while(ii1>>word)
- {
- t1.push_back(word);
- v1[word]++;
- if( id1.find(word)==id1.end() )
- {
- iid1.push_back(word);
- id1[word]=iid1.size()-1;
- }
- }
- istrstream ii2(line2.c_str());
- while(ii2>>word)
- {
- t2.push_back(word);
- v2[word]++;
- if( id2.find(word)==id2.end() )
- {
- iid2.push_back(word);
- id2[word]=iid2.size()-1;
- }
- }
- double w=1.0;
- if( i/2<weights.size() )
- w=weights[i/2];
- if( t1.size()&&t2.size() )
- {
- osnt1 << w << "\n";
- for(unsigned int j=0;j<t1.size();++j)osnt1 << id1[t1[j]] << ' ';
- osnt1 << '\n';
- for(unsigned int j=0;j<t2.size();++j)osnt1 << id2[t2[j]] << ' ';
- osnt1 << '\n';
+ for(unsigned int i=0; i<filenames.size(); i+=2) {
+ ifstream i1(filenames[i].c_str()),i2(filenames[i+1].c_str());
+ if(!i1)cerr << "WARNING: " << filenames[i] << " cannot be read.\n";
+ if(!i2)cerr << "WARNING: " << filenames[i+1] << " cannot be read.\n";
+ while(getline(i1,line1) && getline(i2,line2) ) {
+ vector<string> t1,t2;
+ istrstream ii1(line1.c_str());
+ while(ii1>>word) {
+ t1.push_back(word);
+ v1[word]++;
+ if( id1.find(word)==id1.end() ) {
+ iid1.push_back(word);
+ id1[word]=iid1.size()-1;
+ }
+ }
+ istrstream ii2(line2.c_str());
+ while(ii2>>word) {
+ t2.push_back(word);
+ v2[word]++;
+ if( id2.find(word)==id2.end() ) {
+ iid2.push_back(word);
+ id2[word]=iid2.size()-1;
+ }
+ }
+ double w=1.0;
+ if( i/2<weights.size() )
+ w=weights[i/2];
+ if( t1.size()&&t2.size() ) {
+ osnt1 << w << "\n";
+ for(unsigned int j=0; j<t1.size(); ++j)osnt1 << id1[t1[j]] << ' ';
+ osnt1 << '\n';
+ for(unsigned int j=0; j<t2.size(); ++j)osnt1 << id2[t2[j]] << ' ';
+ osnt1 << '\n';
- osnt2 << w << "\n";
- for(unsigned int j=0;j<t2.size();++j)osnt2 << id2[t2[j]] << ' ';
- osnt2 << '\n';
- for(unsigned int j=0;j<t1.size();++j)osnt2 << id1[t1[j]] << ' ';
- osnt2 << '\n';
- }
- else
- cerr << "WARNING: filtered out empty sentence (source: " << filenames[i] << " " << t1.size() <<
- " target: " << filenames[i+1] << " " << t2.size() << ").\n";
- }
+ osnt2 << w << "\n";
+ for(unsigned int j=0; j<t2.size(); ++j)osnt2 << id2[t2[j]] << ' ';
+ osnt2 << '\n';
+ for(unsigned int j=0; j<t1.size(); ++j)osnt2 << id1[t1[j]] << ' ';
+ osnt2 << '\n';
+ } else
+ cerr << "WARNING: filtered out empty sentence (source: " << filenames[i] << " " << t1.size() <<
+ " target: " << filenames[i+1] << " " << t2.size() << ").\n";
}
-
- for(unsigned int i=2;i<iid1.size();++i)
+ }
+
+ for(unsigned int i=2; i<iid1.size(); ++i)
ovocab1 << i << ' ' << iid1[i] << ' ' << v1[iid1[i]] << '\n';
- for(unsigned int i=2;i<iid2.size();++i)
+ for(unsigned int i=2; i<iid2.size(); ++i)
ovocab2 << i << ' ' << iid2[i] << ' ' << v2[iid2[i]] << '\n';
}
diff --git a/mgizapp/src/reports.cpp b/mgizapp/src/reports.cpp
index 3ce3e80..577435c 100644
--- a/mgizapp/src/reports.cpp
+++ b/mgizapp/src/reports.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -23,7 +23,7 @@ USA.
#include <ctime>
#include <set>
#include "defs.h"
-#include "vocab.h"
+#include "vocab.h"
#include "Perplexity.h"
#include "getSentence.h"
#include "TTables.h"
@@ -43,50 +43,50 @@ void printHelp(void)
}
-void generatePerplexityReport(const Perplexity& trainperp,
- const Perplexity& testperp,
- const Perplexity& trainVperp,
- const Perplexity& testVperp,
- ostream& of, int trainsize, int testsize,
- bool)
+void generatePerplexityReport(const Perplexity& trainperp,
+ const Perplexity& testperp,
+ const Perplexity& trainVperp,
+ const Perplexity& testVperp,
+ ostream& of, int trainsize, int testsize,
+ bool)
{
unsigned int i, m;
unsigned int m1 = max(trainperp.size(), testperp.size());
unsigned int m2 = max(trainVperp.size(), testVperp.size());
m = max(m1,m2);
of << "#trnsz\ttstsz\titer\tmodel\ttrn-pp\t\ttest-pp\t\ttrn-vit-pp\t\ttst-vit-pp\n";
- for (i = 0 ; i <m ; i++){
+ for (i = 0 ; i <m ; i++) {
of << trainsize << '\t' << testsize << '\t' << i<< '\t' << trainperp.modelid[i] << '\t';
if (i < trainperp.perp.size())
of << trainperp.perp[i] << "\t\t" ;
- else
+ else
of << "N/A\t\t";
if (i<testperp.perp.size())
of << testperp.perp[i] << "\t\t" ;
- else
+ else
of << "N/A\t\t";
if (i < trainVperp.perp.size())
of << trainVperp.perp[i] << "\t\t" ;
- else
+ else
of << "N/A\t";
if (i< testVperp.perp.size())
of << testVperp.perp[i] << '\n' ;
- else
+ else
of << "N/A\n";
}
}
-void printSentencePair(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- ostream& of)
-
- // just writes a sentece pair to the give output stream, one sentence pair line
- // it writes token ids not actual tokens.
+void printSentencePair(Vector<WordIndex>& es,
+ Vector<WordIndex>& fs,
+ ostream& of)
+
+// just writes a sentece pair to the give output stream, one sentence pair line
+// it writes token ids not actual tokens.
{
WordIndex i, j, l, m;
l = es.size() - 1;
m = fs.size() - 1;
- of << "Source sentence length : " << l << " , target : " << m << "\n";
+ of << "Source sentence length : " << l << " , target : " << m << "\n";
for (i = 1 ; i <= l ; i++)
of << es[i] << ' ';
of << "\n";
@@ -97,107 +97,103 @@ void printSentencePair(Vector<WordIndex>& es,
}
extern short CompactAlignmentFormat;
-void printAlignToFile(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs,
- const Vector<WordEntry>& evlist,
- const Vector<WordEntry>& fvlist,
- ostream& of2,
- const Vector<WordIndex>& viterbi_alignment,
- int pair_no, double alignment_score)
-
- // prints the given alignment to alignments file (given it stream pointer)
- // in a format recognizable by the draw-alignment tool ... which is of the
- // example (each line triple is one sentence pair):
- // # sentence caption
- // target_word_1 target_word_2 ..... target_word_m
- // source_word_1 ({ x y z }) source_word_2 ({ }) .. source_word_n ({w})
- // where x, y, z, and w are positions of target words that each source word
- // is connected to.
+void printAlignToFile(const Vector<WordIndex>& es,
+ const Vector<WordIndex>& fs,
+ const Vector<WordEntry>& evlist,
+ const Vector<WordEntry>& fvlist,
+ ostream& of2,
+ const Vector<WordIndex>& viterbi_alignment,
+ int pair_no, double alignment_score)
+
+// prints the given alignment to alignments file (given it stream pointer)
+// in a format recognizable by the draw-alignment tool ... which is of the
+// example (each line triple is one sentence pair):
+// # sentence caption
+// target_word_1 target_word_2 ..... target_word_m
+// source_word_1 ({ x y z }) source_word_2 ({ }) .. source_word_n ({w})
+// where x, y, z, and w are positions of target words that each source word
+// is connected to.
{
WordIndex l, m;
- Vector<Vector<WordIndex> > translations(es.size()); // each english words has a vector
+ Vector<Vector<WordIndex> > translations(es.size()); // each english words has a vector
// of zero or more translations .
l = es.size() - 1;
m = fs.size() - 1;
- if( CompactAlignmentFormat )
- {
- for (WordIndex j = 1 ; j <= m ; j++)
- if( viterbi_alignment[j] )
- of2 << viterbi_alignment[j]-1 << ' ' << j-1 << ' ';
- of2 << '\n';
+ if( CompactAlignmentFormat ) {
+ for (WordIndex j = 1 ; j <= m ; j++)
+ if( viterbi_alignment[j] )
+ of2 << viterbi_alignment[j]-1 << ' ' << j-1 << ' ';
+ of2 << '\n';
+ } else {
+ of2 << "# Sentence pair (" << pair_no <<") source length " << l << " target length "<< m <<
+ " alignment score : "<< alignment_score << '\n';
+ for (WordIndex j = 1 ; j <= m ; j++) {
+ of2 << fvlist[fs[j]].word << " " ;
+ translations[viterbi_alignment[j]].push_back(j);
}
- else
- {
- of2 << "# Sentence pair (" << pair_no <<") source length " << l << " target length "<< m <<
- " alignment score : "<< alignment_score << '\n';
- for (WordIndex j = 1 ; j <= m ; j++){
- of2 << fvlist[fs[j]].word << " " ;
- translations[viterbi_alignment[j]].push_back(j);
- }
- of2 << '\n';
-
- for (WordIndex i = 0 ; i <= l ; i++){
- of2 << evlist[es[i]].word << " ({ " ;
- for (WordIndex j = 0 ; j < translations[i].size() ; j++)
- of2 << translations[i][j] << " " ;
- of2 << "}) ";
- }
- of2 << '\n';
+ of2 << '\n';
+
+ for (WordIndex i = 0 ; i <= l ; i++) {
+ of2 << evlist[es[i]].word << " ({ " ;
+ for (WordIndex j = 0 ; j < translations[i].size() ; j++)
+ of2 << translations[i][j] << " " ;
+ of2 << "}) ";
}
+ of2 << '\n';
+ }
}
-void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
- sentenceHandler& testHandler, vcbList& trainEList,
- vcbList& trainFList, vcbList& testEList, vcbList& testFList)
+void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
+ sentenceHandler& testHandler, vcbList& trainEList,
+ vcbList& trainFList, vcbList& testEList, vcbList& testFList)
{
set<pair<WordIndex, WordIndex> > testCoocur ;
sentPair s ;
/* string unseenCoocurFile = Prefix + ".tst.unseen.cooc" ;
ofstream of_unseenCoocur(unseenCoocurFile.c_str());
-
+
string seenCoocurFile = Prefix + ".tst.seen.cooc" ;
ofstream of_seenCoocur(seenCoocurFile.c_str());
- */
+ */
testHandler.rewind();
int seen_coocur = 0, unseen_coocur = 0, srcUnk = 0, trgUnk = 0 ;
- while(testHandler.getNextSentence(s)){
+ while(testHandler.getNextSentence(s)) {
for (WordIndex i = 1 ; i < s.eSent.size() ; i++)
- for (WordIndex j = 1 ; j < s.fSent.size() ; j++)
- testCoocur.insert(pair<WordIndex, WordIndex> (s.eSent[i], s.fSent[j])) ;
+ for (WordIndex j = 1 ; j < s.fSent.size() ; j++)
+ testCoocur.insert(pair<WordIndex, WordIndex> (s.eSent[i], s.fSent[j])) ;
}
set<pair<WordIndex, WordIndex> >::const_iterator i ;
- for (i = testCoocur.begin() ; i != testCoocur.end() ; ++i){
- if (tTable.getProb((*i).first, (*i).second) > PROB_SMOOTH){
+ for (i = testCoocur.begin() ; i != testCoocur.end() ; ++i) {
+ if (tTable.getProb((*i).first, (*i).second) > PROB_SMOOTH) {
seen_coocur ++ ;
// of_seenCoocur << (*i).first << ' ' << (*i).second << '\n';
- }
- else {
+ } else {
unseen_coocur++;
// of_unseenCoocur << (*i).first << ' ' << (*i).second << '\n';
}
}
-
+
string trgUnkFile = Prefix + ".tst.trg.unk" ;
ofstream of_trgUnk(trgUnkFile.c_str());
- for (WordIndex i = 0 ; i < testFList.getVocabList().size() && i < testFList.uniqTokens();i++)
- if (testFList.getVocabList()[i].freq > 0 && trainFList.getVocabList()[i].freq <= 0){
+ for (WordIndex i = 0 ; i < testFList.getVocabList().size() && i < testFList.uniqTokens(); i++)
+ if (testFList.getVocabList()[i].freq > 0 && trainFList.getVocabList()[i].freq <= 0) {
of_trgUnk << i << ' ' << testFList.getVocabList()[i].word << ' ' << testFList.getVocabList()[i].freq
- << '\n';
+ << '\n';
trgUnk++ ;
}
string srcUnkFile = Prefix + ".tst.src.unk" ;
ofstream of_srcUnk(srcUnkFile.c_str());
- for (WordIndex j = 0 ; j < testEList.getVocabList().size() && j < testEList.uniqTokens();j++)
- if (testEList.getVocabList()[j].freq > 0 && trainEList.getVocabList()[j].freq <= 0){
+ for (WordIndex j = 0 ; j < testEList.getVocabList().size() && j < testEList.uniqTokens(); j++)
+ if (testEList.getVocabList()[j].freq > 0 && trainEList.getVocabList()[j].freq <= 0) {
srcUnk++ ;
of_srcUnk << j << ' ' << testEList.getVocabList()[j].word << ' ' << testEList.getVocabList()[j].freq
- << '\n';
+ << '\n';
}
- string summaryFile = Prefix + ".tst.stats" ;
+ string summaryFile = Prefix + ".tst.stats" ;
ofstream of_summary(summaryFile.c_str());
of_summary << "\t\t STATISTICS ABOUT TEST CORPUS\n\n";
of_summary << "source unique tokens: " << testEList.uniqTokens() << '\n';
@@ -206,6 +202,6 @@ void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
of_summary << "unique unseen target tokens: " << trgUnk << '\n';
of_summary << "cooccurrences not found in the final t table: " << unseen_coocur << '\n';
of_summary << "cooccurrences found in the final t table: " << seen_coocur << '\n';
-
+
}
diff --git a/mgizapp/src/snt2cooc-reduce-mem-preprocess.cpp b/mgizapp/src/snt2cooc-reduce-mem-preprocess.cpp
index bafcb4d..c969774 100644
--- a/mgizapp/src/snt2cooc-reduce-mem-preprocess.cpp
+++ b/mgizapp/src/snt2cooc-reduce-mem-preprocess.cpp
@@ -12,65 +12,58 @@ using namespace std;
void readVoc(istream&in,map<string,string>&voc)
{
- string line,s1,s2;
- voc["1"]="UNK";
- if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
+ string line,s1,s2;
+ voc["1"]="UNK";
+ if( !in )cerr <<"Vocabulary does not exist.\n";
+ while(getline(in,line)) {
+ istrstream eingabe(line.c_str());
+ if( !(eingabe>>s1>>s2))
+ cerr << "ERROR in vocabulary '" << line << "'\n";
+ voc[s1]=s2;
+ }
}
int maxElems=0;
int main(int argc,char **argv)
{
- if( argc!=4&&argc!=5 )
- {
- cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
- if( argc==6 )
- {
- if(string(argv[4])!="-counts")
- cerr << "ERROR: wrong option " << argv[5] << endl;
- maxElems=10000000;
+ if( argc!=4&&argc!=5 ) {
+ cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
+ cerr << "Converts GIZA++ snt-format into plain text.\n";
+ exit(1);
+ }
+ if( argc==6 ) {
+ if(string(argv[4])!="-counts")
+ cerr << "ERROR: wrong option " << argv[5] << endl;
+ maxElems=10000000;
+ }
+ ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
+ map<string,string>voc1,voc2;
+ readVoc(v1,voc1);
+ readVoc(v2,voc2);
+ string line1,line2,line3;
+ int nLine=0;
+ while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) {
+ istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
+ double count;
+ string word;
+ eingabe1>>count;
+ vector<int>l1,l2;
+ while(eingabe2>>word)
+ l1.push_back(atoi(word.c_str()));
+ while(eingabe3>>word)
+ l2.push_back(atoi(word.c_str()));
+ if( ((++nLine)%1000)==0 )
+ cerr << "line " << nLine << '\n';
+ for(unsigned int j=0; j<l2.size(); ++j) {
+ cout << 0 << " " << l2[j] << endl;
}
- ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
- map<string,string>voc1,voc2;
- readVoc(v1,voc1);
- readVoc(v2,voc2);
- string line1,line2,line3;
- int nLine=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<int>l1,l2;
- while(eingabe2>>word)
- l1.push_back(atoi(word.c_str()));
- while(eingabe3>>word)
- l2.push_back(atoi(word.c_str()));
- if( ((++nLine)%1000)==0 )
- cerr << "line " << nLine << '\n';
- for(unsigned int j=0; j<l2.size(); ++j)
- {
- cout << 0 << " " << l2[j] << endl;
- }
- for(unsigned int i=0; i<l1.size(); ++i)
- {
- for(unsigned int j=0; j<l2.size(); ++j)
- {
- cout << l1[i] << " " << l2[j] << endl;
- }
- }
+ for(unsigned int i=0; i<l1.size(); ++i) {
+ for(unsigned int j=0; j<l2.size(); ++j) {
+ cout << l1[i] << " " << l2[j] << endl;
+ }
}
- cerr << "END.\n";
+ }
+ cerr << "END.\n";
}
diff --git a/mgizapp/src/snt2cooc.cpp b/mgizapp/src/snt2cooc.cpp
index 48328dc..df8f7bd 100644
--- a/mgizapp/src/snt2cooc.cpp
+++ b/mgizapp/src/snt2cooc.cpp
@@ -12,35 +12,32 @@ using namespace std;
void readVoc(istream&in,map<string,string>&voc)
{
- string line,s1,s2;
+ string line,s1,s2;
voc["1"]="UNK";
if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
+ while(getline(in,line)) {
+ istrstream eingabe(line.c_str());
+ if( !(eingabe>>s1>>s2))
+ cerr << "ERROR in vocabulary '" << line << "'\n";
+ voc[s1]=s2;
+ }
}
int maxElems=0;
int main(int argc,char **argv)
{
- if( argc!=5&&argc!=6 )
- {
- cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
+ if( argc!=5&&argc!=6 ) {
+ cerr << "Usage: " << argv[0] << " output vcb1 vcb2 snt12 \n";
+ cerr << "Converts GIZA++ snt-format into plain text.\n";
+ exit(1);
+ }
bool counts=0;
- if( argc==6 )
- {
- if(string(argv[5])!="-counts")
- cerr << "ERROR: wrong option " << argv[6] << endl;
- counts=1;
- maxElems=10000000;
- }
+ if( argc==6 ) {
+ if(string(argv[5])!="-counts")
+ cerr << "ERROR: wrong option " << argv[6] << endl;
+ counts=1;
+ maxElems=10000000;
+ }
ifstream v1(argv[2]),v2(argv[3]),t(argv[4]);
ofstream ou(argv[1]);
map<string,string>voc1,voc2;
@@ -50,62 +47,56 @@ int main(int argc,char **argv)
vector<map<int,int> > vsi(voc1.size()+1000);
int nLine=0;
int totalElems=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<int>l1,l2;
- while(eingabe2>>word)
- l1.push_back(atoi(word.c_str()));
- while(eingabe3>>word)
- l2.push_back(atoi(word.c_str()));
- if( ((++nLine)%1000)==0 )
- cerr << "line " << nLine << '\n';
- totalElems-=vsi[0].size();
- for(unsigned int j=0;j<l2.size();++j)
- vsi[0][l2[j]]++;
- totalElems+=vsi[0].size();
- for(unsigned int i=0;i<l1.size();++i)
- {
- if( l1[i]>=int(vsi.size()) )
- {
- cerr << "I have to resize: " << l1[i] << endl;
- vsi.resize(l1[i]+1);
- }
- map<int,int>&theset=vsi[l1[i]];
- totalElems-=theset.size();
- for(unsigned int j=0;j<l2.size();++j)
- theset[l2[j]]++;
- totalElems+=theset.size();
- }
- if( totalElems>maxElems&&maxElems )
- {
- cerr << "INFO: print out " << totalElems << " entries.\n";
- for(unsigned int i=0;i<vsi.size();++i)
- for(map<int,int>::const_iterator j=vsi[i].begin();j!=vsi[i].end();++j)
- {
- if(counts==1 )
- ou << j->second << " " << i << " " << j->first << '\n';
- else
- ou << i << " " << j->first << '\n';
- }
- totalElems=0;
- vsi.clear();
- vsi.resize(voc1.size()+1000);
- }
+ while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) {
+ istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
+ double count;
+ string word;
+ eingabe1>>count;
+ vector<int>l1,l2;
+ while(eingabe2>>word)
+ l1.push_back(atoi(word.c_str()));
+ while(eingabe3>>word)
+ l2.push_back(atoi(word.c_str()));
+ if( ((++nLine)%1000)==0 )
+ cerr << "line " << nLine << '\n';
+ totalElems-=vsi[0].size();
+ for(unsigned int j=0; j<l2.size(); ++j)
+ vsi[0][l2[j]]++;
+ totalElems+=vsi[0].size();
+ for(unsigned int i=0; i<l1.size(); ++i) {
+ if( l1[i]>=int(vsi.size()) ) {
+ cerr << "I have to resize: " << l1[i] << endl;
+ vsi.resize(l1[i]+1);
+ }
+ map<int,int>&theset=vsi[l1[i]];
+ totalElems-=theset.size();
+ for(unsigned int j=0; j<l2.size(); ++j)
+ theset[l2[j]]++;
+ totalElems+=theset.size();
}
+ if( totalElems>maxElems&&maxElems ) {
+ cerr << "INFO: print out " << totalElems << " entries.\n";
+ for(unsigned int i=0; i<vsi.size(); ++i)
+ for(map<int,int>::const_iterator j=vsi[i].begin(); j!=vsi[i].end(); ++j) {
+ if(counts==1 )
+ ou << j->second << " " << i << " " << j->first << '\n';
+ else
+ ou << i << " " << j->first << '\n';
+ }
+ totalElems=0;
+ vsi.clear();
+ vsi.resize(voc1.size()+1000);
+ }
+ }
cerr << "END.\n";
- for(unsigned int i=0;i<vsi.size();++i)
- for(map<int,int>::const_iterator j=vsi[i].begin();j!=vsi[i].end();++j)
- {
- if(counts==1 )
- ou << j->second << " " << i << " " << j->first << '\n';
- else
- ou << i << " " << j->first << '\n';
- }
- ou.flush();
- ou.close();
+ for(unsigned int i=0; i<vsi.size(); ++i)
+ for(map<int,int>::const_iterator j=vsi[i].begin(); j!=vsi[i].end(); ++j) {
+ if(counts==1 )
+ ou << j->second << " " << i << " " << j->first << '\n';
+ else
+ ou << i << " " << j->first << '\n';
+ }
+ ou.flush();
+ ou.close();
}
diff --git a/mgizapp/src/snt2plain.cpp b/mgizapp/src/snt2plain.cpp
index 9950050..2f1998f 100644
--- a/mgizapp/src/snt2plain.cpp
+++ b/mgizapp/src/snt2plain.cpp
@@ -12,33 +12,30 @@ using namespace std;
void readVoc(istream&in,map<string,string>&voc)
{
- string line,s1,s2;
+ string line,s1,s2;
voc["1"]="UNK";
if( !in )cerr <<"Vocabulary does not exist.\n";
- while(getline(in,line))
- {
- istrstream eingabe(line.c_str());
- if( !(eingabe>>s1>>s2))
- cerr << "ERROR in vocabulary '" << line << "'\n";
- voc[s1]=s2;
- }
+ while(getline(in,line)) {
+ istrstream eingabe(line.c_str());
+ if( !(eingabe>>s1>>s2))
+ cerr << "ERROR in vocabulary '" << line << "'\n";
+ voc[s1]=s2;
+ }
}
int main(int argc,char **argv)
{
- if( argc!=5&&argc!=6 )
- {
- cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n";
- cerr << "Converts GIZA++ snt-format into plain text.\n";
- exit(1);
- }
+ if( argc!=5&&argc!=6 ) {
+ cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n";
+ cerr << "Converts GIZA++ snt-format into plain text.\n";
+ exit(1);
+ }
bool counts=0;
- if( argc==6 )
- {
- if(string(argv[5])!="-counts")
- cerr << "ERROR: wrong option " << argv[5] << endl;
- counts=1;
- }
+ if( argc==6 ) {
+ if(string(argv[5])!="-counts")
+ cerr << "ERROR: wrong option " << argv[5] << endl;
+ counts=1;
+ }
ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
string prefix(argv[4]);
string outfil1=prefix+"1.txt";
@@ -51,43 +48,37 @@ int main(int argc,char **argv)
int source=0,target=0;
string line1,line2,line3;
int printed=0;
- while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
- {
- istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
- double count;
- string word;
- eingabe1>>count;
- vector<string>l1,l2;
- while(eingabe2>>word)
- l1.push_back(word);
- while(eingabe3>>word)
- l2.push_back(word);
- if( counts )
- cout << count << '\n';
- for(unsigned int p=0;p<l1.size();p++)
- {
- if(voc1.count(l1[p])==0)
- {
- if( printed++==0)
- cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n";
- out1 << l1[p]<<' ';
- }
- else
- out1 << voc1[l1[p]] << ' ';
- source++;
- }
- for(unsigned int p=0;p<l2.size();p++)
- {
- if(voc2.count(l2[p])==0)
- {
- if( printed++ ==0)
- cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n";
- out2 <<l2[p]<<' ';
- }
- out2 << voc2[l2[p]] << ' ';
- target++;
- }
- out1<<'\n';
- out2<<'\n';
+ while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) {
+ istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
+ double count;
+ string word;
+ eingabe1>>count;
+ vector<string>l1,l2;
+ while(eingabe2>>word)
+ l1.push_back(word);
+ while(eingabe3>>word)
+ l2.push_back(word);
+ if( counts )
+ cout << count << '\n';
+ for(unsigned int p=0; p<l1.size(); p++) {
+ if(voc1.count(l1[p])==0) {
+ if( printed++==0)
+ cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n";
+ out1 << l1[p]<<' ';
+ } else
+ out1 << voc1[l1[p]] << ' ';
+ source++;
+ }
+ for(unsigned int p=0; p<l2.size(); p++) {
+ if(voc2.count(l2[p])==0) {
+ if( printed++ ==0)
+ cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n";
+ out2 <<l2[p]<<' ';
+ }
+ out2 << voc2[l2[p]] << ' ';
+ target++;
}
+ out1<<'\n';
+ out2<<'\n';
+ }
}
diff --git a/mgizapp/src/symal.cpp b/mgizapp/src/symal.cpp
index e39200e..107a89f 100644
--- a/mgizapp/src/symal.cpp
+++ b/mgizapp/src/symal.cpp
@@ -15,9 +15,9 @@
using namespace std;
-#define MAX_WORD 1000 //maximum lengthsource/target strings
+#define MAX_WORD 1000 //maximum lengthsource/target strings
#define MAX_M 200 //maximum length of source strings
-#define MAX_N 200 //maximum length of target strings
+#define MAX_N 200 //maximum length of target strings
#define UNION 1
#define INTERSECT 2
@@ -30,27 +30,27 @@ using namespace std;
#define END_ENUM { (char*)0, 0 }
static Enum_T AlignEnum [] = {
-{ "union", UNION },
-{ "u", UNION },
-{ "intersect", INTERSECT},
-{ "i", INTERSECT},
-{ "grow", GROW },
-{ "g", GROW },
-{ "srctotgt", SRCTOTGT },
-{ "s2t", SRCTOTGT },
-{ "tgttosrc", TGTTOSRC },
-{ "t2s", TGTTOSRC },
+ { "union", UNION },
+ { "u", UNION },
+ { "intersect", INTERSECT},
+ { "i", INTERSECT},
+ { "grow", GROW },
+ { "g", GROW },
+ { "srctotgt", SRCTOTGT },
+ { "s2t", SRCTOTGT },
+ { "tgttosrc", TGTTOSRC },
+ { "t2s", TGTTOSRC },
END_ENUM
};
static Enum_T BoolEnum [] = {
- { "true", BOOL_YES },
- { "yes", BOOL_YES },
- { "y", BOOL_YES },
- { "false", BOOL_NO },
- { "no", BOOL_NO },
- { "n", BOOL_NO },
- END_ENUM
+ { "true", BOOL_YES },
+ { "yes", BOOL_YES },
+ { "y", BOOL_YES },
+ { "false", BOOL_NO },
+ { "no", BOOL_NO },
+ { "n", BOOL_NO },
+ END_ENUM
};
@@ -63,7 +63,7 @@ int** A; //alignment matrix with information symmetric/direct/inverse alignments
int verbose=0;
-//read an alignment pair from the input stream.
+//read an alignment pair from the input stream.
int lc = 0;
@@ -72,69 +72,71 @@ int getals(fstream& inp,int& m, int *a,int& n, int *b,ostream& out)
char w[MAX_WORD], dummy[10];
string tgtsent;
int i,j,freq;
- if (inp >> freq){
+ if (inp >> freq) {
++lc;
//target sentence
- inp >> n; assert(n<MAX_N);
- for (i=1;i<=n;i++){
+ inp >> n;
+ assert(n<MAX_N);
+ for (i=1; i<=n; i++) {
inp >> setw(MAX_WORD) >> w;
if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
+ cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
<< MAX_WORD-1 << endl;
assert(strlen(w)<MAX_WORD-1);
}
- tgtsent+=w;
- tgtsent+=" ";
+ tgtsent+=w;
+ tgtsent+=" ";
}
-
+
inp >> dummy; //# separator
// inverse alignment
- for (i=1;i<=n;i++) inp >> b[i];
+ for (i=1; i<=n; i++) inp >> b[i];
//source sentence
- inp >> m; assert(m<MAX_M);
- for (j=1;j<=m;j++){
+ inp >> m;
+ assert(m<MAX_M);
+ for (j=1; j<=m; j++) {
inp >> setw(MAX_WORD) >> w;
if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
+ cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
<< MAX_WORD-1 << endl;
assert(strlen(w)<MAX_WORD-1);
}
- out << w << " ";
+ out << w << " ";
}
- out << "{##} " << tgtsent << "{##} ";
-
-
+ out << "{##} " << tgtsent << "{##} ";
+
+
inp >> dummy; //# separator
// direct alignment
- for (j=1;j<=m;j++) {
+ for (j=1; j<=m; j++) {
inp >> a[j];
assert(0<=a[j] && a[j]<=n);
}
//check inverse alignemnt
- for (i=1;i<=n;i++)
+ for (i=1; i<=n; i++)
assert(0<=b[i] && b[i]<=m);
-
+
return 1;
- }
- else
+ } else
return 0;
};
//compute union alignment
-int prunionalignment(fstream& out,int m,int *a,int n,int* b){
-
+int prunionalignment(fstream& out,int m,int *a,int n,int* b)
+{
+
ostringstream sout;
-
- for (int j=1;j<=m;j++)
- if (a[j])
+
+ for (int j=1; j<=m; j++)
+ if (a[j])
sout << j-1 << "-" << a[j]-1 << " ";
- for (int i=1;i<=n;i++)
+ for (int i=1; i<=n; i++)
if (b[i] && a[b[i]]!=i)
sout << b[i]-1 << "-" << i-1 << " ";
@@ -146,19 +148,20 @@ int prunionalignment(fstream& out,int m,int *a,int n,int* b){
str.replace(str.length()-1,1,"\n");
out << str;
- out.flush();
-
- return 1;
+ out.flush();
+
+ return 1;
}
//Compute intersection alignment
-int printersect(fstream& out,int m,int *a,int n,int* b){
+int printersect(fstream& out,int m,int *a,int n,int* b)
+{
ostringstream sout;
- for (int j=1;j<=m;j++)
+ for (int j=1; j<=m; j++)
if (a[j] && b[a[j]]==j)
sout << j-1 << "-" << a[j]-1 << " ";
@@ -170,20 +173,21 @@ int printersect(fstream& out,int m,int *a,int n,int* b){
str.replace(str.length()-1,1,"\n");
out << str;
- out.flush();
+ out.flush();
- return 1;
+ return 1;
}
//Compute target-to-source alignment
-int printtgttosrc(fstream& out,int m,int *a,int n,int* b){
-
+int printtgttosrc(fstream& out,int m,int *a,int n,int* b)
+{
+
ostringstream sout;
- for (int i=1;i<=n;i++)
+ for (int i=1; i<=n; i++)
if (b[i])
- sout << b[i]-1 << "-" << i-1 << " ";
+ sout << b[i]-1 << "-" << i-1 << " ";
//fix the last " "
string str = sout.str();
@@ -193,20 +197,21 @@ int printtgttosrc(fstream& out,int m,int *a,int n,int* b){
str.replace(str.length()-1,1,"\n");
out << str;
- out.flush();
+ out.flush();
- return 1;
+ return 1;
}
//Compute source-to-target alignment
-int printsrctotgt(fstream& out,int m,int *a,int n,int* b){
+int printsrctotgt(fstream& out,int m,int *a,int n,int* b)
+{
ostringstream sout;
- for (int j=1;j<=m;j++)
+ for (int j=1; j<=m; j++)
if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
+ sout << j-1 << "-" << a[j]-1 << " ";
//fix the last " "
string str = sout.str();
@@ -216,9 +221,9 @@ int printsrctotgt(fstream& out,int m,int *a,int n,int* b){
str.replace(str.length()-1,1,"\n");
out << str;
- out.flush();
+ out.flush();
- return 1;
+ return 1;
}
//Compute Grow Diagonal Alignment
@@ -227,159 +232,160 @@ int printsrctotgt(fstream& out,int m,int *a,int n,int* b){
//to represent the grow alignment as the unionalignment of a
//directed and inverted alignment
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false){
-
- ostringstream sout;
-
- vector <pair <int,int> > neighbors; //neighbors
-
- pair <int,int> entry;
-
- neighbors.push_back(make_pair(-1,-0));
- neighbors.push_back(make_pair(0,-1));
- neighbors.push_back(make_pair(1,0));
- neighbors.push_back(make_pair(0,1));
-
-
- if (diagonal){
- neighbors.push_back(make_pair(-1,-1));
- neighbors.push_back(make_pair(-1,1));
- neighbors.push_back(make_pair(1,-1));
- neighbors.push_back(make_pair(1,1));
- }
-
-
- int i,j,o;
-
-
- //covered foreign and english positions
-
- memset(fa,0,(m+1)*sizeof(int));
- memset(ea,0,(n+1)*sizeof(int));
-
- //matrix to quickly check if one point is in the symmetric
- //alignment (value=2), direct alignment (=1) and inverse alignment
-
- for (int i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int));
-
- set <pair <int,int> > currentpoints; //symmetric alignment
- set <pair <int,int> > unionalignment; //union alignment
-
- pair <int,int> point; //variable to store points
- set<pair <int,int> >::const_iterator k; //iterator over sets
-
- //fill in the alignments
- for (j=1;j<=m;j++){
- if (a[j]){
- unionalignment.insert(make_pair(a[j],j));
- if (b[a[j]]==j){
- fa[j]=1;ea[a[j]]=1;
- A[a[j]][j]=2;
- currentpoints.insert(make_pair(a[j],j));
- }
- else
- A[a[j]][j]=-1;
+int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false)
+{
+
+ ostringstream sout;
+
+ vector <pair <int,int> > neighbors; //neighbors
+
+ pair <int,int> entry;
+
+ neighbors.push_back(make_pair(-1,-0));
+ neighbors.push_back(make_pair(0,-1));
+ neighbors.push_back(make_pair(1,0));
+ neighbors.push_back(make_pair(0,1));
+
+
+ if (diagonal) {
+ neighbors.push_back(make_pair(-1,-1));
+ neighbors.push_back(make_pair(-1,1));
+ neighbors.push_back(make_pair(1,-1));
+ neighbors.push_back(make_pair(1,1));
+ }
+
+
+ int i,j,o;
+
+
+ //covered foreign and english positions
+
+ memset(fa,0,(m+1)*sizeof(int));
+ memset(ea,0,(n+1)*sizeof(int));
+
+ //matrix to quickly check if one point is in the symmetric
+ //alignment (value=2), direct alignment (=1) and inverse alignment
+
+ for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int));
+
+ set <pair <int,int> > currentpoints; //symmetric alignment
+ set <pair <int,int> > unionalignment; //union alignment
+
+ pair <int,int> point; //variable to store points
+ set<pair <int,int> >::const_iterator k; //iterator over sets
+
+ //fill in the alignments
+ for (j=1; j<=m; j++) {
+ if (a[j]) {
+ unionalignment.insert(make_pair(a[j],j));
+ if (b[a[j]]==j) {
+ fa[j]=1;
+ ea[a[j]]=1;
+ A[a[j]][j]=2;
+ currentpoints.insert(make_pair(a[j],j));
+ } else
+ A[a[j]][j]=-1;
+ }
+ }
+
+ for (i=1; i<=n; i++)
+ if (b[i] && a[b[i]]!=i) { //not intersection
+ unionalignment.insert(make_pair(i,b[i]));
+ A[i][b[i]]=1;
+ }
+
+
+ int added=1;
+
+ while (added) {
+ added=0;
+ ///scan the current alignment
+ for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
+ //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
+ for (o=0; o<neighbors.size(); o++) {
+ //cout << "go over check all neighbors\n";
+ point.first=k->first+neighbors[o].first;
+ point.second=k->second+neighbors[o].second;
+ //cout << point.second-1 << " " << point.first-1 << "\n";
+ //check if neighbor is inside 'matrix'
+ if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
+ //check if neighbor is in the unionalignment alignment
+ if (b[point.first]==point.second || a[point.second]==point.first) {
+ //cout << "In unionalignment ";cout.flush();
+ //check if it connects at least one uncovered word
+ if (!(ea[point.first] && fa[point.second])) {
+ //insert point in currentpoints!
+ currentpoints.insert(point);
+ A[point.first][point.second]=2;
+ ea[point.first]=1;
+ fa[point.second]=1;
+ added=1;
+ //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
+ }
+ }
+ }
+ }
+ }
+
+ if (isfinal) {
+ for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
+ if (A[k->first][k->second]==1) {
+ point.first=k->first;
+ point.second=k->second;
+ //one of the two words is not covered yet
+ //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
+ if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
+ (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
+ //add it!
+ currentpoints.insert(point);
+ A[point.first][point.second]=2;
+ //keep track of new covered positions
+ ea[point.first]=1;
+ fa[point.second]=1;
+
+ //added=1;
+ //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
+ }
}
- }
-
- for (i=1;i<=n;i++)
- if (b[i] && a[b[i]]!=i){ //not intersection
- unionalignment.insert(make_pair(i,b[i]));
- A[i][b[i]]=1;
- }
-
-
- int added=1;
-
- while (added){
- added=0;
- ///scan the current alignment
- for (k=currentpoints.begin();k!=currentpoints.end();k++){
- //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
- for (o=0;o<neighbors.size();o++){
- //cout << "go over check all neighbors\n";
- point.first=k->first+neighbors[o].first;
- point.second=k->second+neighbors[o].second;
- //cout << point.second-1 << " " << point.first-1 << "\n";
- //check if neighbor is inside 'matrix'
- if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
- //check if neighbor is in the unionalignment alignment
- if (b[point.first]==point.second || a[point.second]==point.first){
- //cout << "In unionalignment ";cout.flush();
- //check if it connects at least one uncovered word
- if (!(ea[point.first] && fa[point.second]))
- {
- //insert point in currentpoints!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- ea[point.first]=1; fa[point.second]=1;
- added=1;
- //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
- }
- }
- }
+
+ for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
+ if (A[k->first][k->second]==-1) {
+ point.first=k->first;
+ point.second=k->second;
+ //one of the two words is not covered yet
+ //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
+ if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
+ (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
+ //add it!
+ currentpoints.insert(point);
+ A[point.first][point.second]=2;
+ //keep track of new covered positions
+ ea[point.first]=1;
+ fa[point.second]=1;
+
+ //added=1;
+ //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
+ }
}
- }
-
- if (isfinal){
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
-
- for (k=unionalignment.begin();k!=unionalignment.end();k++)
- if (A[k->first][k->second]==-1)
- {
- point.first=k->first;point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second])))
- {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
- }
-
-
- for (k=currentpoints.begin();k!=currentpoints.end();k++)
- sout << k->second-1 << "-" << k->first-1 << " ";
-
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
- return 1;
-
- return 1;
+ }
+
+
+ for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
+ sout << k->second-1 << "-" << k->first-1 << " ";
+
+
+ //fix the last " "
+ string str = sout.str();
+ if (str.length() == 0)
+ str = "\n";
+ else
+ str.replace(str.length()-1,1,"\n");
+
+ out << str;
+ out.flush();
+ return 1;
+
+ return 1;
}
@@ -387,115 +393,117 @@ int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool i
//Main file here
-int main(int argc, char** argv){
-
-int alignment=0;
-const char* input="/dev/stdin";
-const char* output="/dev/stdout";
-int diagonal=false;
-int isfinal=false;
-int bothuncovered=false;
-
-
- DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
- "alignment", CMDENUMTYPE, &alignment, AlignEnum,
- "d", CMDENUMTYPE, &diagonal, BoolEnum,
- "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
- "f", CMDENUMTYPE, &isfinal, BoolEnum,
- "final", CMDENUMTYPE, &isfinal, BoolEnum,
- "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "i", CMDSTRINGTYPE, &input,
- "o", CMDSTRINGTYPE, &output,
- "v", CMDENUMTYPE, &verbose, BoolEnum,
- "verbose", CMDENUMTYPE, &verbose, BoolEnum,
-
- NULL);
-
- GetParams(&argc, &argv, (char*) NULL);
-
- if (alignment==0){
- cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
- << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
-
- exit(1);
-
- }
-
- fstream inp(input,ios::in);
- fstream out(output,ios::out);
-
- if (!inp.is_open()){
- cerr << "cannot open " << input << "\n";
- exit(1);
- }
-
- if (!out.is_open()){
- cerr << "cannot open " << output << "\n";
- exit(1);
- }
-
+int main(int argc, char** argv)
+{
+
+ int alignment=0;
+ const char* input="/dev/stdin";
+ const char* output="/dev/stdout";
+ int diagonal=false;
+ int isfinal=false;
+ int bothuncovered=false;
+
+
+ DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
+ "alignment", CMDENUMTYPE, &alignment, AlignEnum,
+ "d", CMDENUMTYPE, &diagonal, BoolEnum,
+ "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
+ "f", CMDENUMTYPE, &isfinal, BoolEnum,
+ "final", CMDENUMTYPE, &isfinal, BoolEnum,
+ "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
+ "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
+ "i", CMDSTRINGTYPE, &input,
+ "o", CMDSTRINGTYPE, &output,
+ "v", CMDENUMTYPE, &verbose, BoolEnum,
+ "verbose", CMDENUMTYPE, &verbose, BoolEnum,
+
+ NULL);
+
+ GetParams(&argc, &argv, (char*) NULL);
+
+ if (alignment==0) {
+ cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
+ << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
+
+ exit(1);
+
+ }
+
+ fstream inp(input,ios::in);
+ fstream out(output,ios::out);
+
+ if (!inp.is_open()) {
+ cerr << "cannot open " << input << "\n";
+ exit(1);
+ }
+
+ if (!out.is_open()) {
+ cerr << "cannot open " << output << "\n";
+ exit(1);
+ }
+
int a[MAX_M],b[MAX_N],m,n;
fa=new int[MAX_M+1];
ea=new int[MAX_N+1];
-
-
+
+
int sents = 0;
- A=new int *[MAX_N+1];
- for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1];
-
- switch (alignment){
- case UNION:
- cerr << "symal: computing union alignment\n";
- while(getals(inp,m,a,n,b,out)) {
- prunionalignment(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case INTERSECT:
- cerr << "symal: computing intersect alignment\n";
- while(getals(inp,m,a,n,b,out)) {
- printersect(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case GROW:
- cerr << "symal: computing grow alignment: diagonal ("
- << diagonal << ") final ("<< isfinal << ")"
- << "both-uncovered (" << bothuncovered <<")\n";
-
- while(getals(inp,m,a,n,b,out))
- printgrow(out,m,a,n,b,diagonal,isfinal,bothuncovered);
-
- break;
- case TGTTOSRC:
- cerr << "symal: computing target-to-source alignment\n";
-
- while(getals(inp,m,a,n,b,out)){
- printtgttosrc(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case SRCTOTGT:
- cerr << "symal: computing source-to-target alignment\n";
-
- while(getals(inp,m,a,n,b,out)){
- printsrctotgt(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- default:
- exit(1);
- }
-
- delete [] fa; delete [] ea;
- for (int i=1;i<=MAX_N;i++) delete [] A[i];
- delete [] A;
-
- exit(0);
+ A=new int *[MAX_N+1];
+ for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
+
+ switch (alignment) {
+ case UNION:
+ cerr << "symal: computing union alignment\n";
+ while(getals(inp,m,a,n,b,out)) {
+ prunionalignment(out,m,a,n,b);
+ sents++;
+ }
+ cerr << "Sents: " << sents << endl;
+ break;
+ case INTERSECT:
+ cerr << "symal: computing intersect alignment\n";
+ while(getals(inp,m,a,n,b,out)) {
+ printersect(out,m,a,n,b);
+ sents++;
+ }
+ cerr << "Sents: " << sents << endl;
+ break;
+ case GROW:
+ cerr << "symal: computing grow alignment: diagonal ("
+ << diagonal << ") final ("<< isfinal << ")"
+ << "both-uncovered (" << bothuncovered <<")\n";
+
+ while(getals(inp,m,a,n,b,out))
+ printgrow(out,m,a,n,b,diagonal,isfinal,bothuncovered);
+
+ break;
+ case TGTTOSRC:
+ cerr << "symal: computing target-to-source alignment\n";
+
+ while(getals(inp,m,a,n,b,out)) {
+ printtgttosrc(out,m,a,n,b);
+ sents++;
+ }
+ cerr << "Sents: " << sents << endl;
+ break;
+ case SRCTOTGT:
+ cerr << "symal: computing source-to-target alignment\n";
+
+ while(getals(inp,m,a,n,b,out)) {
+ printsrctotgt(out,m,a,n,b);
+ sents++;
+ }
+ cerr << "Sents: " << sents << endl;
+ break;
+ default:
+ exit(1);
+ }
+
+ delete [] fa;
+ delete [] ea;
+ for (int i=1; i<=MAX_N; i++) delete [] A[i];
+ delete [] A;
+
+ exit(0);
}
diff --git a/mgizapp/src/syncObj.h b/mgizapp/src/syncObj.h
index 9b1b7bf..f36bac3 100644
--- a/mgizapp/src/syncObj.h
+++ b/mgizapp/src/syncObj.h
@@ -8,67 +8,131 @@
#ifdef WIN32
#include <boost/thread/mutex.hpp>
-class Mutex{
- private:
- mutable boost::mutex* my_mutex;
- Mutex(const Mutex&){
-
- }
- public:
- Mutex(){
- my_mutex = new boost::mutex();
- };
- ~Mutex(){delete my_mutex;my_mutex = 0;}
+class Mutex
+{
+private:
+ mutable boost::mutex* my_mutex;
+ Mutex(const Mutex&) {
+
+ }
+public:
+ Mutex() {
+ my_mutex = new boost::mutex();
+ };
+ ~Mutex() {
+ delete my_mutex;
+ my_mutex = 0;
+ }
- inline void operator=(const Mutex& ref){}
+ inline void operator=(const Mutex& ref) {}
- public:
- inline void lock() const{my_mutex->lock();};
- inline void unlock() const{my_mutex->unlock();};
+public:
+ inline void lock() const {
+ my_mutex->lock();
+ };
+ inline void unlock() const {
+ my_mutex->unlock();
+ };
};
#else
-class Mutex{
+class Mutex
+{
private:
- mutable pthread_mutex_t mutex;
+ mutable pthread_mutex_t mutex;
public:
- Mutex(){
- pthread_mutex_init(&mutex,NULL);
- };
- ~Mutex(){pthread_mutex_destroy(&mutex);}
+ Mutex() {
+ pthread_mutex_init(&mutex,NULL);
+ };
+ ~Mutex() {
+ pthread_mutex_destroy(&mutex);
+ }
public:
- inline void lock() const{pthread_mutex_lock(&mutex);};
- inline void unlock() const{pthread_mutex_unlock(&mutex);};
+ inline void lock() const {
+ pthread_mutex_lock(&mutex);
+ };
+ inline void unlock() const {
+ pthread_mutex_unlock(&mutex);
+ };
};
#endif
-class SyncDouble{
+class SyncDouble
+{
private:
- double i;
- Mutex m;
+ double i;
+ Mutex m;
public:
- SyncDouble(double d) {i=d;};
- SyncDouble() {i=0;};
- //inline operator const double()const{return i;}
- inline bool operator ==(const double& r) const{return i == r;};
- inline void operator +=(const double& r) {m.lock();i += r;m.unlock();};
- inline void operator +=(const SyncDouble& r) {m.lock();i += r.i;m.unlock();};
- inline void operator -=(const double& r) {m.lock();i -= r;m.unlock();};
- inline void operator *=(const double& r) {m.lock();i *= r;m.unlock();};
- inline void operator /=(const double& r) {m.lock();i /= r;m.unlock();};
- inline double operator =(const double& r) {m.lock();i = r;m.unlock();return i;};
- inline double operator =(const int& r) {m.lock();i = r;m.unlock();return i;};
- inline void operator ++() {m.lock();i++;m.unlock();};
- inline double operator +(const SyncDouble& r){return r.i+i;};
- inline double operator /(const SyncDouble& r){return i/r.i;};
- //inline void operator --() {m.lock();i--;m.unlock();};
- //inline const istream& operator<<(const istream& p)const{p<<i;return p;};
- friend ostream& operator<<( ostream& p,const SyncDouble& d);
+ SyncDouble(double d) {
+ i=d;
+ };
+ SyncDouble() {
+ i=0;
+ };
+ //inline operator const double()const{return i;}
+ inline bool operator ==(const double& r) const {
+ return i == r;
+ };
+ inline void operator +=(const double& r) {
+ m.lock();
+ i += r;
+ m.unlock();
+ };
+ inline void operator +=(const SyncDouble& r) {
+ m.lock();
+ i += r.i;
+ m.unlock();
+ };
+ inline void operator -=(const double& r) {
+ m.lock();
+ i -= r;
+ m.unlock();
+ };
+ inline void operator *=(const double& r) {
+ m.lock();
+ i *= r;
+ m.unlock();
+ };
+ inline void operator /=(const double& r) {
+ m.lock();
+ i /= r;
+ m.unlock();
+ };
+ inline double operator =(const double& r) {
+ m.lock();
+ i = r;
+ m.unlock();
+ return i;
+ };
+ inline double operator =(const int& r) {
+ m.lock();
+ i = r;
+ m.unlock();
+ return i;
+ };
+ inline void operator ++() {
+ m.lock();
+ i++;
+ m.unlock();
+ };
+ inline double operator +(const SyncDouble& r) {
+ return r.i+i;
+ };
+ inline double operator /(const SyncDouble& r) {
+ return i/r.i;
+ };
+ //inline void operator --() {m.lock();i--;m.unlock();};
+ //inline const istream& operator<<(const istream& p)const{p<<i;return p;};
+ friend ostream& operator<<( ostream& p,const SyncDouble& d);
};
-inline ostream& operator<<( ostream& p, const SyncDouble& d){p<<d.i;return p;};
+inline ostream& operator<<( ostream& p, const SyncDouble& d)
+{
+ p<<d.i;
+ return p;
+};
#endif
diff --git a/mgizapp/src/transpair_model1.h b/mgizapp/src/transpair_model1.h
index dd1425d..b89347f 100644
--- a/mgizapp/src/transpair_model1.h
+++ b/mgizapp/src/transpair_model1.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,73 +36,76 @@ USA.
#include "Array2.h"
#include "mystl.h"
-class transpair_model1
+class transpair_model1
{
- public:
+public:
bool verboseTP;
Array2<PROB, Vector<PROB> > t;
WordIndex l, m;
Vector<WordIndex> E,F;
void setMode(bool)
- {}
+ {}
transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable)
- : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
- {
- WordIndex l=es.size()-1,m=fs.size()-1;
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- {
- t(i, j)=tTable.getProb(es[i], fs[j]);
- if( !(t(i,j)>=PROB_SMOOTH) )
- cerr << "ERROR IN PROBABILITY: " << t(i,j) << " " << PROB_SMOOTH << endl;
- }
- }
+ : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs) {
+ WordIndex l=es.size()-1,m=fs.size()-1;
+ for(WordIndex i=0; i<=l; i++)
+ for(WordIndex j=1; j<=m; j++) {
+ t(i, j)=tTable.getProb(es[i], fs[j]);
+ if( !(t(i,j)>=PROB_SMOOTH) )
+ cerr << "ERROR IN PROBABILITY: " << t(i,j) << " " << PROB_SMOOTH << endl;
+ }
+ }
/* transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs)
: verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
{
WordIndex l=es.size()-1,m=fs.size()-1;
for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- {
- const string&estr=globeTrainVcbList->getVocabList()[es[i]].word;
- const string&fstr=globfTrainVcbList->getVocabList()[fs[j]].word;
- if( lev(estr,fstr)==0 )
- t(i,j)=1.0;
- else
- t(i,j)=1/100.0;
- massert( t(i,j)>=PROB_SMOOTH );
- }
-}*/
- WordIndex get_l()const
- {return l;}
- WordIndex get_m()const
- {return m;}
- const PROB&get_t(WordIndex i, WordIndex j)const
- {massert( t(i,j)>=PROB_SMOOTH);
- return t(i, j);}
- WordIndex get_es(int i)const {return E[i];}
- WordIndex get_fs(int j)const {return F[j];}
- bool greedyHillClimbing()const
- {return 0;}
- void computeScores(const alignment&,vector<double>&)const
- {}
- LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
+ for(WordIndex j=1;j<=m;j++)
{
- int old_i=a(j);
- return (t(new_i, j) /t(old_i, j));
- }
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- WordIndex i1=a(j1), i2=a(j2);
- return (t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
- }
- LogProb prob_of_target_and_alignment_given_source(const alignment&al)const
- {
- LogProb prob=1.0;
- int lp1=al.get_l()+1;
- for(unsigned int j=1;j<=al.get_m();++j)
- prob*=t(al(j),j)/lp1;
- return prob;
+ const string&estr=globeTrainVcbList->getVocabList()[es[i]].word;
+ const string&fstr=globfTrainVcbList->getVocabList()[fs[j]].word;
+ if( lev(estr,fstr)==0 )
+ t(i,j)=1.0;
+ else
+ t(i,j)=1/100.0;
+ massert( t(i,j)>=PROB_SMOOTH );
}
+ }*/
+ WordIndex get_l()const {
+ return l;
+ }
+ WordIndex get_m()const {
+ return m;
+ }
+ const PROB&get_t(WordIndex i, WordIndex j)const {
+ massert( t(i,j)>=PROB_SMOOTH);
+ return t(i, j);
+ }
+ WordIndex get_es(int i)const {
+ return E[i];
+ }
+ WordIndex get_fs(int j)const {
+ return F[j];
+ }
+ bool greedyHillClimbing()const {
+ return 0;
+ }
+ void computeScores(const alignment&,vector<double>&)const
+ {}
+ LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const {
+ int old_i=a(j);
+ return (t(new_i, j) /t(old_i, j));
+ }
+ LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const {
+ WordIndex i1=a(j1), i2=a(j2);
+ return (t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
+ }
+ LogProb prob_of_target_and_alignment_given_source(const alignment&al)const {
+ LogProb prob=1.0;
+ int lp1=al.get_l()+1;
+ for(unsigned int j=1; j<=al.get_m(); ++j)
+ prob*=t(al(j),j)/lp1;
+ return prob;
+ }
};
#endif
diff --git a/mgizapp/src/transpair_model2.h b/mgizapp/src/transpair_model2.h
index 751ce52..c963350 100644
--- a/mgizapp/src/transpair_model2.h
+++ b/mgizapp/src/transpair_model2.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -35,18 +35,18 @@ USA.
class transpair_model2 : public transpair_model1
{
- protected:
+protected:
Array2<PROB, Vector<PROB> > a;
- public:
- transpair_model2(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
- const amodel<PROB>&aTable)
- : transpair_model1(es,fs,tTable),a(es.size(),fs.size())
- {
- for(WordIndex i=0;i<=l;i++)
- for(WordIndex j=1;j<=m;j++)
- a(i, j)=aTable.getValue(i, j, l, m);
- }
- const PROB&get_a(WordIndex i, WordIndex j)const
- {return a(i, j);}
+public:
+ transpair_model2(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
+ const amodel<PROB>&aTable)
+ : transpair_model1(es,fs,tTable),a(es.size(),fs.size()) {
+ for(WordIndex i=0; i<=l; i++)
+ for(WordIndex j=1; j<=m; j++)
+ a(i, j)=aTable.getValue(i, j, l, m);
+ }
+ const PROB&get_a(WordIndex i, WordIndex j)const {
+ return a(i, j);
+ }
};
#endif
diff --git a/mgizapp/src/transpair_model3.cpp b/mgizapp/src/transpair_model3.cpp
index 0ab4c54..8651623 100644
--- a/mgizapp/src/transpair_model3.cpp
+++ b/mgizapp/src/transpair_model3.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -30,19 +30,17 @@ Franz Josef Och (30/07/99)
transpair_model3::transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0, void*)
: transpair_model2(es,fs,tTable,aTable),d(es.size(), fs.size()),n(es.size(), MAX_FERTILITY+1), p0(_p0), p1(_p1)
-{
+{
WordIndex l=es.size()-1,m=fs.size()-1;
- for(WordIndex i=0;i<=l;i++)
- {
- for(WordIndex j=1;j<=m;j++)
- d(i, j)=dTable.getValue(j, i, l, m);
- if( i>0 )
- {
- for(WordIndex f=0;f<MAX_FERTILITY;f++)
- n(i, f)=nTable.getValue(es[i], f);
- n(i,MAX_FERTILITY)=PROB_SMOOTH;
- }
+ for(WordIndex i=0; i<=l; i++) {
+ for(WordIndex j=1; j<=m; j++)
+ d(i, j)=dTable.getValue(j, i, l, m);
+ if( i>0 ) {
+ for(WordIndex f=0; f<MAX_FERTILITY; f++)
+ n(i, f)=nTable.getValue(es[i], f);
+ n(i,MAX_FERTILITY)=PROB_SMOOTH;
}
+ }
}
LogProb transpair_model3::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double,bool forModel3)const
@@ -54,58 +52,55 @@ LogProb transpair_model3::scoreOfMove(const alignment&a, WordIndex new_i, WordIn
change=1.0;
else if (old_i == 0)
change=((double)p0*p0/p1) *
- (( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):f0)*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
- ((PROB)(forModel3?(a.fert(new_i)+1.0):1.0)) *
- (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
- (t(new_i, j)/t(old_i, j))*
- (forModel3?d(new_i, j):1.0);
+ (( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):f0)*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
+ ((PROB)(forModel3?(a.fert(new_i)+1.0):1.0)) *
+ (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
+ (t(new_i, j)/t(old_i, j))*
+ (forModel3?d(new_i, j):1.0);
else if (new_i == 0)
change=(double(p1) / (p0*p0)) *
- (double((m-2*f0)*(m-2*f0-1))/( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):(1+f0))*(m-f0))) *
- (forModel3?(1.0/a.fert(old_i)):1.0) *
- (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
- (t(new_i, j) /t(old_i, j)) *
- (forModel3?(1.0 / d(old_i, j)):1.0);
+ (double((m-2*f0)*(m-2*f0-1))/( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):(1+f0))*(m-f0))) *
+ (forModel3?(1.0/a.fert(old_i)):1.0) *
+ (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
+ (t(new_i, j) /t(old_i, j)) *
+ (forModel3?(1.0 / d(old_i, j)):1.0);
else
change=(forModel3?((a.fert(new_i)+1.0)/a.fert(old_i)):1.0) *
- (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
- (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
- (t(new_i,j)/t(old_i,j)) *
- (forModel3?(d(new_i,j)/d(old_i,j)):1.0);
+ (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
+ (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
+ (t(new_i,j)/t(old_i,j)) *
+ (forModel3?(d(new_i,j)/d(old_i,j)):1.0);
return change;
}
-LogProb transpair_model3::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double,bool forModel3)const
+LogProb transpair_model3::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double,bool forModel3)const
{
PROB score=1;
assert(j1<j2);
WordIndex i1=a(j1), i2=a(j2);
- if (i1!=i2)
- {
- score=(t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
- if( forModel3 )
- {
- if (i1)
- score *= d(i1, j2)/d(i1, j1);
- if (i2)
- score *= d(i2, j1)/d(i2, j2);
- }
+ if (i1!=i2) {
+ score=(t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
+ if( forModel3 ) {
+ if (i1)
+ score *= d(i1, j2)/d(i1, j1);
+ if (i2)
+ score *= d(i2, j1)/d(i2, j2);
}
+ }
return score;
}
ostream&operator<<(ostream&out, const transpair_model3&m)
{
- for(WordIndex i=0;i<=m.get_l();i++)
- {
- out << "EF-I:"<<i<<' ';
- for(WordIndex j=1;j<=m.get_m();j++)
- out << "("<<m.t(i,j)<<","<<m.d(i,j)<<")";
- for(WordIndex j=1;j<MAX_FERTILITY;j++)
- if( i>0 )
- out << "(fert:"<<m.get_fertility(i,j)<<")";
- out << '\n';
- }
+ for(WordIndex i=0; i<=m.get_l(); i++) {
+ out << "EF-I:"<<i<<' ';
+ for(WordIndex j=1; j<=m.get_m(); j++)
+ out << "("<<m.t(i,j)<<","<<m.d(i,j)<<")";
+ for(WordIndex j=1; j<MAX_FERTILITY; j++)
+ if( i>0 )
+ out << "(fert:"<<m.get_fertility(i,j)<<")";
+ out << '\n';
+ }
out << "T:" << m.t << "D:" << m.d << "A:" << m.a << "N:" << m.n << m.p0 << m.p1 << '\n';
return out;
}
@@ -145,28 +140,25 @@ LogProb transpair_model3::_scoreOfSwap(const alignment&a, WordIndex j1, WordInde
LogProb transpair_model3::prob_of_target_and_alignment_given_source(const alignment&al,bool verb)const
{
LogProb total = 1.0 ;
- static const LogProb zero = 1E-299 ;
+ static const LogProb zero = 1E-299 ;
total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
if( verb) cerr << "IBM-3: (1-p1)^(m-2 f0)*p1^f0: " << total << '\n';
for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
if( verb) cerr << "IBM-3: +NULL:binomial+distortion " << total << '\n';
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
- if( verb) cerr << "IBM-3: fertility of " << i << " with factorial " << get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i)) << " -> " << total << '\n';
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- massert( get_t(al(j), j)>=PROB_SMOOTH );
- if( verb) cerr << "IBM-3: t of " << j << " " << al(j) << ": " << get_t(al(j), j) << " -> " << total << '\n';
- if (al(j))
- {
- total *= get_d(al(j), j);
- if( verb) cerr << "IBM-3: d of " << j << ": " << get_d(al(j), j) << " -> " << total << '\n';
- }
+ for (WordIndex i = 1 ; i <= l ; i++) {
+ total *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
+ if( verb) cerr << "IBM-3: fertility of " << i << " with factorial " << get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i)) << " -> " << total << '\n';
+ }
+ for (WordIndex j = 1 ; j <= m ; j++) {
+ total*= get_t(al(j), j) ;
+ massert( get_t(al(j), j)>=PROB_SMOOTH );
+ if( verb) cerr << "IBM-3: t of " << j << " " << al(j) << ": " << get_t(al(j), j) << " -> " << total << '\n';
+ if (al(j)) {
+ total *= get_d(al(j), j);
+ if( verb) cerr << "IBM-3: d of " << j << ": " << get_d(al(j), j) << " -> " << total << '\n';
}
+ }
return total?total:zero;
}
@@ -177,19 +169,16 @@ void transpair_model3::computeScores(const alignment&al,vector<double>&d)const
total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
total1 *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total2 *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total3*= get_t(al(j), j) ;
- massert( get_t(al(j), j)>=PROB_SMOOTH );
- if (al(j))
- {
- total4 *= get_d(al(j), j);
- }
+ for (WordIndex i = 1 ; i <= l ; i++) {
+ total2 *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
+ }
+ for (WordIndex j = 1 ; j <= m ; j++) {
+ total3*= get_t(al(j), j) ;
+ massert( get_t(al(j), j)>=PROB_SMOOTH );
+ if (al(j)) {
+ total4 *= get_d(al(j), j);
}
+ }
d.push_back(total1);//5
d.push_back(total2);//6
d.push_back(total3);//7
diff --git a/mgizapp/src/transpair_model3.h b/mgizapp/src/transpair_model3.h
index 9c07fd9..c51ab4a 100644
--- a/mgizapp/src/transpair_model3.h
+++ b/mgizapp/src/transpair_model3.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -45,40 +45,48 @@ inline bool doubleEqual(const double a, const double b)
bool bl=fabs(1.0-a/b)<1e-10;
if( bl )
return 1;
- else
- {
- cerr << "DIFFERENT: " << a << " " << b << " " << a/b << " " << 1.0-a/b << endl;
- return 0;
- }
+ else {
+ cerr << "DIFFERENT: " << a << " " << b << " " << a/b << " " << 1.0-a/b << endl;
+ return 0;
+ }
}
class transpair_model3 : public transpair_model2
{
- protected:
+protected:
Array2<PROB, Vector<PROB> > d, n;
PROB p0, p1;
- public:
+public:
typedef transpair_model3 simpler_transpair_model;
- transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
- amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable,
- double _p1, double _p0, void*x=0);
- const PROB&get_d(WordIndex i, WordIndex j)const
- {return d(i, j);}
- const PROB&get_a(WordIndex i, WordIndex j)const
- {return a(i, j);}
- const PROB&get_fertility(WordIndex i, WordIndex f)const
- {massert(i>0);return (f>=MAX_FERTILITY)?n(i, MAX_FERTILITY):n(i, f);}
- int modelnr()const{return 3;}
- LogProb scoreOfAlignmentForChange(const alignment&)const
- {return -1.0; }
+ transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
+ amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable,
+ double _p1, double _p0, void*x=0);
+ const PROB&get_d(WordIndex i, WordIndex j)const {
+ return d(i, j);
+ }
+ const PROB&get_a(WordIndex i, WordIndex j)const {
+ return a(i, j);
+ }
+ const PROB&get_fertility(WordIndex i, WordIndex f)const {
+ massert(i>0);
+ return (f>=MAX_FERTILITY)?n(i, MAX_FERTILITY):n(i, f);
+ }
+ int modelnr()const {
+ return 3;
+ }
+ LogProb scoreOfAlignmentForChange(const alignment&)const {
+ return -1.0;
+ }
LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double thisValue=-1.0,bool withDistortions=1)const;
LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double thisValue=-1.0,bool withDistortions=1)const ;
LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const;
friend ostream&operator<<(ostream&out, const transpair_model3&m);
LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verb=0)const;
- bool isSubOptimal()const{return 1;}
+ bool isSubOptimal()const {
+ return 1;
+ }
void computeScores(const alignment&al,vector<double>&d)const;
};
#endif
diff --git a/mgizapp/src/transpair_model4.cpp b/mgizapp/src/transpair_model4.cpp
index ebc2666..93f5b6f 100644
--- a/mgizapp/src/transpair_model4.cpp
+++ b/mgizapp/src/transpair_model4.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -38,7 +38,7 @@ LogProb transpair_model4::_scoreOfMove(const alignment&a, WordIndex new_i, WordI
else
return 1.0;
}
-LogProb transpair_model4::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double)const
+LogProb transpair_model4::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double)const
{
LogProb a_prob=prob_of_target_and_alignment_given_source(a);
alignment b(a);
@@ -71,7 +71,7 @@ LogProb transpair_model4::scoreOfMove(const alignment&a, WordIndex new_i, WordIn
return change;
}
//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model4::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
+LogProb transpair_model4::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
{
WordIndex aj1=a(j1),aj2=a(j2);
if( aj1==aj2 )
@@ -105,47 +105,40 @@ LogProb transpair_model4::prob_of_target_and_alignment_given_source_1(const alig
for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
if( verb) cerr << "IBM-4: +NULL:binomial+distortion " << total << endl;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
- if( verb) cerr << "IBM-4: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- if( verb) cerr << "IBM-4: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
- }
+ for (WordIndex i = 1 ; i <= l ; i++) {
+ total *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
+ if( verb) cerr << "IBM-4: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
+ }
+ for (WordIndex j = 1 ; j <= m ; j++) {
+ total*= get_t(al(j), j) ;
+ if( verb) cerr << "IBM-4: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
+ }
return total;
}
LogProb transpair_model4::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
{
LogProb total = 1.0 ;
- static const LogProb almostZero = 1E-299 ;
- if( distortionType&1 )
- {
- total *= prob_of_target_and_alignment_given_source_1(al,verb);
- }
- if( distortionType&2 )
- {
- for(WordIndex j=1;j<=m;j++)
- if( al(j) )
- if( al.get_head(al(j))==j)
- {
- int ep=al.prev_cept(al(j));
- float x2=probFirst[ep](j,al.get_center(ep));
- massert(x2<=1.0);
- total*=x2;
- if( verb) cerr << "IBM-4: d=1 of " << j << ": " << x2 << " -> " << total << endl;
- }
- else
- {
- float x2=probSecond(j,al.prev_in_cept(j));
- massert(x2<=1.0);
- total*=x2;
- if( verb) cerr << "IBM-4: d>1 of " << j << ": " << x2 << " -> " << total << endl;
- }
- }
+ static const LogProb almostZero = 1E-299 ;
+ if( distortionType&1 ) {
+ total *= prob_of_target_and_alignment_given_source_1(al,verb);
+ }
+ if( distortionType&2 ) {
+ for(WordIndex j=1; j<=m; j++)
+ if( al(j) )
+ if( al.get_head(al(j))==j) {
+ int ep=al.prev_cept(al(j));
+ float x2=probFirst[ep](j,al.get_center(ep));
+ massert(x2<=1.0);
+ total*=x2;
+ if( verb) cerr << "IBM-4: d=1 of " << j << ": " << x2 << " -> " << total << endl;
+ } else {
+ float x2=probSecond(j,al.prev_in_cept(j));
+ massert(x2<=1.0);
+ total*=x2;
+ if( verb) cerr << "IBM-4: d>1 of " << j << ": " << x2 << " -> " << total << endl;
+ }
+ }
return total?total:almostZero;
}
@@ -159,19 +152,16 @@ void transpair_model4::computeScores(const alignment&al,vector<double>&d)const
total2 *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
for (WordIndex j = 1 ; j <= m ; j++)
total3*= get_t(al(j), j) ;
- for(WordIndex j=1;j<=m;j++)
+ for(WordIndex j=1; j<=m; j++)
if( al(j) )
- if( al.get_head(al(j))==j)
- {
- int ep=al.prev_cept(al(j));
- float x2=probFirst[ep](j,al.get_center(ep));
- total4*=x2;
- }
- else
- {
- float x2=probSecond(j,al.prev_in_cept(j));
- total4*=x2;
- }
+ if( al.get_head(al(j))==j) {
+ int ep=al.prev_cept(al(j));
+ float x2=probFirst[ep](j,al.get_center(ep));
+ total4*=x2;
+ } else {
+ float x2=probSecond(j,al.prev_in_cept(j));
+ total4*=x2;
+ }
d.push_back(total1);//9
d.push_back(total2);//10
d.push_back(total3);//11
diff --git a/mgizapp/src/transpair_model4.h b/mgizapp/src/transpair_model4.h
index c8e1853..4a9cd2d 100644
--- a/mgizapp/src/transpair_model4.h
+++ b/mgizapp/src/transpair_model4.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,43 +36,41 @@ extern double factorial(int n);
class transpair_model4 : public transpair_model3
{
- private:
+private:
d4model&d4m;
Array2<double> probSecond;
Vector<Array2<double> > probFirst;
- public:
+public:
typedef transpair_model3 simpler_transpair_model;
- transpair_model4(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,d4model*_d4m)
- : transpair_model3(es, fs, tTable, aTable, dTable, nTable, _p1, _p0),
- d4m(*_d4m),probSecond(m+1,m+1,0.0),probFirst(l+1)
- {
- for(unsigned int j1=1;j1<=m;++j1)
- for(unsigned int j2=1;j2<j1;++j2)
- {
- probSecond(j1,j2)=d4m.getProb_bigger(j1,j2,0,d4m.fwordclasses->getClass(get_fs(j1)),l,m);
- }
- for(unsigned int i=0;i<=l;++i)
- {
- Array2<double> &pf=probFirst[i]=Array2<double>(m+1,m+1,0.0);
- for(unsigned int j1=1;j1<=m;++j1)
- {
- map<m4_key,d4model::Vpff,compare1 >::const_iterator ci=d4m.getProb_first_iterator(d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m);
- for(unsigned int j2=0;j2<=m;++j2)
- {
- pf(j1,j2)=d4m.getProb_first_withiterator(j1,j2,m,ci);
- massert(pf(j1,j2)==d4m.getProb_first(j1,j2,d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m));
- }
- }
- }
+ transpair_model4(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,d4model*_d4m)
+ : transpair_model3(es, fs, tTable, aTable, dTable, nTable, _p1, _p0),
+ d4m(*_d4m),probSecond(m+1,m+1,0.0),probFirst(l+1) {
+ for(unsigned int j1=1; j1<=m; ++j1)
+ for(unsigned int j2=1; j2<j1; ++j2) {
+ probSecond(j1,j2)=d4m.getProb_bigger(j1,j2,0,d4m.fwordclasses->getClass(get_fs(j1)),l,m);
+ }
+ for(unsigned int i=0; i<=l; ++i) {
+ Array2<double> &pf=probFirst[i]=Array2<double>(m+1,m+1,0.0);
+ for(unsigned int j1=1; j1<=m; ++j1) {
+ map<m4_key,d4model::Vpff,compare1 >::const_iterator ci=d4m.getProb_first_iterator(d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m);
+ for(unsigned int j2=0; j2<=m; ++j2) {
+ pf(j1,j2)=d4m.getProb_first_withiterator(j1,j2,m,ci);
+ massert(pf(j1,j2)==d4m.getProb_first(j1,j2,d4m.ewordclasses->getClass(get_es(i)),d4m.fwordclasses->getClass(get_fs(j1)),l,m));
+ }
+ }
}
+ }
LogProb prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const;
- LogProb scoreOfAlignmentForChange(const alignment&a)const
- {return prob_of_target_and_alignment_given_source(a,2); }
+ LogProb scoreOfAlignmentForChange(const alignment&a)const {
+ return prob_of_target_and_alignment_given_source(a,2);
+ }
LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- int modelnr()const{return 4;}
+ int modelnr()const {
+ return 4;
+ }
LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
void computeScores(const alignment&al,vector<double>&d)const;
};
diff --git a/mgizapp/src/transpair_model5.cpp b/mgizapp/src/transpair_model5.cpp
index c621206..220a3c8 100644
--- a/mgizapp/src/transpair_model5.cpp
+++ b/mgizapp/src/transpair_model5.cpp
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -43,7 +43,7 @@ LogProb transpair_model5::_scoreOfMove(const alignment&a, WordIndex new_i, WordI
else
return 1.0;
}
-LogProb transpair_model5::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
+LogProb transpair_model5::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
{
if( doModel4Scoring )
return transpair_model4::_scoreOfSwap(a,j1,j2,thisValue);
@@ -69,7 +69,7 @@ LogProb transpair_model5::scoreOfMove(const alignment&a, WordIndex new_i, WordIn
return transpair_model4::scoreOfMove(a,new_i,j,thisValue);
alignment b(a);
b.set(j,new_i);
-
+
LogProb change;
const WordIndex old_i=a(j);
WordIndex f0=a.fert(0);
@@ -77,34 +77,34 @@ LogProb transpair_model5::scoreOfMove(const alignment&a, WordIndex new_i, WordIn
change=1.0;
else if (old_i == 0)
change=((double)p0*p0/p1) *
- ((f0*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
- ((PROB)(1.0)) *
- (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
- (t(new_i, j)/t(old_i, j))*
- 1.0;
+ ((f0*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
+ ((PROB)(1.0)) *
+ (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
+ (t(new_i, j)/t(old_i, j))*
+ 1.0;
else if (new_i == 0)
change=(double(p1) / (p0*p0)) *
- (double((m-2*f0)*(m-2*f0-1))/((1+f0)*(m-f0))) *
- (1.0) *
- (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
- (t(new_i, j) /t(old_i, j)) *
- (1.0);
+ (double((m-2*f0)*(m-2*f0-1))/((1+f0)*(m-f0))) *
+ (1.0) *
+ (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
+ (t(new_i, j) /t(old_i, j)) *
+ (1.0);
else
change=(1.0) *
- (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
- (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
- (t(new_i,j)/t(old_i,j)) *
- (1.0);
+ (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
+ (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
+ (t(new_i,j)/t(old_i,j)) *
+ (1.0);
LogProb a_prob=thisValue;
if( a_prob<0.0 )
a_prob=prob_of_target_and_alignment_given_source(a,2);
massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-
+
LogProb b_prob=prob_of_target_and_alignment_given_source(b,2);
change*=b_prob/a_prob;
return change;
}
-LogProb transpair_model5::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
+LogProb transpair_model5::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
{
if( doModel4Scoring )
return transpair_model4::scoreOfSwap(a,j1,j2,thisValue);
@@ -126,70 +126,65 @@ LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignm
if( doModel4Scoring )
return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType);
LogProb total = 1.0 ;
- static const LogProb almostZero = 1E-299 ;
+ static const LogProb almostZero = 1E-299 ;
double x2;
- if( distortionType&1 )
- {
- total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
- if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
- for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
- total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
- if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl;
- for (WordIndex i = 1 ; i <= l ; i++)
- {
- total *= get_fertility(i, al.fert(i));
- if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
- }
- for (WordIndex j = 1 ; j <= m ; j++)
- {
- total*= get_t(al(j), j) ;
- if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
- }
+ if( distortionType&1 ) {
+ total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
+ if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
+ for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
+ total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
+ if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl;
+ for (WordIndex i = 1 ; i <= l ; i++) {
+ total *= get_fertility(i, al.fert(i));
+ if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
}
- if( distortionType&2 )
- {
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for(WordIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- // previous position
- total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k));
-
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
-
- if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2 << " -> " << total << endl;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- // previous position
- int vprev=vacancies(vac,prev_j);
- total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k));
-
-
- vac_all--;
- vac[cur_j]=1;
-
-
- if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2 << " -> " << total << endl;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
- }
- assert(vac_all==al.fert(0));
+ for (WordIndex j = 1 ; j <= m ; j++) {
+ total*= get_t(al(j), j) ;
+ if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j) << " -> " << total << endl;
+ }
+ }
+ if( distortionType&2 ) {
+ PositionIndex prev_cept=0;
+ PositionIndex vac_all=m;
+ Vector<char> vac(m+1,0);
+ for(WordIndex i=1; i<=l; i++) {
+ PositionIndex cur_j=al.als_i[i];
+ PositionIndex prev_j=0;
+ PositionIndex k=0;
+ if(cur_j) { // process first word of cept
+ k++;
+ // previous position
+ total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k));
+
+ vac_all--;
+ assert(vac[cur_j]==0);
+ vac[cur_j]=1;
+
+ if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2 << " -> " << total << endl;
+ prev_j=cur_j;
+ cur_j=al.als_j[cur_j].next;
+ }
+ while(cur_j) { // process following words of cept
+ k++;
+ // previous position
+ int vprev=vacancies(vac,prev_j);
+ total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k));
+
+
+ vac_all--;
+ vac[cur_j]=1;
+
+
+ if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2 << " -> " << total << endl;
+ prev_j=cur_j;
+ cur_j=al.als_j[cur_j].next;
+ }
+ assert(k==al.fert(i));
+ if( k )
+ prev_cept=i;
}
+ assert(vac_all==al.fert(0));
+ }
total = total?total:almostZero;
return total;
}
@@ -208,33 +203,32 @@ void transpair_model5::computeScores(const alignment&al,vector<double>&d)const
PositionIndex prev_cept=0;
PositionIndex vac_all=m;
Vector<char> vac(m+1,0);
- for(WordIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k);
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- int vprev=vacancies(vac,prev_j);
- total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k);
- vac_all--;
- vac[cur_j]=1;
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
+ for(WordIndex i=1; i<=l; i++) {
+ PositionIndex cur_j=al.als_i[i];
+ PositionIndex prev_j=0;
+ PositionIndex k=0;
+ if(cur_j) { // process first word of cept
+ k++;
+ total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k);
+ vac_all--;
+ assert(vac[cur_j]==0);
+ vac[cur_j]=1;
+ prev_j=cur_j;
+ cur_j=al.als_j[cur_j].next;
+ }
+ while(cur_j) { // process following words of cept
+ k++;
+ int vprev=vacancies(vac,prev_j);
+ total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses->getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k);
+ vac_all--;
+ vac[cur_j]=1;
+ prev_j=cur_j;
+ cur_j=al.als_j[cur_j].next;
}
+ assert(k==al.fert(i));
+ if( k )
+ prev_cept=i;
+ }
assert(vac_all==al.fert(0));
d.push_back(total1);//13
d.push_back(total2);//14
diff --git a/mgizapp/src/transpair_model5.h b/mgizapp/src/transpair_model5.h
index 5ecf49d..3223e00 100644
--- a/mgizapp/src/transpair_model5.h
+++ b/mgizapp/src/transpair_model5.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -46,28 +46,29 @@ inline int vacancies(const Vector<char>&vac,int u)
class transpair_model5 : public transpair_model4
{
- private:
+private:
const d5model&d5m;
bool doModel4Scoring;
- public:
+public:
typedef transpair_model3 simpler_transpair_model;
mutable map<Vector<PositionIndex>,LogProb> scores[4];
- transpair_model5(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
- amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,
- const d5model*_d5m)
+ transpair_model5(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable,
+ amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,
+ const d5model*_d5m)
: transpair_model4(es, fs, tTable, aTable, dTable, nTable, _p1, _p0,&_d5m->d4m),d5m(*_d5m),doModel4Scoring(0) {}
- LogProb scoreOfAlignmentForChange(const alignment&a)const
- {
- if( doModel4Scoring )
- return transpair_model4::prob_of_target_and_alignment_given_source(a,2);
- else
- return prob_of_target_and_alignment_given_source(a,2);
- }
+ LogProb scoreOfAlignmentForChange(const alignment&a)const {
+ if( doModel4Scoring )
+ return transpair_model4::prob_of_target_and_alignment_given_source(a,2);
+ else
+ return prob_of_target_and_alignment_given_source(a,2);
+ }
LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
- int modelnr()const{return 5;}
+ int modelnr()const {
+ return 5;
+ }
LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
void computeScores(const alignment&al,vector<double>&d)const;
};
diff --git a/mgizapp/src/transpair_modelhmm.h b/mgizapp/src/transpair_modelhmm.h
index d836ad4..6f82b38 100644
--- a/mgizapp/src/transpair_modelhmm.h
+++ b/mgizapp/src/transpair_modelhmm.h
@@ -9,14 +9,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -36,188 +36,176 @@ USA.
class transpair_modelhmm : public transpair_model2
{
- public:
- typedef transpair_modelhmm simpler_transpair_model;
+public:
+ typedef transpair_modelhmm simpler_transpair_model;
HMMNetwork*net;
- transpair_modelhmm(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
- const amodel<PROB>&aTable,const amodel<PROB>&,const nmodel<PROB>&,
- double, double,const hmm*h)
+ transpair_modelhmm(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable,
+ const amodel<PROB>&aTable,const amodel<PROB>&,const nmodel<PROB>&,
+ double, double,const hmm*h)
: transpair_model2(es,fs,tTable,aTable),net(h->makeHMMNetwork(es,fs,0))
- {}
- ~transpair_modelhmm() { delete net; }
- int modelnr()const{return 6;}
- LogProb scoreOfMove(const alignment&a, WordIndex _new_i, WordIndex j,double=-1.0)const
- {
- int new_i=_new_i;
- LogProb change=1.0;
- int old_i=a(j);
- if (old_i == new_i)
- change=1.0;
- else
- {
- int theJ=j-1;
- old_i--;
- new_i--;
- int jj=j-1;
- while(jj>0&&a(jj)==0)
- jj--;
- int theIPrev= (jj>0)?(a(jj)-1):0;
- if( j>1&&a(j-1)==0 )
- theIPrev+=l;
- if( old_i==-1 ){old_i = theIPrev;if(old_i<int(l))old_i+=l;}
- if( new_i==-1 ){new_i = theIPrev;if(new_i<int(l))new_i+=l;}
- int theIPrevOld=theIPrev,theIPrevNew=theIPrev;
- if( theJ==0 )
- {
- change*=net->getAlphainit(new_i)/net->getAlphainit(old_i);
- }
- do
- {
- if( new_i!=old_i )
- {
- change*=net->nodeProb(new_i,theJ)/net->nodeProb(old_i,theJ);
- }
- if( theJ>0)
- change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,old_i);
- theIPrevOld=old_i;
- theIPrevNew=new_i;
- theJ++;
- if( theJ<int(m) && a(theJ+1)==0 )
- {
- if( new_i<int(l)) new_i+=l;
- if( old_i<int(l)) old_i+=l;
- }
- } while( theJ<int(m) && a(theJ+1)==0 );
- if(theJ==int(m))
- {
- change*=net->getBetainit(new_i)/net->getBetainit(old_i);
- }
- else
- {
- new_i=a(theJ+1)-1;
- if( new_i==-1)
- new_i=theIPrevNew;
- change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,new_i);
- }
- }
- return change;
+ {}
+ ~transpair_modelhmm() {
+ delete net;
+ }
+ int modelnr()const {
+ return 6;
+ }
+ LogProb scoreOfMove(const alignment&a, WordIndex _new_i, WordIndex j,double=-1.0)const {
+ int new_i=_new_i;
+ LogProb change=1.0;
+ int old_i=a(j);
+ if (old_i == new_i)
+ change=1.0;
+ else {
+ int theJ=j-1;
+ old_i--;
+ new_i--;
+ int jj=j-1;
+ while(jj>0&&a(jj)==0)
+ jj--;
+ int theIPrev= (jj>0)?(a(jj)-1):0;
+ if( j>1&&a(j-1)==0 )
+ theIPrev+=l;
+ if( old_i==-1 ) {
+ old_i = theIPrev;
+ if(old_i<int(l))old_i+=l;
+ }
+ if( new_i==-1 ) {
+ new_i = theIPrev;
+ if(new_i<int(l))new_i+=l;
+ }
+ int theIPrevOld=theIPrev,theIPrevNew=theIPrev;
+ if( theJ==0 ) {
+ change*=net->getAlphainit(new_i)/net->getAlphainit(old_i);
+ }
+ do {
+ if( new_i!=old_i ) {
+ change*=net->nodeProb(new_i,theJ)/net->nodeProb(old_i,theJ);
+ }
+ if( theJ>0)
+ change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,old_i);
+ theIPrevOld=old_i;
+ theIPrevNew=new_i;
+ theJ++;
+ if( theJ<int(m) && a(theJ+1)==0 ) {
+ if( new_i<int(l)) new_i+=l;
+ if( old_i<int(l)) old_i+=l;
+ }
+ } while( theJ<int(m) && a(theJ+1)==0 );
+ if(theJ==int(m)) {
+ change*=net->getBetainit(new_i)/net->getBetainit(old_i);
+ } else {
+ new_i=a(theJ+1)-1;
+ if( new_i==-1)
+ new_i=theIPrevNew;
+ change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,new_i);
+ }
}
- LogProb scoreOfAlignmentForChange(const alignment&)const
- {return -1.0; }
- LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- return _scoreOfSwap(a,j1,j2);
- }
- LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
- {
- alignment b(a);
- b.set(j, new_i);
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
- }
- LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
- {
- WordIndex aj1=a(j1),aj2=a(j2);
- if( aj1==aj2 )
- return 1.0;
- LogProb a_prob=prob_of_target_and_alignment_given_source(a);
+ return change;
+ }
+ LogProb scoreOfAlignmentForChange(const alignment&)const {
+ return -1.0;
+ }
+ LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const {
+ return _scoreOfSwap(a,j1,j2);
+ }
+ LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const {
+ alignment b(a);
+ b.set(j, new_i);
+ LogProb a_prob=prob_of_target_and_alignment_given_source(a);
+ LogProb b_prob=prob_of_target_and_alignment_given_source(b);
+ if( a_prob )
+ return b_prob/a_prob;
+ else if( b_prob )
+ return 1e20;
+ else
+ return 1.0;
+ }
+ LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const {
+ WordIndex aj1=a(j1),aj2=a(j2);
+ if( aj1==aj2 )
+ return 1.0;
+ LogProb a_prob=prob_of_target_and_alignment_given_source(a);
- /*alignment b(a);
- b.set(j1, a(j2));
- b.set(j2, a(j1));
- LogProb b_prob=prob_of_target_and_alignment_given_source(b);*/
+ /*alignment b(a);
+ b.set(j1, a(j2));
+ b.set(j2, a(j1));
+ LogProb b_prob=prob_of_target_and_alignment_given_source(b);*/
- const_cast<alignment&>(a).set(j1,aj2);
- const_cast<alignment&>(a).set(j2,aj1);
- LogProb b_prob=prob_of_target_and_alignment_given_source(a);
- const_cast<alignment&>(a).set(j1,aj1);
- const_cast<alignment&>(a).set(j2,aj2);
+ const_cast<alignment&>(a).set(j1,aj2);
+ const_cast<alignment&>(a).set(j2,aj1);
+ LogProb b_prob=prob_of_target_and_alignment_given_source(a);
+ const_cast<alignment&>(a).set(j1,aj1);
+ const_cast<alignment&>(a).set(j2,aj2);
- if( a_prob )
- return b_prob/a_prob;
- else if( b_prob )
- return 1e20;
- else
- return 1.0;
- }
- inline friend ostream&operator<<(ostream&out, const transpair_modelhmm&)
- {
- return out << "NO-OUTPUT for transpair_modelhmm\n";
- }
- LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verbose=0)const
- {
- double prob=1.0;
- int theIPrev=0;
- for(unsigned int j=1;j<=m;j++)
- {
- int theJ=j-1;
- int theI=al(j)-1;
- if( theI==-1 )
- theI=(theIPrev%l)+l;
- prob*=net->nodeProb(theI,theJ);
- if( verbose )
- cout << "NP " << net->nodeProb(theI,theJ) << ' ';
- if( j==1 )
- {
- prob*=net->getAlphainit(theI);
- if( verbose )
- cout << "AP0 " << net->getAlphainit(theI) << ' ';
- }
- else
- {
- prob*=net->outProb(theJ,theIPrev,theI);
- if( verbose )
- cout << "AP1 " << net->outProb(theJ,theIPrev,theI) << ' ';
- }
- theIPrev=theI;
- if( j==m )
- {
- prob*=net->getBetainit(theI);
- if( verbose )
- cout << "AP2 " << net->getBetainit(theI) << ' ';
- }
- if( verbose )
- cout << "j:"<<theJ<<" i:"<<theI << "; ";
- }
+ if( a_prob )
+ return b_prob/a_prob;
+ else if( b_prob )
+ return 1e20;
+ else
+ return 1.0;
+ }
+ inline friend ostream&operator<<(ostream&out, const transpair_modelhmm&) {
+ return out << "NO-OUTPUT for transpair_modelhmm\n";
+ }
+ LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verbose=0)const {
+ double prob=1.0;
+ int theIPrev=0;
+ for(unsigned int j=1; j<=m; j++) {
+ int theJ=j-1;
+ int theI=al(j)-1;
+ if( theI==-1 )
+ theI=(theIPrev%l)+l;
+ prob*=net->nodeProb(theI,theJ);
+ if( verbose )
+ cout << "NP " << net->nodeProb(theI,theJ) << ' ';
+ if( j==1 ) {
+ prob*=net->getAlphainit(theI);
+ if( verbose )
+ cout << "AP0 " << net->getAlphainit(theI) << ' ';
+ } else {
+ prob*=net->outProb(theJ,theIPrev,theI);
+ if( verbose )
+ cout << "AP1 " << net->outProb(theJ,theIPrev,theI) << ' ';
+ }
+ theIPrev=theI;
+ if( j==m ) {
+ prob*=net->getBetainit(theI);
+ if( verbose )
+ cout << "AP2 " << net->getBetainit(theI) << ' ';
+ }
if( verbose )
- cout << '\n';
- return prob*net->finalMultiply;
+ cout << "j:"<<theJ<<" i:"<<theI << "; ";
}
- void computeScores(const alignment&al,vector<double>&d)const
- {
- double prob1=1.0,prob2=1.0;
- int theIPrev=0;
- for(unsigned int j=1;j<=m;j++)
- {
- int theJ=j-1;
- int theI=al(j)-1;
- if( theI==-1 )
- theI=(theIPrev%l)+l;
- prob1*=net->nodeProb(theI,theJ);
- if( j==1 )
- {
- prob2*=net->getAlphainit(theI);
- }
- else
- {
- prob2*=net->outProb(theJ,theIPrev,theI);
- }
- theIPrev=theI;
- if( j==m )
- {
- prob2*=net->getBetainit(theI);
- }
- }
- d.push_back(prob1);
- d.push_back(prob2);
+ if( verbose )
+ cout << '\n';
+ return prob*net->finalMultiply;
+ }
+ void computeScores(const alignment&al,vector<double>&d)const {
+ double prob1=1.0,prob2=1.0;
+ int theIPrev=0;
+ for(unsigned int j=1; j<=m; j++) {
+ int theJ=j-1;
+ int theI=al(j)-1;
+ if( theI==-1 )
+ theI=(theIPrev%l)+l;
+ prob1*=net->nodeProb(theI,theJ);
+ if( j==1 ) {
+ prob2*=net->getAlphainit(theI);
+ } else {
+ prob2*=net->outProb(theJ,theIPrev,theI);
+ }
+ theIPrev=theI;
+ if( j==m ) {
+ prob2*=net->getBetainit(theI);
+ }
}
+ d.push_back(prob1);
+ d.push_back(prob2);
+ }
- bool isSubOptimal()const{return 0;}
+ bool isSubOptimal()const {
+ return 0;
+ }
};
#endif
diff --git a/mgizapp/src/ttableDiff.hpp b/mgizapp/src/ttableDiff.hpp
index 280cadd..8e62c70 100644
--- a/mgizapp/src/ttableDiff.hpp
+++ b/mgizapp/src/ttableDiff.hpp
@@ -2,19 +2,19 @@
/*
* newgiza
* Copyright (C) Qin Gao 2007 <qing@cs.cmu.edu>
- *
+ *
* newgiza is free software.
- *
+ *
* You may redistribute it and/or modify it under the terms of the
* GNU General Public License, as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
- *
+ *
* newgiza is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with newgiza. If not, write to:
* The Free Software Foundation, Inc.,
@@ -32,86 +32,87 @@
using namespace std;
#ifdef WIN32
- typedef hash_map<wordPairIds, COUNT, hashpair> wordpair_hash;
+typedef hash_map<wordPairIds, COUNT, hashpair> wordpair_hash;
#else
- typedef hash_map<wordPairIds, COUNT, hashpair, equal_to<wordPairIds> > wordpair_hash;
+typedef hash_map<wordPairIds, COUNT, hashpair, equal_to<wordPairIds> > wordpair_hash;
#endif
/*!
This class is meant to create a difference file in order to make
GIZA paralell.
*/
template <class COUNT,class PROB>
-class CTTableDiff{
+class CTTableDiff
+{
private:
- INT32 noEnglishWords; // total number of unique source words
- INT32 noFrenchWords; // total number of unique target words
- /*!
- Store only the counting*/
- wordpair_hash ef;
+ INT32 noEnglishWords; // total number of unique source words
+ INT32 noFrenchWords; // total number of unique target words
+ /*!
+ Store only the counting*/
+ wordpair_hash ef;
public:
- INT32 SaveToFile(const char* filename){
- ofstream ofs(filename);
- if(!ofs.is_open()){
- return -1;
- }else{
- wordpair_hash::iterator it;
- for( it = ef.begin() ; it != ef.end(); it++){
- ofs << it->first.first << " " << it->first.second << " "
- << it->second << std::endl;
- }
- }
- return SUCCESS;
- }
-
- INT32 LoadFromFile(const char* filename){
- ef.clear();
- ifstream ifs(filename);
- if(!ifs.is_open()){
- return -1;
- }
- string sline;
- while(!ifs.eof()){
- sline = "";
- std::getline(ifs,sline);
- if(sline.length()){
- //cout << sline << endl;
- stringstream ss(sline.c_str());
- WordIndex we=-1,wf=-1;
- COUNT ct=-1 ;
- ss >> we >> wf >> ct;
- if(we==-1||wf==-1||ct==-1)
- continue;
- ef[wordPairIds(we,wf)] = ct;
- }
- }
- return SUCCESS;
- }
-
- COUNT * GetPtr(WordIndex e, WordIndex f){
- // look up this pair and return its position
- wordpair_hash::iterator i = ef.find(wordPairIds(e, f));
- if(i != ef.end()) // if it exists, return a pointer to it.
- return(&((*i).second));
- else return(0) ; // else return NULL pointer
- }
-
- void incCount(WordIndex e, WordIndex f, COUNT inc)
- // increments the count of the given word pair. if the pair does not exist,
- // it creates it with the given value.
- {
- if( inc )
- ef[wordPairIds(e, f)] += inc ;
- }
-
- INT32 AugmentTTable(tmodel<COUNT,PROB>& ttable){
- wordpair_hash::iterator it;
- for( it = ef.begin() ; it != ef.end(); it++){
- ttable.incCount(it->first.first,it->first.second,it->second);
- }
- return SUCCESS;
- }
-
+ INT32 SaveToFile(const char* filename) {
+ ofstream ofs(filename);
+ if(!ofs.is_open()) {
+ return -1;
+ } else {
+ wordpair_hash::iterator it;
+ for( it = ef.begin() ; it != ef.end(); it++) {
+ ofs << it->first.first << " " << it->first.second << " "
+ << it->second << std::endl;
+ }
+ }
+ return SUCCESS;
+ }
+
+ INT32 LoadFromFile(const char* filename) {
+ ef.clear();
+ ifstream ifs(filename);
+ if(!ifs.is_open()) {
+ return -1;
+ }
+ string sline;
+ while(!ifs.eof()) {
+ sline = "";
+ std::getline(ifs,sline);
+ if(sline.length()) {
+ //cout << sline << endl;
+ stringstream ss(sline.c_str());
+ WordIndex we=-1,wf=-1;
+ COUNT ct=-1 ;
+ ss >> we >> wf >> ct;
+ if(we==-1||wf==-1||ct==-1)
+ continue;
+ ef[wordPairIds(we,wf)] = ct;
+ }
+ }
+ return SUCCESS;
+ }
+
+ COUNT * GetPtr(WordIndex e, WordIndex f) {
+ // look up this pair and return its position
+ wordpair_hash::iterator i = ef.find(wordPairIds(e, f));
+ if(i != ef.end()) // if it exists, return a pointer to it.
+ return(&((*i).second));
+ else return(0) ; // else return NULL pointer
+ }
+
+ void incCount(WordIndex e, WordIndex f, COUNT inc)
+ // increments the count of the given word pair. if the pair does not exist,
+ // it creates it with the given value.
+ {
+ if( inc )
+ ef[wordPairIds(e, f)] += inc ;
+ }
+
+ INT32 AugmentTTable(tmodel<COUNT,PROB>& ttable) {
+ wordpair_hash::iterator it;
+ for( it = ef.begin() ; it != ef.end(); it++) {
+ ttable.incCount(it->first.first,it->first.second,it->second);
+ }
+ return SUCCESS;
+ }
+
protected:
};
diff --git a/mgizapp/src/types.h b/mgizapp/src/types.h
index b26db96..513c55f 100644
--- a/mgizapp/src/types.h
+++ b/mgizapp/src/types.h
@@ -8,5 +8,5 @@ typedef int INT32;
typedef float FLOAT32;
typedef double FLOAT64;
-#define SUCCESS 0
+#define SUCCESS 0
#define IS_SUCCESS(x) (x==0)
diff --git a/mgizapp/src/utility.cpp b/mgizapp/src/utility.cpp
index c0350a0..a353a05 100644
--- a/mgizapp/src/utility.cpp
+++ b/mgizapp/src/utility.cpp
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -33,6 +33,7 @@ double factorial(int n)
return f;
}
-string my_ctime(const time_t* t){
- return ctime(t);
+string my_ctime(const time_t* t)
+{
+ return ctime(t);
}
diff --git a/mgizapp/src/utility.h b/mgizapp/src/utility.h
index 0078a70..2eeed8c 100644
--- a/mgizapp/src/utility.h
+++ b/mgizapp/src/utility.h
@@ -8,14 +8,14 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
@@ -32,23 +32,23 @@ USA.
extern void printHelp(void);
extern void parseConfigFile (char * fname );
extern void parseArguments(int argc, char *argv[]);
-extern void generatePerplexityReport(const Perplexity& trainperp,
- const Perplexity& testperp,
- const Perplexity& trainVperp,
- const Perplexity& testVperp,
- ostream& of, int trainsize,
- int testsize, unsigned int last, bool);
+extern void generatePerplexityReport(const Perplexity& trainperp,
+ const Perplexity& testperp,
+ const Perplexity& trainVperp,
+ const Perplexity& testVperp,
+ ostream& of, int trainsize,
+ int testsize, unsigned int last, bool);
extern void printSentencePair(Vector<WordIndex>& es, Vector<WordIndex>& fs, ostream& of);
-
+
extern void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
- sentenceHandler& testHandler, vcbList& trainEList,
- vcbList& trainFList, vcbList& testEList, vcbList& testFList);
+ sentenceHandler& testHandler, vcbList& trainEList,
+ vcbList& trainFList, vcbList& testEList, vcbList& testFList);
-extern void printAlignToFile(const Vector<WordIndex>& es, const Vector<WordIndex>& fs,
- const Vector<WordEntry>& evlist, const Vector<WordEntry>& fvlist,
- ostream& of2, const Vector<WordIndex>& viterbi_alignment, int pair_no,
- double viterbi_score);
+extern void printAlignToFile(const Vector<WordIndex>& es, const Vector<WordIndex>& fs,
+ const Vector<WordEntry>& evlist, const Vector<WordEntry>& fvlist,
+ ostream& of2, const Vector<WordIndex>& viterbi_alignment, int pair_no,
+ double viterbi_score);
extern double factorial(int) ;
diff --git a/mgizapp/src/vocab.cpp b/mgizapp/src/vocab.cpp
index e7bf13a..f10e962 100644
--- a/mgizapp/src/vocab.cpp
+++ b/mgizapp/src/vocab.cpp
@@ -8,23 +8,23 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
-#include "vocab.h"
+#include "vocab.h"
void vcbList::readVocabList()
- // reads a vocabulary file from fname. It expects the following format:
- //
- // token_id token_string frequency
+// reads a vocabulary file from fname. It expects the following format:
+//
+// token_id token_string frequency
{
int freq=0;
@@ -32,51 +32,48 @@ void vcbList::readVocabList()
WordEntry entry("NULL",0) ;
string line, word ;
- cerr << "Reading vocabulary file from:" << fname << "\n";
+ cerr << "Reading vocabulary file from:" << fname << "\n";
// total = 0 ;
ifstream ifs(fname);
- if(!ifs){
+ if(!ifs) {
cerr << "\nCannot open vocabulary file " << fname << "file";
exit(1);
}
size_t sline = 0;
- while(getline(ifs, line)){
- sline ++;
+ while(getline(ifs, line)) {
+ sline ++;
}
ifs.close();
ifstream vFile(fname);
- if(!vFile){
+ if(!vFile) {
cerr << "\nCannot open vocabulary file " << fname << "file";
exit(1);
}
list.reserve(sline+100); // Reserve space to prevent re-allocating
-
+
list.push_back(entry);
s2i[entry.word]=list.size()-1;
- while(getline(vFile, line)){
+ while(getline(vFile, line)) {
istrstream buffer(line.c_str());
if(!(buffer >> word_id >> word >> freq))
cerr << "ERROR: reading vocabulary; " << word_id << ' ' << word << ' ' << freq << endl;
- if (word_id == 0){
+ if (word_id == 0) {
cerr << "ERROR: TOKEN ID 0 is reserved for special token NULL, in line: \n"<< line<<"\n" ;
exit(-1);
- }
- else if (word_id >= MAX_VOCAB_SIZE){
+ } else if (word_id >= MAX_VOCAB_SIZE) {
cerr << "ERROR: TOKEN ID is greater than maximum vocabulary size "
- << MAX_VOCAB_SIZE << " in line :\n"<< line <<"\n" ;
+ << MAX_VOCAB_SIZE << " in line :\n"<< line <<"\n" ;
exit(-1);
- }
- else if (freq < 0){
+ } else if (freq < 0) {
cerr << "ERROR: frequency must be a positive integer, in line :\n"
- << line <<"\n";
+ << line <<"\n";
exit(-1);
- }
- else if(word_id >= list.size()){
+ } else if(word_id >= list.size()) {
list.resize(word_id+1);
list[word_id].word = word ;
s2i[word]=word_id;
@@ -84,15 +81,13 @@ void vcbList::readVocabList()
noUniqueTokens = word_id + 1 ;
// noUniqueTokens++ ;
// total += freq ;
- }
- else if(list[word_id].word != "\0"){
+ } else if(list[word_id].word != "\0") {
cerr << "ERROR: TOKEN ID must be unique for each token, in line :\n"
- << line <<"\n";
+ << line <<"\n";
cerr << "TOKEN ID " << word_id << " has already been assigned to: " <<
- list[word_id].word << "\n";
+ list[word_id].word << "\n";
exit(-1);
- }
- else { // line has valid information
+ } else { // line has valid information
list[word_id].word = word ;
s2i[word]=word_id;
list[word_id].freq = 0 ;
@@ -104,17 +99,18 @@ void vcbList::readVocabList()
}
-void vcbList::compact(const std::set<WordIndex>& evoc){
- int del = 0;
- for(int i=0; i< list.size() ; i++){
- if(evoc.find(i)==evoc.end()){ // Not appear in corpus
- s2i.erase(list[i].word);
- list[i].word = "";
- del++;
- }
- }
- cerr << "Compacted Vocabulary, eliminated " << del << " entries "
- << s2i.size() << " remains " << endl;
+void vcbList::compact(const std::set<WordIndex>& evoc)
+{
+ int del = 0;
+ for(int i=0; i< list.size() ; i++) {
+ if(evoc.find(i)==evoc.end()) { // Not appear in corpus
+ s2i.erase(list[i].word);
+ list[i].word = "";
+ del++;
+ }
+ }
+ cerr << "Compacted Vocabulary, eliminated " << del << " entries "
+ << s2i.size() << " remains " << endl;
}
diff --git a/mgizapp/src/vocab.h b/mgizapp/src/vocab.h
index 8bf5de7..88d8a0f 100644
--- a/mgizapp/src/vocab.h
+++ b/mgizapp/src/vocab.h
@@ -8,87 +8,100 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _vocab_h
#define _vocab_h 1
-#include "defs.h"
-#include "Vector.h"
+#include "defs.h"
+#include "Vector.h"
#include <fstream>
#include <strstream>
#include <map>
#include <set>
-class WordEntry {
- public:
+class WordEntry
+{
+public:
string word ;
double freq ;
- WordEntry():word("\0"), freq(0){};
- WordEntry(string w, int f):word(w), freq(f){};
+ WordEntry():word("\0"), freq(0) {};
+ WordEntry(string w, int f):word(w), freq(f) {};
};
-class vcbList{
- private:
+class vcbList
+{
+private:
Vector<WordEntry>& list ;
map<string,int> s2i;
double total;
WordIndex noUniqueTokens ;
WordIndex noUniqueTokensInCorpus ;
const char* fname ;
- public:
- vcbList(Vector<WordEntry>& vcb,const char* f=0):list(vcb), total(0), noUniqueTokens(0), noUniqueTokensInCorpus(0), fname(f){};
- void setName(const char*f)
- { fname=f; }
- vcbList(const vcbList& a):list(a.list), total(a.total), noUniqueTokens(a.noUniqueTokens), noUniqueTokensInCorpus(0), fname(a.fname){};
- void compact(const std::set<WordIndex>& evoc);
- inline WordIndex size()const {return (list.size());};
- inline WordIndex uniqTokens()const {return noUniqueTokens;};
- inline WordIndex uniqTokensInCorpus()const {return noUniqueTokensInCorpus;};
- inline double totalVocab() const {return total;};
- inline Vector<WordEntry>& getVocabList() { return(list);};
- inline const Vector<WordEntry>& getVocabList()const { return(list);};
+public:
+ vcbList(Vector<WordEntry>& vcb,const char* f=0):list(vcb), total(0), noUniqueTokens(0), noUniqueTokensInCorpus(0), fname(f) {};
+ void setName(const char*f) {
+ fname=f;
+ }
+ vcbList(const vcbList& a):list(a.list), total(a.total), noUniqueTokens(a.noUniqueTokens), noUniqueTokensInCorpus(0), fname(a.fname) {};
+ void compact(const std::set<WordIndex>& evoc);
+ inline WordIndex size()const {
+ return (list.size());
+ };
+ inline WordIndex uniqTokens()const {
+ return noUniqueTokens;
+ };
+ inline WordIndex uniqTokensInCorpus()const {
+ return noUniqueTokensInCorpus;
+ };
+ inline double totalVocab() const {
+ return total;
+ };
+ inline Vector<WordEntry>& getVocabList() {
+ return(list);
+ };
+ inline const Vector<WordEntry>& getVocabList()const {
+ return(list);
+ };
void readVocabList();
- void incFreq(WordIndex id , double f){
- if(id < list.size()){
+ void incFreq(WordIndex id , double f) {
+ if(id < list.size()) {
if (list[id].freq == 0)
- noUniqueTokensInCorpus++;
+ noUniqueTokensInCorpus++;
list[id].freq += f ;
total += f ;
}
};
- void clearAllFreq(){
+ void clearAllFreq() {
for (WordIndex id = 0 ; id < list.size() ; id++)
list[id].freq = 0 ;
total = 0 ;
noUniqueTokensInCorpus = 0 ;
};
- const bool has_word(const string& x) const{
- map<string,int>::const_iterator i=s2i.find(x);
- return i!=s2i.end();
+ const bool has_word(const string& x) const {
+ map<string,int>::const_iterator i=s2i.find(x);
+ return i!=s2i.end();
}
- int operator()(const string&x)const
- {
- map<string,int>::const_iterator i=s2i.find(x);
- if( i!=s2i.end() )
- return i->second;
- else
- {
- cerr << "ERROR: no word index for '"<<x<<"'\n";
- return 0;
- }
+ int operator()(const string&x)const {
+ map<string,int>::const_iterator i=s2i.find(x);
+ if( i!=s2i.end() )
+ return i->second;
+ else {
+ cerr << "ERROR: no word index for '"<<x<<"'\n";
+ return 0;
}
+ }
const string operator()(WordIndex id) const { // Yaser - 2000-12-13
if (id < list.size())
return list[id].word ;
@@ -99,12 +112,12 @@ class vcbList{
return list[id].word ;
else return 0 ;
}
- void printVocabList(ostream& of){
- for (WordIndex i = 1 ; i < list.size() ; i++){
+ void printVocabList(ostream& of) {
+ for (WordIndex i = 1 ; i < list.size() ; i++) {
if (list[i].word != "" && list[i].freq > 0)
- of << i << ' ' << list[i].word << ' ' << list[i].freq << '\n';
+ of << i << ' ' << list[i].word << ' ' << list[i].freq << '\n';
}
}
-
+
};
-#endif
+#endif
diff --git a/mgizapp/w32/benchtest.h b/mgizapp/w32/benchtest.h
index 8090578..e927009 100644
--- a/mgizapp/w32/benchtest.h
+++ b/mgizapp/w32/benchtest.h
@@ -6,25 +6,25 @@
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
+ *
* Contact Email: rpj@callisto.canberra.edu.au
- *
+ *
* The current list of contributors is contained
* in the file CONTRIBUTORS included with the source
* code distribution. The list can also be seen at the
* following World Wide Web location:
* http://sources.redhat.com/pthreads-win32/contributors.html
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library in the file COPYING.LIB;
* if not, write to the Free Software Foundation, Inc.,
diff --git a/mgizapp/w32/implement.h b/mgizapp/w32/implement.h
index 3d96483..f20c762 100644
--- a/mgizapp/w32/implement.h
+++ b/mgizapp/w32/implement.h
@@ -10,25 +10,25 @@
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
+ *
* Contact Email: rpj@callisto.canberra.edu.au
- *
+ *
* The current list of contributors is contained
* in the file CONTRIBUTORS included with the source
* code distribution. The list can also be seen at the
* following World Wide Web location:
* http://sources.redhat.com/pthreads-win32/contributors.html
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library in the file COPYING.LIB;
* if not, write to the Free Software Foundation, Inc.,
@@ -98,8 +98,7 @@ typedef VOID (APIENTRY *PAPCFUNC)(DWORD dwParam);
#define int64_t _int64
#endif
-typedef enum
-{
+typedef enum {
/*
* This enumeration represents the state of the thread;
* The thread is still "alive" if the numeric value of the
@@ -122,8 +121,7 @@ PThreadState;
typedef struct ptw32_thread_t_ ptw32_thread_t;
-struct ptw32_thread_t_
-{
+struct ptw32_thread_t_ {
#ifdef _UWIN
DWORD dummy[5];
#endif
@@ -154,13 +152,12 @@ struct ptw32_thread_t_
};
-/*
+/*
* Special value to mark attribute objects as valid.
*/
#define PTW32_ATTR_VALID ((unsigned long) 0xC4C0FFEE)
-struct pthread_attr_t_
-{
+struct pthread_attr_t_ {
unsigned long valid;
void *stackaddr;
size_t stacksize;
@@ -182,8 +179,7 @@ struct pthread_attr_t_
* ====================
*/
-struct sem_t_
-{
+struct sem_t_ {
int value;
pthread_mutex_t lock;
HANDLE sem;
@@ -195,8 +191,7 @@ struct sem_t_
#define PTW32_OBJECT_AUTO_INIT ((void *) -1)
#define PTW32_OBJECT_INVALID NULL
-struct pthread_mutex_t_
-{
+struct pthread_mutex_t_ {
LONG lock_idx; /* Provides exclusive access to mutex state
via the Interlocked* mechanism.
0: unlocked/free.
@@ -212,8 +207,7 @@ struct pthread_mutex_t_
threads. */
};
-struct pthread_mutexattr_t_
-{
+struct pthread_mutexattr_t_ {
int pshared;
int kind;
};
@@ -239,18 +233,15 @@ struct pthread_mutexattr_t_
#define PTW32_SPIN_LOCKED (2)
#define PTW32_SPIN_USE_MUTEX (3)
-struct pthread_spinlock_t_
-{
+struct pthread_spinlock_t_ {
long interlock; /* Locking element for multi-cpus. */
- union
- {
+ union {
int cpus; /* No. of cpus if multi cpus, or */
pthread_mutex_t mutex; /* mutex if single cpu. */
} u;
};
-struct pthread_barrier_t_
-{
+struct pthread_barrier_t_ {
unsigned int nCurrentBarrierHeight;
unsigned int nInitialBarrierHeight;
int iStep;
@@ -258,13 +249,11 @@ struct pthread_barrier_t_
sem_t semBarrierBreeched[2];
};
-struct pthread_barrierattr_t_
-{
+struct pthread_barrierattr_t_ {
int pshared;
};
-struct pthread_key_t_
-{
+struct pthread_key_t_ {
DWORD key;
void (*destructor) (void *);
pthread_mutex_t keyLock;
@@ -275,16 +264,14 @@ struct pthread_key_t_
typedef struct ThreadParms ThreadParms;
typedef struct ThreadKeyAssoc ThreadKeyAssoc;
-struct ThreadParms
-{
+struct ThreadParms {
pthread_t tid;
void *(*start) (void *);
void *arg;
};
-struct pthread_cond_t_
-{
+struct pthread_cond_t_ {
long nWaitersBlocked; /* Number of threads blocked */
long nWaitersGone; /* Number of threads timed out */
long nWaitersToUnblock; /* Number of threads to unblock */
@@ -301,15 +288,13 @@ struct pthread_cond_t_
};
-struct pthread_condattr_t_
-{
+struct pthread_condattr_t_ {
int pshared;
};
#define PTW32_RWLOCK_MAGIC 0xfacade2
-struct pthread_rwlock_t_
-{
+struct pthread_rwlock_t_ {
pthread_mutex_t mtxExclusiveAccess;
pthread_mutex_t mtxSharedAccessCompleted;
pthread_cond_t cndSharedAccessCompleted;
@@ -319,16 +304,14 @@ struct pthread_rwlock_t_
int nMagic;
};
-struct pthread_rwlockattr_t_
-{
+struct pthread_rwlockattr_t_ {
int pshared;
};
/*
* MCS lock queue node - see ptw32_MCS_lock.c
*/
-struct ptw32_mcs_node_t_
-{
+struct ptw32_mcs_node_t_ {
struct ptw32_mcs_node_t_ **lock; /* ptr to tail of queue */
struct ptw32_mcs_node_t_ *next; /* ptr to successor in queue */
LONG readyFlag; /* set after lock is released by
@@ -341,8 +324,7 @@ typedef struct ptw32_mcs_node_t_ ptw32_mcs_local_node_t;
typedef struct ptw32_mcs_node_t_ *ptw32_mcs_lock_t;
-struct ThreadKeyAssoc
-{
+struct ThreadKeyAssoc {
/*
* Purpose:
* This structure creates an association between a thread and a key.
@@ -423,7 +405,7 @@ struct ThreadKeyAssoc
* The pthread_key_t->threads attribute is the head of
* a chain of assoctiations that runs through the
* nextThreads link. This chain provides the 1 to many
- * relationship between a pthread_key_t and all the
+ * relationship between a pthread_key_t and all the
* PThreads that have called pthread_setspecific for
* this pthread_key_t.
*
@@ -512,8 +494,8 @@ struct ThreadKeyAssoc
/* Declared in global.c */
extern PTW32_INTERLOCKED_LONG (WINAPI *
- ptw32_interlocked_compare_exchange)
- (PTW32_INTERLOCKED_LPLONG, PTW32_INTERLOCKED_LONG, PTW32_INTERLOCKED_LONG);
+ ptw32_interlocked_compare_exchange)
+(PTW32_INTERLOCKED_LPLONG, PTW32_INTERLOCKED_LONG, PTW32_INTERLOCKED_LONG);
/* Declared in pthread_cancel.c */
extern DWORD (*ptw32_register_cancelation) (PAPCFUNC, HANDLE, DWORD);
@@ -553,13 +535,13 @@ extern "C"
{
#endif /* __cplusplus */
-/*
- * =====================
- * =====================
- * Forward Declarations
- * =====================
- * =====================
- */
+ /*
+ * =====================
+ * =====================
+ * Forward Declarations
+ * =====================
+ * =====================
+ */
int ptw32_is_attr (const pthread_attr_t * attr);
@@ -568,17 +550,17 @@ extern "C"
int ptw32_rwlock_check_need_init (pthread_rwlock_t * rwlock);
PTW32_INTERLOCKED_LONG WINAPI
- ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
- PTW32_INTERLOCKED_LONG value,
- PTW32_INTERLOCKED_LONG comparand);
+ ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
+ PTW32_INTERLOCKED_LONG value,
+ PTW32_INTERLOCKED_LONG comparand);
LONG WINAPI
- ptw32_InterlockedExchange (LPLONG location,
- LONG value);
+ ptw32_InterlockedExchange (LPLONG location,
+ LONG value);
DWORD
- ptw32_RegisterCancelation (PAPCFUNC callback,
- HANDLE threadH, DWORD callback_arg);
+ ptw32_RegisterCancelation (PAPCFUNC callback,
+ HANDLE threadH, DWORD callback_arg);
int ptw32_processInitialize (void);
@@ -605,7 +587,7 @@ extern "C"
#else
void
#endif
- ptw32_threadStart (void *vthreadParms);
+ ptw32_threadStart (void *vthreadParms);
void ptw32_callUserDestroyRoutines (pthread_t thread);
@@ -626,13 +608,13 @@ extern "C"
void ptw32_filetime_to_timespec (const FILETIME * ft, struct timespec *ts);
#endif
-/* Declared in misc.c */
+ /* Declared in misc.c */
#ifdef NEED_CALLOC
#define calloc(n, s) ptw32_calloc(n, s)
void *ptw32_calloc (size_t n, size_t s);
#endif
-/* Declared in private.c */
+ /* Declared in private.c */
void ptw32_throw (DWORD exception);
#ifdef __cplusplus
@@ -647,11 +629,11 @@ extern "C"
{
# endif
_CRTIMP unsigned long __cdecl _beginthread (void (__cdecl *) (void *),
- unsigned, void *);
+ unsigned, void *);
_CRTIMP void __cdecl _endthread (void);
_CRTIMP unsigned long __cdecl _beginthreadex (void *, unsigned,
- unsigned (__stdcall *) (void *),
- void *, unsigned, unsigned *);
+ unsigned (__stdcall *) (void *),
+ void *, unsigned, unsigned *);
_CRTIMP void __cdecl _endthreadex (unsigned);
# ifdef __cplusplus
}
@@ -684,7 +666,7 @@ extern "C"
*/
#if defined(__CYGWIN32__) || defined(__CYGWIN__) || defined(NEED_CREATETHREAD)
-/*
+/*
* Macro uses args so we can cast start_proc to LPTHREAD_START_ROUTINE
* in order to avoid warnings because of return type
*/
diff --git a/mgizapp/w32/need_errno.h b/mgizapp/w32/need_errno.h
index 2609f8d..d7c9ecd 100644
--- a/mgizapp/w32/need_errno.h
+++ b/mgizapp/w32/need_errno.h
@@ -32,7 +32,7 @@ extern "C" {
-/* Define _CRTIMP */
+ /* Define _CRTIMP */
#ifndef _CRTIMP
#ifdef _DLL
@@ -43,13 +43,13 @@ extern "C" {
#endif /* _CRTIMP */
-/* Define __cdecl for non-Microsoft compilers */
+ /* Define __cdecl for non-Microsoft compilers */
#if ( !defined(_MSC_VER) && !defined(__cdecl) )
#define __cdecl
#endif
-/* Define _CRTAPI1 (for compatibility with the NT SDK) */
+ /* Define _CRTAPI1 (for compatibility with the NT SDK) */
#ifndef _CRTAPI1
#if _MSC_VER >= 800 && _M_IX86 >= 300
@@ -60,16 +60,16 @@ extern "C" {
#endif
-/* declare reference to errno */
+ /* declare reference to errno */
#if (defined(_MT) || defined(_MD) || defined(_DLL)) && !defined(_MAC)
-_CRTIMP extern int * __cdecl _errno(void);
+ _CRTIMP extern int * __cdecl _errno(void);
#define errno (*_errno())
#else /* ndef _MT && ndef _MD && ndef _DLL */
-_CRTIMP extern int errno;
+ _CRTIMP extern int errno;
#endif /* _MT || _MD || _DLL */
-/* Error Codes */
+ /* Error Codes */
#define EPERM 1
#define ENOENT 2
@@ -105,7 +105,7 @@ _CRTIMP extern int errno;
#define ERANGE 34
#define EDEADLK 36
-/* defined differently in winsock.h on WinCE */
+ /* defined differently in winsock.h on WinCE */
#ifndef ENAMETOOLONG
#define ENAMETOOLONG 38
#endif
@@ -113,16 +113,16 @@ _CRTIMP extern int errno;
#define ENOLCK 39
#define ENOSYS 40
-/* defined differently in winsock.h on WinCE */
+ /* defined differently in winsock.h on WinCE */
#ifndef ENOTEMPTY
#define ENOTEMPTY 41
#endif
#define EILSEQ 42
-/*
- * Support EDEADLOCK for compatibiity with older MS-C versions.
- */
+ /*
+ * Support EDEADLOCK for compatibiity with older MS-C versions.
+ */
#define EDEADLOCK EDEADLK
#ifdef __cplusplus
diff --git a/mgizapp/w32/pthread.h b/mgizapp/w32/pthread.h
index d46001f..0bccb24 100644
--- a/mgizapp/w32/pthread.h
+++ b/mgizapp/w32/pthread.h
@@ -5,25 +5,25 @@
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
+ *
* Contact Email: rpj@callisto.canberra.edu.au
- *
+ *
* The current list of contributors is contained
* in the file CONTRIBUTORS included with the source
* code distribution. The list can also be seen at the
* following World Wide Web location:
* http://sources.redhat.com/pthreads-win32/contributors.html
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library in the file COPYING.LIB;
* if not, write to the Free Software Foundation, Inc.,
@@ -309,8 +309,8 @@ enum {
#ifndef HAVE_STRUCT_TIMESPEC
#define HAVE_STRUCT_TIMESPEC 1
struct timespec {
- long tv_sec;
- long tv_nsec;
+ long tv_sec;
+ long tv_nsec;
};
#endif /* HAVE_STRUCT_TIMESPEC */
@@ -318,7 +318,7 @@ struct timespec {
#define SIG_BLOCK 0
#endif /* SIG_BLOCK */
-#ifndef SIG_UNBLOCK
+#ifndef SIG_UNBLOCK
#define SIG_UNBLOCK 1
#endif /* SIG_UNBLOCK */
@@ -331,96 +331,96 @@ extern "C"
{
#endif /* __cplusplus */
-/*
- * -------------------------------------------------------------
- *
- * POSIX 1003.1-2001 Options
- * =========================
- *
- * Options are normally set in <unistd.h>, which is not provided
- * with pthreads-win32.
- *
- * For conformance with the Single Unix Specification (version 3), all of the
- * options below are defined, and have a value of either -1 (not supported)
- * or 200112L (supported).
- *
- * These options can neither be left undefined nor have a value of 0, because
- * either indicates that sysconf(), which is not implemented, may be used at
- * runtime to check the status of the option.
- *
- * _POSIX_THREADS (== 200112L)
- * If == 200112L, you can use threads
- *
- * _POSIX_THREAD_ATTR_STACKSIZE (== 200112L)
- * If == 200112L, you can control the size of a thread's
- * stack
- * pthread_attr_getstacksize
- * pthread_attr_setstacksize
- *
- * _POSIX_THREAD_ATTR_STACKADDR (== -1)
- * If == 200112L, you can allocate and control a thread's
- * stack. If not supported, the following functions
- * will return ENOSYS, indicating they are not
- * supported:
- * pthread_attr_getstackaddr
- * pthread_attr_setstackaddr
- *
- * _POSIX_THREAD_PRIORITY_SCHEDULING (== -1)
- * If == 200112L, you can use realtime scheduling.
- * This option indicates that the behaviour of some
- * implemented functions conforms to the additional TPS
- * requirements in the standard. E.g. rwlocks favour
- * writers over readers when threads have equal priority.
- *
- * _POSIX_THREAD_PRIO_INHERIT (== -1)
- * If == 200112L, you can create priority inheritance
- * mutexes.
- * pthread_mutexattr_getprotocol +
- * pthread_mutexattr_setprotocol +
- *
- * _POSIX_THREAD_PRIO_PROTECT (== -1)
- * If == 200112L, you can create priority ceiling mutexes
- * Indicates the availability of:
- * pthread_mutex_getprioceiling
- * pthread_mutex_setprioceiling
- * pthread_mutexattr_getprioceiling
- * pthread_mutexattr_getprotocol +
- * pthread_mutexattr_setprioceiling
- * pthread_mutexattr_setprotocol +
- *
- * _POSIX_THREAD_PROCESS_SHARED (== -1)
- * If set, you can create mutexes and condition
- * variables that can be shared with another
- * process.If set, indicates the availability
- * of:
- * pthread_mutexattr_getpshared
- * pthread_mutexattr_setpshared
- * pthread_condattr_getpshared
- * pthread_condattr_setpshared
- *
- * _POSIX_THREAD_SAFE_FUNCTIONS (== 200112L)
- * If == 200112L you can use the special *_r library
- * functions that provide thread-safe behaviour
- *
- * _POSIX_READER_WRITER_LOCKS (== 200112L)
- * If == 200112L, you can use read/write locks
- *
- * _POSIX_SPIN_LOCKS (== 200112L)
- * If == 200112L, you can use spin locks
- *
- * _POSIX_BARRIERS (== 200112L)
- * If == 200112L, you can use barriers
- *
- * + These functions provide both 'inherit' and/or
- * 'protect' protocol, based upon these macro
- * settings.
- *
- * -------------------------------------------------------------
- */
-
-/*
- * POSIX Options
- */
+ /*
+ * -------------------------------------------------------------
+ *
+ * POSIX 1003.1-2001 Options
+ * =========================
+ *
+ * Options are normally set in <unistd.h>, which is not provided
+ * with pthreads-win32.
+ *
+ * For conformance with the Single Unix Specification (version 3), all of the
+ * options below are defined, and have a value of either -1 (not supported)
+ * or 200112L (supported).
+ *
+ * These options can neither be left undefined nor have a value of 0, because
+ * either indicates that sysconf(), which is not implemented, may be used at
+ * runtime to check the status of the option.
+ *
+ * _POSIX_THREADS (== 200112L)
+ * If == 200112L, you can use threads
+ *
+ * _POSIX_THREAD_ATTR_STACKSIZE (== 200112L)
+ * If == 200112L, you can control the size of a thread's
+ * stack
+ * pthread_attr_getstacksize
+ * pthread_attr_setstacksize
+ *
+ * _POSIX_THREAD_ATTR_STACKADDR (== -1)
+ * If == 200112L, you can allocate and control a thread's
+ * stack. If not supported, the following functions
+ * will return ENOSYS, indicating they are not
+ * supported:
+ * pthread_attr_getstackaddr
+ * pthread_attr_setstackaddr
+ *
+ * _POSIX_THREAD_PRIORITY_SCHEDULING (== -1)
+ * If == 200112L, you can use realtime scheduling.
+ * This option indicates that the behaviour of some
+ * implemented functions conforms to the additional TPS
+ * requirements in the standard. E.g. rwlocks favour
+ * writers over readers when threads have equal priority.
+ *
+ * _POSIX_THREAD_PRIO_INHERIT (== -1)
+ * If == 200112L, you can create priority inheritance
+ * mutexes.
+ * pthread_mutexattr_getprotocol +
+ * pthread_mutexattr_setprotocol +
+ *
+ * _POSIX_THREAD_PRIO_PROTECT (== -1)
+ * If == 200112L, you can create priority ceiling mutexes
+ * Indicates the availability of:
+ * pthread_mutex_getprioceiling
+ * pthread_mutex_setprioceiling
+ * pthread_mutexattr_getprioceiling
+ * pthread_mutexattr_getprotocol +
+ * pthread_mutexattr_setprioceiling
+ * pthread_mutexattr_setprotocol +
+ *
+ * _POSIX_THREAD_PROCESS_SHARED (== -1)
+ * If set, you can create mutexes and condition
+ * variables that can be shared with another
+ * process.If set, indicates the availability
+ * of:
+ * pthread_mutexattr_getpshared
+ * pthread_mutexattr_setpshared
+ * pthread_condattr_getpshared
+ * pthread_condattr_setpshared
+ *
+ * _POSIX_THREAD_SAFE_FUNCTIONS (== 200112L)
+ * If == 200112L you can use the special *_r library
+ * functions that provide thread-safe behaviour
+ *
+ * _POSIX_READER_WRITER_LOCKS (== 200112L)
+ * If == 200112L, you can use read/write locks
+ *
+ * _POSIX_SPIN_LOCKS (== 200112L)
+ * If == 200112L, you can use spin locks
+ *
+ * _POSIX_BARRIERS (== 200112L)
+ * If == 200112L, you can use barriers
+ *
+ * + These functions provide both 'inherit' and/or
+ * 'protect' protocol, based upon these macro
+ * settings.
+ *
+ * -------------------------------------------------------------
+ */
+
+ /*
+ * POSIX Options
+ */
#undef _POSIX_THREADS
#define _POSIX_THREADS 200112L
@@ -439,9 +439,9 @@ extern "C"
#undef _POSIX_THREAD_ATTR_STACKSIZE
#define _POSIX_THREAD_ATTR_STACKSIZE 200112L
-/*
- * The following options are not supported
- */
+ /*
+ * The following options are not supported
+ */
#undef _POSIX_THREAD_ATTR_STACKADDR
#define _POSIX_THREAD_ATTR_STACKADDR -1
@@ -451,7 +451,7 @@ extern "C"
#undef _POSIX_THREAD_PRIO_PROTECT
#define _POSIX_THREAD_PRIO_PROTECT -1
-/* TPS is not fully supported. */
+ /* TPS is not fully supported. */
#undef _POSIX_THREAD_PRIORITY_SCHEDULING
#define _POSIX_THREAD_PRIORITY_SCHEDULING -1
@@ -459,38 +459,38 @@ extern "C"
#define _POSIX_THREAD_PROCESS_SHARED -1
-/*
- * POSIX 1003.1-2001 Limits
- * ===========================
- *
- * These limits are normally set in <limits.h>, which is not provided with
- * pthreads-win32.
- *
- * PTHREAD_DESTRUCTOR_ITERATIONS
- * Maximum number of attempts to destroy
- * a thread's thread-specific data on
- * termination (must be at least 4)
- *
- * PTHREAD_KEYS_MAX
- * Maximum number of thread-specific data keys
- * available per process (must be at least 128)
- *
- * PTHREAD_STACK_MIN
- * Minimum supported stack size for a thread
- *
- * PTHREAD_THREADS_MAX
- * Maximum number of threads supported per
- * process (must be at least 64).
- *
- * SEM_NSEMS_MAX
- * The maximum number of semaphores a process can have.
- * (must be at least 256)
- *
- * SEM_VALUE_MAX
- * The maximum value a semaphore can have.
- * (must be at least 32767)
- *
- */
+ /*
+ * POSIX 1003.1-2001 Limits
+ * ===========================
+ *
+ * These limits are normally set in <limits.h>, which is not provided with
+ * pthreads-win32.
+ *
+ * PTHREAD_DESTRUCTOR_ITERATIONS
+ * Maximum number of attempts to destroy
+ * a thread's thread-specific data on
+ * termination (must be at least 4)
+ *
+ * PTHREAD_KEYS_MAX
+ * Maximum number of thread-specific data keys
+ * available per process (must be at least 128)
+ *
+ * PTHREAD_STACK_MIN
+ * Minimum supported stack size for a thread
+ *
+ * PTHREAD_THREADS_MAX
+ * Maximum number of threads supported per
+ * process (must be at least 64).
+ *
+ * SEM_NSEMS_MAX
+ * The maximum number of semaphores a process can have.
+ * (must be at least 256)
+ *
+ * SEM_VALUE_MAX
+ * The maximum value a semaphore can have.
+ * (must be at least 32767)
+ *
+ */
#undef _POSIX_THREAD_DESTRUCTOR_ITERATIONS
#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4
@@ -531,12 +531,12 @@ extern "C"
# error Please upgrade your GNU compiler to one that supports __declspec.
#endif
-/*
- * When building the DLL code, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the DLL,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
+ /*
+ * When building the DLL code, you should define PTW32_BUILD so that
+ * the variables/functions are exported correctly. When using the DLL,
+ * do NOT define PTW32_BUILD, and then the variables/functions will
+ * be imported correctly.
+ */
#ifndef PTW32_STATIC_LIB
# ifdef PTW32_BUILD
# define PTW32_DLLPORT __declspec (dllexport)
@@ -547,140 +547,139 @@ extern "C"
# define PTW32_DLLPORT
#endif
-/*
- * The Open Watcom C/C++ compiler uses a non-standard calling convention
- * that passes function args in registers unless __cdecl is explicitly specified
- * in exposed function prototypes.
- *
- * We force all calls to cdecl even though this could slow Watcom code down
- * slightly. If you know that the Watcom compiler will be used to build both
- * the DLL and application, then you can probably define this as a null string.
- * Remember that pthread.h (this file) is used for both the DLL and application builds.
- */
+ /*
+ * The Open Watcom C/C++ compiler uses a non-standard calling convention
+ * that passes function args in registers unless __cdecl is explicitly specified
+ * in exposed function prototypes.
+ *
+ * We force all calls to cdecl even though this could slow Watcom code down
+ * slightly. If you know that the Watcom compiler will be used to build both
+ * the DLL and application, then you can probably define this as a null string.
+ * Remember that pthread.h (this file) is used for both the DLL and application builds.
+ */
#define PTW32_CDECL __cdecl
#if defined(_UWIN) && PTW32_LEVEL >= PTW32_LEVEL_MAX
# include <sys/types.h>
#else
-/*
- * Generic handle type - intended to extend uniqueness beyond
- * that available with a simple pointer. It should scale for either
- * IA-32 or IA-64.
- */
-typedef struct {
+ /*
+ * Generic handle type - intended to extend uniqueness beyond
+ * that available with a simple pointer. It should scale for either
+ * IA-32 or IA-64.
+ */
+ typedef struct {
void * p; /* Pointer to actual object */
unsigned int x; /* Extra information - reuse count etc */
-} ptw32_handle_t;
-
-typedef ptw32_handle_t pthread_t;
-typedef struct pthread_attr_t_ * pthread_attr_t;
-typedef struct pthread_once_t_ pthread_once_t;
-typedef struct pthread_key_t_ * pthread_key_t;
-typedef struct pthread_mutex_t_ * pthread_mutex_t;
-typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t;
-typedef struct pthread_cond_t_ * pthread_cond_t;
-typedef struct pthread_condattr_t_ * pthread_condattr_t;
+ } ptw32_handle_t;
+
+ typedef ptw32_handle_t pthread_t;
+ typedef struct pthread_attr_t_ * pthread_attr_t;
+ typedef struct pthread_once_t_ pthread_once_t;
+ typedef struct pthread_key_t_ * pthread_key_t;
+ typedef struct pthread_mutex_t_ * pthread_mutex_t;
+ typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t;
+ typedef struct pthread_cond_t_ * pthread_cond_t;
+ typedef struct pthread_condattr_t_ * pthread_condattr_t;
#endif
-typedef struct pthread_rwlock_t_ * pthread_rwlock_t;
-typedef struct pthread_rwlockattr_t_ * pthread_rwlockattr_t;
-typedef struct pthread_spinlock_t_ * pthread_spinlock_t;
-typedef struct pthread_barrier_t_ * pthread_barrier_t;
-typedef struct pthread_barrierattr_t_ * pthread_barrierattr_t;
-
-/*
- * ====================
- * ====================
- * POSIX Threads
- * ====================
- * ====================
- */
-
-enum {
-/*
- * pthread_attr_{get,set}detachstate
- */
- PTHREAD_CREATE_JOINABLE = 0, /* Default */
- PTHREAD_CREATE_DETACHED = 1,
-
-/*
- * pthread_attr_{get,set}inheritsched
- */
- PTHREAD_INHERIT_SCHED = 0,
- PTHREAD_EXPLICIT_SCHED = 1, /* Default */
-
-/*
- * pthread_{get,set}scope
- */
- PTHREAD_SCOPE_PROCESS = 0,
- PTHREAD_SCOPE_SYSTEM = 1, /* Default */
-
-/*
- * pthread_setcancelstate paramters
- */
- PTHREAD_CANCEL_ENABLE = 0, /* Default */
- PTHREAD_CANCEL_DISABLE = 1,
-
-/*
- * pthread_setcanceltype parameters
- */
- PTHREAD_CANCEL_ASYNCHRONOUS = 0,
- PTHREAD_CANCEL_DEFERRED = 1, /* Default */
-
-/*
- * pthread_mutexattr_{get,set}pshared
- * pthread_condattr_{get,set}pshared
- */
- PTHREAD_PROCESS_PRIVATE = 0,
- PTHREAD_PROCESS_SHARED = 1,
-
-/*
- * pthread_barrier_wait
- */
- PTHREAD_BARRIER_SERIAL_THREAD = -1
-};
-
-/*
- * ====================
- * ====================
- * Cancelation
- * ====================
- * ====================
- */
+ typedef struct pthread_rwlock_t_ * pthread_rwlock_t;
+ typedef struct pthread_rwlockattr_t_ * pthread_rwlockattr_t;
+ typedef struct pthread_spinlock_t_ * pthread_spinlock_t;
+ typedef struct pthread_barrier_t_ * pthread_barrier_t;
+ typedef struct pthread_barrierattr_t_ * pthread_barrierattr_t;
+
+ /*
+ * ====================
+ * ====================
+ * POSIX Threads
+ * ====================
+ * ====================
+ */
+
+ enum {
+ /*
+ * pthread_attr_{get,set}detachstate
+ */
+ PTHREAD_CREATE_JOINABLE = 0, /* Default */
+ PTHREAD_CREATE_DETACHED = 1,
+
+ /*
+ * pthread_attr_{get,set}inheritsched
+ */
+ PTHREAD_INHERIT_SCHED = 0,
+ PTHREAD_EXPLICIT_SCHED = 1, /* Default */
+
+ /*
+ * pthread_{get,set}scope
+ */
+ PTHREAD_SCOPE_PROCESS = 0,
+ PTHREAD_SCOPE_SYSTEM = 1, /* Default */
+
+ /*
+ * pthread_setcancelstate paramters
+ */
+ PTHREAD_CANCEL_ENABLE = 0, /* Default */
+ PTHREAD_CANCEL_DISABLE = 1,
+
+ /*
+ * pthread_setcanceltype parameters
+ */
+ PTHREAD_CANCEL_ASYNCHRONOUS = 0,
+ PTHREAD_CANCEL_DEFERRED = 1, /* Default */
+
+ /*
+ * pthread_mutexattr_{get,set}pshared
+ * pthread_condattr_{get,set}pshared
+ */
+ PTHREAD_PROCESS_PRIVATE = 0,
+ PTHREAD_PROCESS_SHARED = 1,
+
+ /*
+ * pthread_barrier_wait
+ */
+ PTHREAD_BARRIER_SERIAL_THREAD = -1
+ };
+
+ /*
+ * ====================
+ * ====================
+ * Cancelation
+ * ====================
+ * ====================
+ */
#define PTHREAD_CANCELED ((void *) -1)
-/*
- * ====================
- * ====================
- * Once Key
- * ====================
- * ====================
- */
+ /*
+ * ====================
+ * ====================
+ * Once Key
+ * ====================
+ * ====================
+ */
#define PTHREAD_ONCE_INIT { PTW32_FALSE, 0, 0, 0}
-struct pthread_once_t_
-{
- int done; /* indicates if user function has been executed */
- void * lock;
- int reserved1;
- int reserved2;
-};
-
-
-/*
- * ====================
- * ====================
- * Object initialisers
- * ====================
- * ====================
- */
+ struct pthread_once_t_ {
+ int done; /* indicates if user function has been executed */
+ void * lock;
+ int reserved1;
+ int reserved2;
+ };
+
+
+ /*
+ * ====================
+ * ====================
+ * Object initialisers
+ * ====================
+ * ====================
+ */
#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t) -1)
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t) -2)
#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t) -3)
-/*
- * Compatibility with LinuxThreads
- */
+ /*
+ * Compatibility with LinuxThreads
+ */
#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP PTHREAD_RECURSIVE_MUTEX_INITIALIZER
#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP PTHREAD_ERRORCHECK_MUTEX_INITIALIZER
@@ -691,49 +690,47 @@ struct pthread_once_t_
#define PTHREAD_SPINLOCK_INITIALIZER ((pthread_spinlock_t) -1)
-/*
- * Mutex types.
- */
-enum
-{
- /* Compatibility with LinuxThreads */
- PTHREAD_MUTEX_FAST_NP,
- PTHREAD_MUTEX_RECURSIVE_NP,
- PTHREAD_MUTEX_ERRORCHECK_NP,
- PTHREAD_MUTEX_TIMED_NP = PTHREAD_MUTEX_FAST_NP,
- PTHREAD_MUTEX_ADAPTIVE_NP = PTHREAD_MUTEX_FAST_NP,
- /* For compatibility with POSIX */
- PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_FAST_NP,
- PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP,
- PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP,
- PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
-};
+ /*
+ * Mutex types.
+ */
+ enum {
+ /* Compatibility with LinuxThreads */
+ PTHREAD_MUTEX_FAST_NP,
+ PTHREAD_MUTEX_RECURSIVE_NP,
+ PTHREAD_MUTEX_ERRORCHECK_NP,
+ PTHREAD_MUTEX_TIMED_NP = PTHREAD_MUTEX_FAST_NP,
+ PTHREAD_MUTEX_ADAPTIVE_NP = PTHREAD_MUTEX_FAST_NP,
+ /* For compatibility with POSIX */
+ PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_FAST_NP,
+ PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP,
+ PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP,
+ PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
+ };
-typedef struct ptw32_cleanup_t ptw32_cleanup_t;
+ typedef struct ptw32_cleanup_t ptw32_cleanup_t;
#if defined(_MSC_VER)
-/* Disable MSVC 'anachronism used' warning */
+ /* Disable MSVC 'anachronism used' warning */
#pragma warning( disable : 4229 )
#endif
-typedef void (* PTW32_CDECL ptw32_cleanup_callback_t)(void *);
+ typedef void (* PTW32_CDECL ptw32_cleanup_callback_t)(void *);
#if defined(_MSC_VER)
#pragma warning( default : 4229 )
#endif
-struct ptw32_cleanup_t
-{
- ptw32_cleanup_callback_t routine;
- void *arg;
- struct ptw32_cleanup_t *prev;
-};
+ struct ptw32_cleanup_t {
+ ptw32_cleanup_callback_t routine;
+ void *arg;
+ struct ptw32_cleanup_t *prev;
+ };
#ifdef __CLEANUP_SEH
- /*
- * WIN32 SEH version of cancel cleanup.
- */
+ /*
+ * WIN32 SEH version of cancel cleanup.
+ */
#define pthread_cleanup_push( _rout, _arg ) \
{ \
@@ -759,9 +756,9 @@ struct ptw32_cleanup_t
#ifdef __CLEANUP_C
- /*
- * C implementation of PThreads cancel cleanup
- */
+ /*
+ * C implementation of PThreads cancel cleanup
+ */
#define pthread_cleanup_push( _rout, _arg ) \
{ \
@@ -777,71 +774,69 @@ struct ptw32_cleanup_t
#ifdef __CLEANUP_CXX
- /*
- * C++ version of cancel cleanup.
- * - John E. Bossom.
- */
-
- class PThreadCleanup {
- /*
- * PThreadCleanup
- *
- * Purpose
- * This class is a C++ helper class that is
- * used to implement pthread_cleanup_push/
- * pthread_cleanup_pop.
- * The destructor of this class automatically
- * pops the pushed cleanup routine regardless
- * of how the code exits the scope
- * (i.e. such as by an exception)
- */
- ptw32_cleanup_callback_t cleanUpRout;
- void * obj;
- int executeIt;
-
- public:
- PThreadCleanup() :
- cleanUpRout( 0 ),
- obj( 0 ),
- executeIt( 0 )
- /*
- * No cleanup performed
- */
- {
- }
-
- PThreadCleanup(
- ptw32_cleanup_callback_t routine,
- void * arg ) :
- cleanUpRout( routine ),
- obj( arg ),
- executeIt( 1 )
- /*
- * Registers a cleanup routine for 'arg'
- */
- {
- }
-
- ~PThreadCleanup()
- {
- if ( executeIt && ((void *) cleanUpRout != (void *) 0) )
- {
- (void) (*cleanUpRout)( obj );
- }
- }
-
- void execute( int exec )
- {
- executeIt = exec;
- }
- };
-
- /*
- * C++ implementation of PThreads cancel cleanup;
- * This implementation takes advantage of a helper
- * class who's destructor automatically calls the
- * cleanup routine if we exit our scope weirdly
- */
+ /*
+ * C++ version of cancel cleanup.
+ * - John E. Bossom.
+ */
+
+ class PThreadCleanup
+ {
+ /*
+ * PThreadCleanup
+ *
+ * Purpose
+ * This class is a C++ helper class that is
+ * used to implement pthread_cleanup_push/
+ * pthread_cleanup_pop.
+ * The destructor of this class automatically
+ * pops the pushed cleanup routine regardless
+ * of how the code exits the scope
+ * (i.e. such as by an exception)
+ */
+ ptw32_cleanup_callback_t cleanUpRout;
+ void * obj;
+ int executeIt;
+
+ public:
+ PThreadCleanup() :
+ cleanUpRout( 0 ),
+ obj( 0 ),
+ executeIt( 0 )
+ /*
+ * No cleanup performed
+ */
+ {
+ }
+
+ PThreadCleanup(
+ ptw32_cleanup_callback_t routine,
+ void * arg ) :
+ cleanUpRout( routine ),
+ obj( arg ),
+ executeIt( 1 )
+ /*
+ * Registers a cleanup routine for 'arg'
+ */
+ {
+ }
+
+ ~PThreadCleanup() {
+ if ( executeIt && ((void *) cleanUpRout != (void *) 0) ) {
+ (void) (*cleanUpRout)( obj );
+ }
+ }
+
+ void execute( int exec ) {
+ executeIt = exec;
+ }
+ };
+
+ /*
+ * C++ implementation of PThreads cancel cleanup;
+ * This implementation takes advantage of a helper
+ * class who's destructor automatically calls the
+ * cleanup routine if we exit our scope weirdly
+ */
#define pthread_cleanup_push( _rout, _arg ) \
{ \
PThreadCleanup cleanup((ptw32_cleanup_callback_t)(_rout), \
@@ -861,380 +856,380 @@ struct ptw32_cleanup_t
#endif /* __CLEANUP_SEH */
-/*
- * ===============
- * ===============
- * Methods
- * ===============
- * ===============
- */
+ /*
+ * ===============
+ * ===============
+ * Methods
+ * ===============
+ * ===============
+ */
-/*
- * PThread Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_init (pthread_attr_t * attr);
+ /*
+ * PThread Attribute Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_init (pthread_attr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_destroy (pthread_attr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_destroy (pthread_attr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getdetachstate (const pthread_attr_t * attr,
- int *detachstate);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getdetachstate (const pthread_attr_t * attr,
+ int *detachstate);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstackaddr (const pthread_attr_t * attr,
- void **stackaddr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstackaddr (const pthread_attr_t * attr,
+ void **stackaddr);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstacksize (const pthread_attr_t * attr,
- size_t * stacksize);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstacksize (const pthread_attr_t * attr,
+ size_t * stacksize);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setdetachstate (pthread_attr_t * attr,
- int detachstate);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setdetachstate (pthread_attr_t * attr,
+ int detachstate);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstackaddr (pthread_attr_t * attr,
- void *stackaddr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstackaddr (pthread_attr_t * attr,
+ void *stackaddr);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstacksize (pthread_attr_t * attr,
- size_t stacksize);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstacksize (pthread_attr_t * attr,
+ size_t stacksize);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedparam (const pthread_attr_t *attr,
- struct sched_param *param);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedparam (const pthread_attr_t *attr,
+ struct sched_param *param);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedparam (pthread_attr_t *attr,
- const struct sched_param *param);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedparam (pthread_attr_t *attr,
+ const struct sched_param *param);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedpolicy (pthread_attr_t *,
- int);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedpolicy (pthread_attr_t *,
+ int);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedpolicy (pthread_attr_t *,
- int *);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedpolicy (pthread_attr_t *,
+ int *);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setinheritsched(pthread_attr_t * attr,
- int inheritsched);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setinheritsched(pthread_attr_t * attr,
+ int inheritsched);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getinheritsched(pthread_attr_t * attr,
- int * inheritsched);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getinheritsched(pthread_attr_t * attr,
+ int * inheritsched);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_setscope (pthread_attr_t *,
- int);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_setscope (pthread_attr_t *,
+ int);
-PTW32_DLLPORT int PTW32_CDECL pthread_attr_getscope (const pthread_attr_t *,
- int *);
+ PTW32_DLLPORT int PTW32_CDECL pthread_attr_getscope (const pthread_attr_t *,
+ int *);
-/*
- * PThread Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_create (pthread_t * tid,
- const pthread_attr_t * attr,
- void *(*start) (void *),
- void *arg);
+ /*
+ * PThread Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_create (pthread_t * tid,
+ const pthread_attr_t * attr,
+ void *(*start) (void *),
+ void *arg);
-PTW32_DLLPORT int PTW32_CDECL pthread_detach (pthread_t tid);
+ PTW32_DLLPORT int PTW32_CDECL pthread_detach (pthread_t tid);
-PTW32_DLLPORT int PTW32_CDECL pthread_equal (pthread_t t1,
- pthread_t t2);
+ PTW32_DLLPORT int PTW32_CDECL pthread_equal (pthread_t t1,
+ pthread_t t2);
-PTW32_DLLPORT void PTW32_CDECL pthread_exit (void *value_ptr);
+ PTW32_DLLPORT void PTW32_CDECL pthread_exit (void *value_ptr);
-PTW32_DLLPORT int PTW32_CDECL pthread_join (pthread_t thread,
- void **value_ptr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_join (pthread_t thread,
+ void **value_ptr);
-PTW32_DLLPORT pthread_t PTW32_CDECL pthread_self (void);
+ PTW32_DLLPORT pthread_t PTW32_CDECL pthread_self (void);
-PTW32_DLLPORT int PTW32_CDECL pthread_cancel (pthread_t thread);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cancel (pthread_t thread);
-PTW32_DLLPORT int PTW32_CDECL pthread_setcancelstate (int state,
- int *oldstate);
+ PTW32_DLLPORT int PTW32_CDECL pthread_setcancelstate (int state,
+ int *oldstate);
-PTW32_DLLPORT int PTW32_CDECL pthread_setcanceltype (int type,
- int *oldtype);
+ PTW32_DLLPORT int PTW32_CDECL pthread_setcanceltype (int type,
+ int *oldtype);
-PTW32_DLLPORT void PTW32_CDECL pthread_testcancel (void);
+ PTW32_DLLPORT void PTW32_CDECL pthread_testcancel (void);
-PTW32_DLLPORT int PTW32_CDECL pthread_once (pthread_once_t * once_control,
- void (*init_routine) (void));
+ PTW32_DLLPORT int PTW32_CDECL pthread_once (pthread_once_t * once_control,
+ void (*init_routine) (void));
#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-PTW32_DLLPORT ptw32_cleanup_t * PTW32_CDECL ptw32_pop_cleanup (int execute);
+ PTW32_DLLPORT ptw32_cleanup_t * PTW32_CDECL ptw32_pop_cleanup (int execute);
-PTW32_DLLPORT void PTW32_CDECL ptw32_push_cleanup (ptw32_cleanup_t * cleanup,
- void (*routine) (void *),
- void *arg);
+ PTW32_DLLPORT void PTW32_CDECL ptw32_push_cleanup (ptw32_cleanup_t * cleanup,
+ void (*routine) (void *),
+ void *arg);
#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-/*
- * Thread Specific Data Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_key_create (pthread_key_t * key,
- void (*destructor) (void *));
+ /*
+ * Thread Specific Data Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_key_create (pthread_key_t * key,
+ void (*destructor) (void *));
-PTW32_DLLPORT int PTW32_CDECL pthread_key_delete (pthread_key_t key);
+ PTW32_DLLPORT int PTW32_CDECL pthread_key_delete (pthread_key_t key);
-PTW32_DLLPORT int PTW32_CDECL pthread_setspecific (pthread_key_t key,
- const void *value);
+ PTW32_DLLPORT int PTW32_CDECL pthread_setspecific (pthread_key_t key,
+ const void *value);
-PTW32_DLLPORT void * PTW32_CDECL pthread_getspecific (pthread_key_t key);
+ PTW32_DLLPORT void * PTW32_CDECL pthread_getspecific (pthread_key_t key);
-/*
- * Mutex Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_init (pthread_mutexattr_t * attr);
+ /*
+ * Mutex Attribute Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_init (pthread_mutexattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_destroy (pthread_mutexattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_destroy (pthread_mutexattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getpshared (const pthread_mutexattr_t
- * attr,
- int *pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getpshared (const pthread_mutexattr_t
+ * attr,
+ int *pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setpshared (pthread_mutexattr_t * attr,
- int pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setpshared (pthread_mutexattr_t * attr,
+ int pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_settype (pthread_mutexattr_t * attr, int kind);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_gettype (pthread_mutexattr_t * attr, int *kind);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_settype (pthread_mutexattr_t * attr, int kind);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_gettype (pthread_mutexattr_t * attr, int *kind);
-/*
- * Barrier Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_init (pthread_barrierattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_destroy (pthread_barrierattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_getpshared (const pthread_barrierattr_t
- * attr,
- int *pshared);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_setpshared (pthread_barrierattr_t * attr,
- int pshared);
+ /*
+ * Barrier Attribute Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_init (pthread_barrierattr_t * attr);
-/*
- * Mutex Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init (pthread_mutex_t * mutex,
- const pthread_mutexattr_t * attr);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy (pthread_mutex_t * mutex);
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_destroy (pthread_barrierattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock (pthread_mutex_t * mutex);
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_getpshared (const pthread_barrierattr_t
+ * attr,
+ int *pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_timedlock(pthread_mutex_t *mutex,
- const struct timespec *abstime);
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_setpshared (pthread_barrierattr_t * attr,
+ int pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_trylock (pthread_mutex_t * mutex);
+ /*
+ * Mutex Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init (pthread_mutex_t * mutex,
+ const pthread_mutexattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock (pthread_mutex_t * mutex);
-
-/*
- * Spinlock Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_init (pthread_spinlock_t * lock, int pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy (pthread_mutex_t * mutex);
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_destroy (pthread_spinlock_t * lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock (pthread_mutex_t * mutex);
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_lock (pthread_spinlock_t * lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_timedlock(pthread_mutex_t *mutex,
+ const struct timespec *abstime);
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_trylock (pthread_spinlock_t * lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_trylock (pthread_mutex_t * mutex);
-PTW32_DLLPORT int PTW32_CDECL pthread_spin_unlock (pthread_spinlock_t * lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock (pthread_mutex_t * mutex);
-/*
- * Barrier Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_init (pthread_barrier_t * barrier,
- const pthread_barrierattr_t * attr,
- unsigned int count);
+ /*
+ * Spinlock Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_spin_init (pthread_spinlock_t * lock, int pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_destroy (pthread_barrier_t * barrier);
+ PTW32_DLLPORT int PTW32_CDECL pthread_spin_destroy (pthread_spinlock_t * lock);
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_wait (pthread_barrier_t * barrier);
+ PTW32_DLLPORT int PTW32_CDECL pthread_spin_lock (pthread_spinlock_t * lock);
-/*
- * Condition Variable Attribute Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_init (pthread_condattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_spin_trylock (pthread_spinlock_t * lock);
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_destroy (pthread_condattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_spin_unlock (pthread_spinlock_t * lock);
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_getpshared (const pthread_condattr_t * attr,
- int *pshared);
+ /*
+ * Barrier Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrier_init (pthread_barrier_t * barrier,
+ const pthread_barrierattr_t * attr,
+ unsigned int count);
-PTW32_DLLPORT int PTW32_CDECL pthread_condattr_setpshared (pthread_condattr_t * attr,
- int pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrier_destroy (pthread_barrier_t * barrier);
-/*
- * Condition Variable Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_init (pthread_cond_t * cond,
- const pthread_condattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_barrier_wait (pthread_barrier_t * barrier);
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_destroy (pthread_cond_t * cond);
+ /*
+ * Condition Variable Attribute Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_condattr_init (pthread_condattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_wait (pthread_cond_t * cond,
- pthread_mutex_t * mutex);
+ PTW32_DLLPORT int PTW32_CDECL pthread_condattr_destroy (pthread_condattr_t * attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_timedwait (pthread_cond_t * cond,
- pthread_mutex_t * mutex,
- const struct timespec *abstime);
+ PTW32_DLLPORT int PTW32_CDECL pthread_condattr_getpshared (const pthread_condattr_t * attr,
+ int *pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_signal (pthread_cond_t * cond);
+ PTW32_DLLPORT int PTW32_CDECL pthread_condattr_setpshared (pthread_condattr_t * attr,
+ int pshared);
-PTW32_DLLPORT int PTW32_CDECL pthread_cond_broadcast (pthread_cond_t * cond);
+ /*
+ * Condition Variable Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_init (pthread_cond_t * cond,
+ const pthread_condattr_t * attr);
-/*
- * Scheduling
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_setschedparam (pthread_t thread,
- int policy,
- const struct sched_param *param);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_destroy (pthread_cond_t * cond);
-PTW32_DLLPORT int PTW32_CDECL pthread_getschedparam (pthread_t thread,
- int *policy,
- struct sched_param *param);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_wait (pthread_cond_t * cond,
+ pthread_mutex_t * mutex);
-PTW32_DLLPORT int PTW32_CDECL pthread_setconcurrency (int);
-
-PTW32_DLLPORT int PTW32_CDECL pthread_getconcurrency (void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_timedwait (pthread_cond_t * cond,
+ pthread_mutex_t * mutex,
+ const struct timespec *abstime);
-/*
- * Read-Write Lock Functions
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_init(pthread_rwlock_t *lock,
- const pthread_rwlockattr_t *attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_signal (pthread_cond_t * cond);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_destroy(pthread_rwlock_t *lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_cond_broadcast (pthread_cond_t * cond);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_tryrdlock(pthread_rwlock_t *);
+ /*
+ * Scheduling
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_setschedparam (pthread_t thread,
+ int policy,
+ const struct sched_param *param);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_trywrlock(pthread_rwlock_t *);
+ PTW32_DLLPORT int PTW32_CDECL pthread_getschedparam (pthread_t thread,
+ int *policy,
+ struct sched_param *param);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_rdlock(pthread_rwlock_t *lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_setconcurrency (int);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedrdlock(pthread_rwlock_t *lock,
- const struct timespec *abstime);
+ PTW32_DLLPORT int PTW32_CDECL pthread_getconcurrency (void);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_wrlock(pthread_rwlock_t *lock);
+ /*
+ * Read-Write Lock Functions
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_init(pthread_rwlock_t *lock,
+ const pthread_rwlockattr_t *attr);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedwrlock(pthread_rwlock_t *lock,
- const struct timespec *abstime);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_destroy(pthread_rwlock_t *lock);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_unlock(pthread_rwlock_t *lock);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_tryrdlock(pthread_rwlock_t *);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_init (pthread_rwlockattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_trywrlock(pthread_rwlock_t *);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_destroy (pthread_rwlockattr_t * attr);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_rdlock(pthread_rwlock_t *lock);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_getpshared (const pthread_rwlockattr_t * attr,
- int *pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedrdlock(pthread_rwlock_t *lock,
+ const struct timespec *abstime);
-PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_setpshared (pthread_rwlockattr_t * attr,
- int pshared);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_wrlock(pthread_rwlock_t *lock);
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX - 1
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedwrlock(pthread_rwlock_t *lock,
+ const struct timespec *abstime);
-/*
- * Signal Functions. Should be defined in <signal.h> but MSVC and MinGW32
- * already have signal.h that don't define these.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_kill(pthread_t thread, int sig);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_unlock(pthread_rwlock_t *lock);
-/*
- * Non-portable functions
- */
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_init (pthread_rwlockattr_t * attr);
-/*
- * Compatibility with Linux.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setkind_np(pthread_mutexattr_t * attr,
- int kind);
-PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getkind_np(pthread_mutexattr_t * attr,
- int *kind);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_destroy (pthread_rwlockattr_t * attr);
-/*
- * Possibly supported by other POSIX threads implementations
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_delay_np (struct timespec * interval);
-PTW32_DLLPORT int PTW32_CDECL pthread_num_processors_np(void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_getpshared (const pthread_rwlockattr_t * attr,
+ int *pshared);
-/*
- * Useful if an application wants to statically link
- * the lib rather than load the DLL at run-time.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_attach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_detach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_attach_np(void);
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_detach_np(void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_setpshared (pthread_rwlockattr_t * attr,
+ int pshared);
-/*
- * Features that are auto-detected at load/run time.
- */
-PTW32_DLLPORT int PTW32_CDECL pthread_win32_test_features_np(int);
-enum ptw32_features {
- PTW32_SYSTEM_INTERLOCKED_COMPARE_EXCHANGE = 0x0001, /* System provides it. */
- PTW32_ALERTABLE_ASYNC_CANCEL = 0x0002 /* Can cancel blocked threads. */
-};
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX - 1
-/*
- * Register a system time change with the library.
- * Causes the library to perform various functions
- * in response to the change. Should be called whenever
- * the application's top level window receives a
- * WM_TIMECHANGE message. It can be passed directly to
- * pthread_create() as a new thread if desired.
- */
-PTW32_DLLPORT void * PTW32_CDECL pthread_timechange_handler_np(void *);
+ /*
+ * Signal Functions. Should be defined in <signal.h> but MSVC and MinGW32
+ * already have signal.h that don't define these.
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_kill(pthread_t thread, int sig);
+
+ /*
+ * Non-portable functions
+ */
+
+ /*
+ * Compatibility with Linux.
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setkind_np(pthread_mutexattr_t * attr,
+ int kind);
+ PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getkind_np(pthread_mutexattr_t * attr,
+ int *kind);
+
+ /*
+ * Possibly supported by other POSIX threads implementations
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_delay_np (struct timespec * interval);
+ PTW32_DLLPORT int PTW32_CDECL pthread_num_processors_np(void);
+
+ /*
+ * Useful if an application wants to statically link
+ * the lib rather than load the DLL at run-time.
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_attach_np(void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_detach_np(void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_attach_np(void);
+ PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_detach_np(void);
+
+ /*
+ * Features that are auto-detected at load/run time.
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthread_win32_test_features_np(int);
+ enum ptw32_features {
+ PTW32_SYSTEM_INTERLOCKED_COMPARE_EXCHANGE = 0x0001, /* System provides it. */
+ PTW32_ALERTABLE_ASYNC_CANCEL = 0x0002 /* Can cancel blocked threads. */
+ };
+
+ /*
+ * Register a system time change with the library.
+ * Causes the library to perform various functions
+ * in response to the change. Should be called whenever
+ * the application's top level window receives a
+ * WM_TIMECHANGE message. It can be passed directly to
+ * pthread_create() as a new thread if desired.
+ */
+ PTW32_DLLPORT void * PTW32_CDECL pthread_timechange_handler_np(void *);
#endif /*PTW32_LEVEL >= PTW32_LEVEL_MAX - 1 */
#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-/*
- * Returns the Win32 HANDLE for the POSIX thread.
- */
-PTW32_DLLPORT HANDLE PTW32_CDECL pthread_getw32threadhandle_np(pthread_t thread);
-
-
-/*
- * Protected Methods
- *
- * This function blocks until the given WIN32 handle
- * is signaled or pthread_cancel had been called.
- * This function allows the caller to hook into the
- * PThreads cancel mechanism. It is implemented using
- *
- * WaitForMultipleObjects
- *
- * on 'waitHandle' and a manually reset WIN32 Event
- * used to implement pthread_cancel. The 'timeout'
- * argument to TimedWait is simply passed to
- * WaitForMultipleObjects.
- */
-PTW32_DLLPORT int PTW32_CDECL pthreadCancelableWait (HANDLE waitHandle);
-PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
- DWORD timeout);
+ /*
+ * Returns the Win32 HANDLE for the POSIX thread.
+ */
+ PTW32_DLLPORT HANDLE PTW32_CDECL pthread_getw32threadhandle_np(pthread_t thread);
+
+
+ /*
+ * Protected Methods
+ *
+ * This function blocks until the given WIN32 handle
+ * is signaled or pthread_cancel had been called.
+ * This function allows the caller to hook into the
+ * PThreads cancel mechanism. It is implemented using
+ *
+ * WaitForMultipleObjects
+ *
+ * on 'waitHandle' and a manually reset WIN32 Event
+ * used to implement pthread_cancel. The 'timeout'
+ * argument to TimedWait is simply passed to
+ * WaitForMultipleObjects.
+ */
+ PTW32_DLLPORT int PTW32_CDECL pthreadCancelableWait (HANDLE waitHandle);
+ PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
+ DWORD timeout);
#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-/*
- * Thread-Safe C Runtime Library Mappings.
- */
+ /*
+ * Thread-Safe C Runtime Library Mappings.
+ */
#ifndef _UWIN
# if defined(NEED_ERRNO)
- PTW32_DLLPORT int * PTW32_CDECL _errno( void );
+ PTW32_DLLPORT int * PTW32_CDECL _errno( void );
# else
# ifndef errno
# if (defined(_MT) || defined(_DLL))
- __declspec(dllimport) extern int * __cdecl _errno(void);
+ __declspec(dllimport) extern int * __cdecl _errno(void);
# define errno (*_errno())
# endif
# endif
# endif
#endif
-/*
- * WIN32 C runtime library had been made thread-safe
- * without affecting the user interface. Provide
- * mappings from the UNIX thread-safe versions to
- * the standard C runtime library calls.
- * Only provide function mappings for functions that
- * actually exist on WIN32.
- */
+ /*
+ * WIN32 C runtime library had been made thread-safe
+ * without affecting the user interface. Provide
+ * mappings from the UNIX thread-safe versions to
+ * the standard C runtime library calls.
+ * Only provide function mappings for functions that
+ * actually exist on WIN32.
+ */
#if !defined(__MINGW32__)
#define strtok_r( _s, _sep, _lasts ) \
@@ -1261,9 +1256,9 @@ PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
( _seed == _seed? rand() : rand() )
-/*
- * Some compiler environments don't define some things.
- */
+ /*
+ * Some compiler environments don't define some things.
+ */
#if defined(__BORLANDC__)
# define _ftime ftime
# define _timeb timeb
@@ -1271,22 +1266,22 @@ PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
#ifdef __cplusplus
-/*
- * Internal exceptions
- */
-class ptw32_exception {};
-class ptw32_exception_cancel : public ptw32_exception {};
-class ptw32_exception_exit : public ptw32_exception {};
+ /*
+ * Internal exceptions
+ */
+ class ptw32_exception {};
+ class ptw32_exception_cancel : public ptw32_exception {};
+ class ptw32_exception_exit : public ptw32_exception {};
#endif
#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-/* FIXME: This is only required if the library was built using SEH */
-/*
- * Get internal SEH tag
- */
-PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
+ /* FIXME: This is only required if the library was built using SEH */
+ /*
+ * Get internal SEH tag
+ */
+ PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
@@ -1294,10 +1289,10 @@ PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
#ifdef __CLEANUP_SEH
-/*
- * Redefine the SEH __except keyword to ensure that applications
- * propagate our internal exceptions up to the library's internal handlers.
- */
+ /*
+ * Redefine the SEH __except keyword to ensure that applications
+ * propagate our internal exceptions up to the library's internal handlers.
+ */
#define __except( E ) \
__except( ( GetExceptionCode() == ptw32_get_exception_services_code() ) \
? EXCEPTION_CONTINUE_SEARCH : ( E ) )
@@ -1306,15 +1301,15 @@ PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
#ifdef __CLEANUP_CXX
-/*
- * Redefine the C++ catch keyword to ensure that applications
- * propagate our internal exceptions up to the library's internal handlers.
- */
+ /*
+ * Redefine the C++ catch keyword to ensure that applications
+ * propagate our internal exceptions up to the library's internal handlers.
+ */
#ifdef _MSC_VER
- /*
- * WARNING: Replace any 'catch( ... )' with 'PtW32CatchAll'
- * if you want Pthread-Win32 cancelation and pthread_exit to work.
- */
+ /*
+ * WARNING: Replace any 'catch( ... )' with 'PtW32CatchAll'
+ * if you want Pthread-Win32 cancelation and pthread_exit to work.
+ */
#ifndef PtW32NoCatchWarn
diff --git a/mgizapp/w32/sched.h b/mgizapp/w32/sched.h
index dfb8e93..8c6d519 100644
--- a/mgizapp/w32/sched.h
+++ b/mgizapp/w32/sched.h
@@ -3,7 +3,7 @@
*
* Purpose:
* Provides an implementation of POSIX realtime extensions
- * as defined in
+ * as defined in
*
* POSIX 1003.1b-1993 (POSIX.1b)
*
@@ -12,25 +12,25 @@
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
+ *
* Contact Email: rpj@callisto.canberra.edu.au
- *
+ *
* The current list of contributors is contained
* in the file CONTRIBUTORS included with the source
* code distribution. The list can also be seen at the
* following World Wide Web location:
* http://sources.redhat.com/pthreads-win32/contributors.html
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library in the file COPYING.LIB;
* if not, write to the Free Software Foundation, Inc.,
@@ -143,26 +143,26 @@ extern "C"
{
#endif /* __cplusplus */
-PTW32_DLLPORT int __cdecl sched_yield (void);
+ PTW32_DLLPORT int __cdecl sched_yield (void);
-PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy);
+ PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy);
-PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy);
+ PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy);
-PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy);
+ PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy);
-PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid);
+ PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid);
-/*
- * Note that this macro returns ENOTSUP rather than
- * ENOSYS as might be expected. However, returning ENOSYS
- * should mean that sched_get_priority_{min,max} are
- * not implemented as well as sched_rr_get_interval.
- * This is not the case, since we just don't support
- * round-robin scheduling. Therefore I have chosen to
- * return the same value as sched_setscheduler when
- * SCHED_RR is passed to it.
- */
+ /*
+ * Note that this macro returns ENOTSUP rather than
+ * ENOSYS as might be expected. However, returning ENOSYS
+ * should mean that sched_get_priority_{min,max} are
+ * not implemented as well as sched_rr_get_interval.
+ * This is not the case, since we just don't support
+ * round-robin scheduling. Therefore I have chosen to
+ * return the same value as sched_setscheduler when
+ * SCHED_RR is passed to it.
+ */
#define sched_rr_get_interval(_pid, _interval) \
( errno = ENOTSUP, (int) -1 )
diff --git a/mgizapp/w32/semaphore.h b/mgizapp/w32/semaphore.h
index a3330a6..a79fcdb 100644
--- a/mgizapp/w32/semaphore.h
+++ b/mgizapp/w32/semaphore.h
@@ -12,25 +12,25 @@
* Pthreads-win32 - POSIX Threads Library for Win32
* Copyright(C) 1998 John E. Bossom
* Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
+ *
* Contact Email: rpj@callisto.canberra.edu.au
- *
+ *
* The current list of contributors is contained
* in the file CONTRIBUTORS included with the source
* code distribution. The list can also be seen at the
* following World Wide Web location:
* http://sources.redhat.com/pthreads-win32/contributors.html
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library in the file COPYING.LIB;
* if not, write to the Free Software Foundation, Inc.,
@@ -120,41 +120,41 @@ extern "C"
#endif /* __cplusplus */
#ifndef HAVE_MODE_T
-typedef unsigned int mode_t;
+ typedef unsigned int mode_t;
#endif
-typedef struct sem_t_ * sem_t;
+ typedef struct sem_t_ * sem_t;
-PTW32_DLLPORT int __cdecl sem_init (sem_t * sem,
- int pshared,
- unsigned int value);
+ PTW32_DLLPORT int __cdecl sem_init (sem_t * sem,
+ int pshared,
+ unsigned int value);
-PTW32_DLLPORT int __cdecl sem_destroy (sem_t * sem);
+ PTW32_DLLPORT int __cdecl sem_destroy (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_trywait (sem_t * sem);
+ PTW32_DLLPORT int __cdecl sem_trywait (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_wait (sem_t * sem);
+ PTW32_DLLPORT int __cdecl sem_wait (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_timedwait (sem_t * sem,
- const struct timespec * abstime);
+ PTW32_DLLPORT int __cdecl sem_timedwait (sem_t * sem,
+ const struct timespec * abstime);
-PTW32_DLLPORT int __cdecl sem_post (sem_t * sem);
+ PTW32_DLLPORT int __cdecl sem_post (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_post_multiple (sem_t * sem,
- int count);
+ PTW32_DLLPORT int __cdecl sem_post_multiple (sem_t * sem,
+ int count);
-PTW32_DLLPORT int __cdecl sem_open (const char * name,
- int oflag,
- mode_t mode,
- unsigned int value);
+ PTW32_DLLPORT int __cdecl sem_open (const char * name,
+ int oflag,
+ mode_t mode,
+ unsigned int value);
-PTW32_DLLPORT int __cdecl sem_close (sem_t * sem);
+ PTW32_DLLPORT int __cdecl sem_close (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_unlink (const char * name);
+ PTW32_DLLPORT int __cdecl sem_unlink (const char * name);
-PTW32_DLLPORT int __cdecl sem_getvalue (sem_t * sem,
- int * sval);
+ PTW32_DLLPORT int __cdecl sem_getvalue (sem_t * sem,
+ int * sval);
#ifdef __cplusplus
} /* End of extern "C" */