Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'experimental/bidirectional/src/ATables.cpp')
-rw-r--r--experimental/bidirectional/src/ATables.cpp212
1 files changed, 212 insertions, 0 deletions
diff --git a/experimental/bidirectional/src/ATables.cpp b/experimental/bidirectional/src/ATables.cpp
new file mode 100644
index 0000000..8cc4ad2
--- /dev/null
+++ b/experimental/bidirectional/src/ATables.cpp
@@ -0,0 +1,212 @@
+/*
+
+EGYPT Toolkit for Statistical Machine Translation
+Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+#include "ATables.h"
+#include "Globals.h"
+#include "myassert.h"
+#include "Parameter.h"
+
+GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
+GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
+
+template <class VALTYPE>
+void amodel<VALTYPE>::printTable(const char *filename) const{
+ // print amodel to file with the name filename (it'll be created or overwritten
+ // format : for a table :
+ // aj j l m val
+ // where aj is source word pos, j target word pos, l source sentence length,
+ // m is target sentence length.
+ //
+ //return;
+ if (is_distortion)
+ cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n';
+ else
+ cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n';
+
+ ofstream of(filename);
+ double ssum=0.0;
+ for(WordIndex l=0; l < MaxSentLength; l++){
+ for(WordIndex m=0;m<MaxSentLength;m++){
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ){
+ for(WordIndex j=1;j<=M; j++){
+ double sum=0.0;
+ for(WordIndex i=0;i<=L; i++){
+ VALTYPE x=getValue(i, j, L, M);
+ if( x>PROB_SMOOTH ){
+ of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
+ sum+=x;
+ }
+ }
+ ssum+=sum;
+ }
+ }else{
+ for(WordIndex i=0;i<=L;i++){
+ double sum=0.0;
+ for(WordIndex j=1;j<=M;j++){
+ VALTYPE x=getValue(j, i, L, M);
+ if( x>PROB_SMOOTH ){
+ of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
+ sum+=x;
+ }
+ }
+ ssum+=sum;
+ }
+ }
+ }
+ }
+}
+
+template <class VALTYPE>
+void amodel<VALTYPE>::printRealTable(const char *filename) const{
+ // print amodel to file with the name filename (it'll be created or overwritten
+ // format : for a table :
+ // aj j l m val
+ // where aj is source word pos, j target word pos, l source sentence length,
+ // m is target sentence length.
+ //
+ //return;
+ if (is_distortion)
+ cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n';
+ else
+ cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n';
+
+ ofstream of(filename);
+ for(WordIndex l=0; l < MaxSentLength; l++){
+ for(WordIndex m=0;m<MaxSentLength;m++){
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ){
+ for(WordIndex j=1;j<=M; j++){
+ for(WordIndex i=0;i<=L; i++){
+ VALTYPE x=getValue(i, j, L, M);
+ if( x>MINCOUNTINCREASE )
+ of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
+ }
+ }
+ }else{
+ for(WordIndex i=0;i<=L;i++){
+ for(WordIndex j=1;j<=M;j++){
+ VALTYPE x=getValue(j, i, L, M);
+ if( x>MINCOUNTINCREASE )
+ of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
+ }
+ }
+ }
+ }
+ }
+}
+
+extern short NoEmptyWord;
+
+template <class VALTYPE>
+bool amodel<VALTYPE>::readTable(const char *filename){
+ /* This function reads the a table from a file.
+ Each line is of the format: aj j l m val
+ where aj is the source word position, j the target word position,
+ l the source sentence length, and m the target sentence length
+
+ This function also works for a d table, where the positions
+ of aj and i are swapped. Both the a and d tables are 4 dimensional
+ hashes; this function will simply read in the four values and keep
+ them in that order when hashing the fifth value.
+ NAS, 7/11/99
+ */
+ ifstream inf(filename);
+ cout << "Reading a/d table from " << filename << "\n";
+ if(!inf){
+ cerr << "\nERROR: Cannot open " << filename<<"\n";
+ return false;
+ }
+ WordIndex w, x, l, m;
+ VALTYPE prob;
+ while(inf >> w >> x >> l >> m >> prob )
+ // the NULL word is added to the length
+ // of the sentence in the tables, but discount it when you write the tables.
+ setValue(w, x, l, m, prob);
+ return true;
+}
+
+template <class VALTYPE>
+bool amodel<VALTYPE>::readAugTable(const char *filename){
+ /* This function reads the a table from a file.
+ Each line is of the format: aj j l m val
+ where aj is the source word position, j the target word position,
+ l the source sentence length, and m the target sentence length
+
+ This function also works for a d table, where the positions
+ of aj and i are swapped. Both the a and d tables are 4 dimensional
+ hashes; this function will simply read in the four values and keep
+ them in that order when hashing the fifth value.
+ NAS, 7/11/99
+ */
+ ifstream inf(filename);
+ cout << "Reading a/d table from " << filename << "\n";
+ if(!inf){
+ cerr << "\nERROR: Cannot open " << filename<<"\n";
+ return false;
+ }
+ WordIndex w, x, l, m;
+ VALTYPE prob;
+ while(inf >> w >> x >> l >> m >> prob )
+ // the NULL word is added to the length
+ // of the sentence in the tables, but discount it when you write the tables.
+ addValue(w, x, l, m, prob);
+ return true;
+}
+
+template <class VALTYPE>
+bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){
+ cout << "start merging " <<"\n";
+ for(WordIndex l=0; l < MaxSentLength; l++){
+ for(WordIndex m=0;m<MaxSentLength;m++){
+ if( CompactADTable && l!=m )
+ continue;
+ unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
+ unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
+ if( is_distortion==0 ){
+ for(WordIndex j=1;j<=M; j++){
+ for(WordIndex i=0;i<=L; i++){
+ VALTYPE x=am.getValue(i, j, L, M);
+ addValue(i,j,L,M,x);
+ }
+ }
+ }else{
+ for(WordIndex i=0;i<=L;i++){
+ for(WordIndex j=1;j<=M;j++){
+ VALTYPE x=am.getValue(j, i, L, M);
+ addValue(j,i,L,M,x);
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+template class amodel<COUNT> ;
+//template class amodel<PROB> ;