Fix regression caused by phrase tables that store all 3 counts in the last column.

author: Rico Sennrich <rico.sennrich@gmx.ch> 2012-10-18 18:09:38 +0400
committer: Rico Sennrich <rico.sennrich@gmx.ch> 2012-10-18 18:09:38 +0400
commit: 9e03090ca2f08308e167e360d318f767d9307055 (patch)
tree: 6fe3179b49d6bab0aee812396f585a1fceb714ea /contrib
parent: 5bb498c922e93493f7432392933d1a38d9512461 (diff)
1 files changed, 20 insertions, 8 deletions
diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py
index 6560ad23b..d7475b34c 100755
--- a/contrib/tmcombine/tmcombine.py
+++ b/contrib/tmcombine/tmcombine.py
@@ -114,17 +114,29 @@ class Moses():
                 
                 if mode == 'counts' and not priority == 2: #priority 2 is MAP
                     try:
-                        target_count,src_count = map(float,line[-1].split())
+                        counts = map(float,line[-1].split())
+                        try:
+                            target_count,src_count,joint_count = counts
+                            joint_count_e2f = joint_count
+                            joint_count_f2e = joint_count
+                        except ValueError:
+                            # possibly old-style phrase table with 2 counts in last column, or phrase table produced by tmcombine
+                            # note: since each feature has different weight vector, we may have two different phrase pair frequencies
+                            target_count,src_count = counts
+                            i_e2f = flags['i_e2f']
+                            i_f2e = flags['i_f2e']
+                            joint_count_e2f = model_probabilities[i_e2f] * target_count
+                            joint_count_f2e = model_probabilities[i_f2e] * src_count
                     except:
-                        sys.stderr.write(str(line)+'\n')
-                        sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
-                        return
-                        
+                        sys.stderr.write(str(b" ||| ".join(line))+b'\n')
+                        sys.stderr.write('ERROR: counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+                        raise
+                    
                     i_e2f = flags['i_e2f']
                     i_f2e = flags['i_f2e']
-                    model_probabilities[i_e2f] *= target_count
-                    model_probabilities[i_f2e] *= src_count
-                
+                    model_probabilities[i_e2f] = joint_count_e2f
+                    model_probabilities[i_f2e] = joint_count_f2e
+                        
                 for j,p in enumerate(model_probabilities):
                     phrase_probabilities[j][i] = p
author	Rico Sennrich <rico.sennrich@gmx.ch>	2012-10-18 18:09:38 +0400
committer	Rico Sennrich <rico.sennrich@gmx.ch>	2012-10-18 18:09:38 +0400
commit	9e03090ca2f08308e167e360d318f767d9307055 (patch)
tree	6fe3179b49d6bab0aee812396f585a1fceb714ea /contrib
parent	5bb498c922e93493f7432392933d1a38d9512461 (diff)