Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rico Sennrich <rico.sennrich@gmx.ch> 2012-10-18 18:09:38 +0400
committer Rico Sennrich <rico.sennrich@gmx.ch> 2012-10-18 18:09:38 +0400
commit 9e03090ca2f08308e167e360d318f767d9307055 (patch)
tree 6fe3179b49d6bab0aee812396f585a1fceb714ea /contrib
parent 5bb498c922e93493f7432392933d1a38d9512461 (diff)
fix regression from having all 3 counts in phrase table.
Diffstat (limited to 'contrib')
-rwxr-xr-x contrib/tmcombine/tmcombine.py 28
1 files changed, 20 insertions, 8 deletions
diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py
index 6560ad23b..d7475b34c 100755
--- a/contrib/tmcombine/tmcombine.py
+++ b/contrib/tmcombine/tmcombine.py
@@ -114,17 +114,29 @@ class Moses():
if mode == 'counts' and not priority == 2: #priority 2 is MAP
try:
- target_count,src_count = map(float,line[-1].split())
+ counts = map(float,line[-1].split())
+ try:
+ target_count,src_count,joint_count = counts
+ joint_count_e2f = joint_count
+ joint_count_f2e = joint_count
+ except ValueError:
+ # possibly old-style phrase table with 2 counts in last column, or phrase table produced by tmcombine
+ # note: since each feature has different weight vector, we may have two different phrase pair frequencies
+ target_count,src_count = counts
+ i_e2f = flags['i_e2f']
+ i_f2e = flags['i_f2e']
+ joint_count_e2f = model_probabilities[i_e2f] * target_count
+ joint_count_f2e = model_probabilities[i_f2e] * src_count
except:
- sys.stderr.write(str(line)+'\n')
- sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
- return
-
+ sys.stderr.write(str(b" ||| ".join(line))+b'\n')
+ sys.stderr.write('ERROR: counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+ raise
+
i_e2f = flags['i_e2f']
i_f2e = flags['i_f2e']
- model_probabilities[i_e2f] *= target_count
- model_probabilities[i_f2e] *= src_count
-
+ model_probabilities[i_e2f] = joint_count_e2f
+ model_probabilities[i_f2e] = joint_count_f2e
+
for j,p in enumerate(model_probabilities):
phrase_probabilities[j][i] = p