diff options
author | Rico Sennrich <rico.sennrich@gmx.ch> | 2012-10-18 18:09:38 +0400 |
---|---|---|
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2012-10-18 18:09:38 +0400 |
commit | 9e03090ca2f08308e167e360d318f767d9307055 (patch) | |
tree | 6fe3179b49d6bab0aee812396f585a1fceb714ea /contrib | |
parent | 5bb498c922e93493f7432392933d1a38d9512461 (diff) |
fix regression from having all 3 counts in phrase table.
Diffstat (limited to 'contrib')
-rwxr-xr-x | contrib/tmcombine/tmcombine.py | 28 |
1 files changed, 20 insertions, 8 deletions
```diff
diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py
index 6560ad23b..d7475b34c 100755
--- a/contrib/tmcombine/tmcombine.py
+++ b/contrib/tmcombine/tmcombine.py
@@ -114,17 +114,29 @@ class Moses():
             if mode == 'counts' and not priority == 2: #priority 2 is MAP
                 try:
-                    target_count,src_count = map(float,line[-1].split())
+                    counts = map(float,line[-1].split())
+                    try:
+                        target_count,src_count,joint_count = counts
+                        joint_count_e2f = joint_count
+                        joint_count_f2e = joint_count
+                    except ValueError:
+                        # possibly old-style phrase table with 2 counts in last column, or phrase table produced by tmcombine
+                        # note: since each feature has different weight vector, we may have two different phrase pair frequencies
+                        target_count,src_count = counts
+                        i_e2f = flags['i_e2f']
+                        i_f2e = flags['i_f2e']
+                        joint_count_e2f = model_probabilities[i_e2f] * target_count
+                        joint_count_f2e = model_probabilities[i_f2e] * src_count
                 except:
-                    sys.stderr.write(str(line)+'\n')
-                    sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
-                    return
-
+                    sys.stderr.write(str(b" ||| ".join(line))+b'\n')
+                    sys.stderr.write('ERROR: counts are missing or misformatted. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+                    raise
+
                 i_e2f = flags['i_e2f']
                 i_f2e = flags['i_f2e']
-                model_probabilities[i_e2f] *= target_count
-                model_probabilities[i_f2e] *= src_count
-
+                model_probabilities[i_e2f] = joint_count_e2f
+                model_probabilities[i_f2e] = joint_count_f2e
+
             for j,p in enumerate(model_probabilities):
                 phrase_probabilities[j][i] = p
```