Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Denkowski <michael.j.denkowski@gmail.com> 2014-08-13 23:58:51 +0400
committer: Michael Denkowski <michael.j.denkowski@gmail.com> 2014-08-13 23:58:51 +0400
commit: 057066ea0e9c9a9400cd4bd40d204166e80dc125 (patch)
tree: 7b568e9ff619df766fa74345a4765286e0396eb7
parent: 94c44c03d5470694827243aa2f12d3a3b031fda4 (diff)
Minor fixes for simulated post-editing with mert-moses.pl
-rw-r--r-- moses-cmd/simulate-pe.cc 2
-rwxr-xr-x scripts/generic/moses_sim_pe.py 14
-rwxr-xr-x scripts/training/mert-moses.pl 15
3 files changed, 18 insertions, 13 deletions
diff --git a/moses-cmd/simulate-pe.cc b/moses-cmd/simulate-pe.cc
index e88c1e463..5384d9886 100644
--- a/moses-cmd/simulate-pe.cc
+++ b/moses-cmd/simulate-pe.cc
@@ -280,7 +280,7 @@ public:
if (file->is_complete() && file->good()) {
fix(*file,PRECISION);
- manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file);
+ manager.OutputSearchGraphAsHypergraph(*file);
file -> flush();
} else {
TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber
diff --git a/scripts/generic/moses_sim_pe.py b/scripts/generic/moses_sim_pe.py
index 290711b56..e29f0333d 100755
--- a/scripts/generic/moses_sim_pe.py
+++ b/scripts/generic/moses_sim_pe.py
@@ -32,7 +32,7 @@ Usage: {} moses-cmd -config moses.ini -input-file text.src -ref text.tgt -symal
Options:
-threads N: number of decoders to run in parallel (default read from moses.ini, 1 if not present)
- -n-best-list nbest.out N: location and size of N-best list
+ -n-best-list nbest.out N [distinct]: location and size of N-best list
-show-weights: for mert-moses.pl, just call moses and exit
-tmp: location of temp directory (default /tmp)
@@ -110,6 +110,7 @@ def main(argv):
threads = 1
n_best_out = None
n_best_size = None
+ n_best_distinct = False
tmp_dir = '/tmp'
xml_found = False
xml_input = 'exclusive'
@@ -143,7 +144,12 @@ def main(argv):
elif cmd[i] == '-n-best-list':
n_best_out = cmd[i + 1]
n_best_size = cmd[i + 2]
- cmd = cmd[:i] + cmd[i + 3:]
+ # Optional "distinct"
+ if i + 3 < len(cmd) and cmd[i + 3] == 'distinct':
+ n_best_distinct = True
+ cmd = cmd[:i] + cmd[i + 4:]
+ else:
+ cmd = cmd[:i] + cmd[i + 3:]
elif cmd[i] == '-tmp':
tmp_dir = cmd[i + 1]
cmd = cmd[:i] + cmd[i + 2:]
@@ -231,7 +237,7 @@ def main(argv):
sys.stderr.write('Jobs: {}\n'.format(threads))
sys.stderr.write('Batch size: {}\n'.format(batch_size))
if n_best_out:
- sys.stderr.write('N-best list: {} ({})\n'.format(n_best_out, n_best_size))
+ sys.stderr.write('N-best list: {} ({}{})\n'.format(n_best_out, n_best_size, ', distinct' if n_best_distinct else ''))
sys.stderr.write('Temp dir: {}\n'.format(work_dir))
# Accumulate seen lines
@@ -289,6 +295,8 @@ def main(argv):
work_cmd.append('-n-best-list')
work_cmd.append(os.path.join(work_dir, 'nbest.{}'.format(i)))
work_cmd.append(str(n_best_size))
+ if n_best_distinct:
+ work_cmd.append('distinct')
in_file = os.path.join(work_dir, 'input.{}.xml'.format(i))
out_file = os.path.join(work_dir, 'out.{}'.format(i))
err_file = os.path.join(work_dir, 'err.{}'.format(i))
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index a4b20f3c7..027d94a77 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -477,9 +477,9 @@ if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
}
# Paths needed for simulated post-editing
-if ($___DEV_SYMAL) {
+$working_dir_abs = ensure_full_path($___WORKING_DIR);
+if (defined $___DEV_SYMAL) {
$dev_symal_abs = ensure_full_path($___DEV_SYMAL);
- $working_dir_abs = ensure_full_path($___WORKING_DIR);
}
# as weights are normalized in the next steps (by cmert)
@@ -1254,16 +1254,13 @@ sub run_decoder {
}
$decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG";
$decoder_cmd .= " -inputtype $___INPUTTYPE" if defined($___INPUTTYPE);
- $decoder_cmd .= " $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F > run$run.out";
-
- # If simulating post-editing, route command through moses_sim_pe.py
+ $decoder_cmd .= " $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F";
if (defined $___DEV_SYMAL) {
+ # If simulating post-editing, route command through moses_sim_pe.py
# Always use single (first) reference. Simulated post-editing undefined for multiple references.
- $decoder_cmd = "$___MOSES_SIM_PE $___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F -ref $references[0] -symal $dev_symal_abs -tmp $working_dir_abs > run$run.out";
- } else {
- # Default: call decoder directly
- $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd $nbest_list_cmd -input-file $___DEV_F > run$run.out";
+ $decoder_cmd = "$___MOSES_SIM_PE $decoder_cmd -ref $references[0] -symal $dev_symal_abs -tmp $working_dir_abs > run$run.out";
}
+ $decoder_cmd .= " > run$run.out";
}
print STDERR "Executing: $decoder_cmd \n";