Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author bhaddow <bhaddow@1f5c12ca-751b-0410-a591-d2e778427230> 2011-10-05 00:45:47 +0400
committer bhaddow <bhaddow@1f5c12ca-751b-0410-a591-d2e778427230> 2011-10-05 00:45:47 +0400
commit 210f87bebddc2ca3a4cae66f365cd78126a3dff0 (patch)
tree a56979269cefe98227d1f705090add97af730d38 /scripts
parent 84d73700af580918a4e4ddef7c08950590e546bb (diff)
Support for lattice sampling (use -lattice-samples n)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4296 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/training/mert-moses.pl 35
1 files changed, 30 insertions, 5 deletions
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index ee0cc02d8..f7bb7b8b6 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -88,6 +88,7 @@ my $___DEV_E = undef; # required, basename of files with references
my $___DECODER = undef; # required, pathname to the decoder executable
my $___CONFIG = undef; # required, pathname to startup ini file
my $___N_BEST_LIST_SIZE = 100;
+my $___LATTICE_SAMPLES = 0;
my $queue_flags = "-hard"; # extra parameters for parallelizer
# the -l ws0ssmt was relevant only to JHU 2006 workshop
my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial)
@@ -156,6 +157,7 @@ GetOptions(
"decoder=s" => \$___DECODER,
"config=s" => \$___CONFIG,
"nbest=i" => \$___N_BEST_LIST_SIZE,
+ "lattice-samples=i" => \$___LATTICE_SAMPLES,
"queue-flags=s" => \$queue_flags,
"jobs=i" => \$___JOBS,
"decoder-flags=s" => \$___DECODER_FLAGS,
@@ -208,6 +210,7 @@ if ($usage || !defined $___DEV_F || !defined $___DEV_E || !defined $___DECODER |
Options:
--working-dir=mert-dir ... where all the files are created
--nbest=100 ... how big nbestlist to generate
+ --lattice-samples ... how many lattice samples (Chatterjee & Cancedda, emnlp 2010)
--jobs=N ... set this to anything to run moses in parallel
--mosesparallelcmd=STR ... use a different script instead of moses-parallel
--queue-flags=STRING ... anything you with to pass to qsub, eg.
@@ -606,6 +609,8 @@ my $oldallsorted = undef;
my $allsorted = undef;
my $nbest_file=undef;
+my $lsamp_file=undef; #Lattice samples
+my $orig_nbest_file=undef; # replaced if lattice sampling
while(1) {
$run++;
@@ -625,8 +630,20 @@ while(1) {
# skip running the decoder if the user wanted
if (!$skip_decoder) {
print "($run) run decoder to produce n-best lists\n";
- $nbest_file = run_decoder($featlist, $run, $need_to_normalize);
+ ($nbest_file,$lsamp_file) = run_decoder($featlist, $run, $need_to_normalize);
$need_to_normalize = 0;
+ if ($___LATTICE_SAMPLES) {
+ my $combined_file = "$nbest_file.comb";
+ safesystem("sort -k1,1n $nbest_file $lsamp_file > $combined_file") or
+ die("failed to merge nbest and lattice samples");
+ safesystem("gzip -f $nbest_file; gzip -f $lsamp_file") or
+ die "Failed to gzip nbests and lattice samples";
+ $orig_nbest_file = "$nbest_file.gz";
+ $orig_nbest_file = "$nbest_file.gz";
+ $lsamp_file = "$lsamp_file.gz";
+ $lsamp_file = "$lsamp_file.gz";
+ $nbest_file = "$combined_file";
+ }
safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
$nbest_file = $nbest_file.".gz";
}
@@ -921,6 +938,11 @@ sub run_decoder {
my ($featlist, $run, $need_to_normalize) = @_;
my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out";
my $filename = sprintf($filename_template, $run);
+ my $lsamp_filename = undef;
+ if ($___LATTICE_SAMPLES) {
+ my $lsamp_filename_template = "run%d.lsamp$___LATTICE_SAMPLES.out";
+ $lsamp_filename = sprintf($lsamp_filename_template, $run);
+ }
# user-supplied parameters
print "params = $___DECODER_FLAGS\n";
@@ -945,19 +967,22 @@ sub run_decoder {
print "decoder_config = $decoder_config\n";
# run the decoder
- my $nBest_cmd = "-n-best-size $___N_BEST_LIST_SIZE";
my $decoder_cmd;
+ my $lsamp_cmd = "";
+ if ($___LATTICE_SAMPLES) {
+ $lsamp_cmd = " -lattice-samples $lsamp_filename $___LATTICE_SAMPLES ";
+ }
if (defined $___JOBS && $___JOBS > 0) {
- $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
+ $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
} else {
- $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out";
+ $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out";
}
safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";
sanity_check_order_of_lambdas($featlist, $filename);
- return $filename;
+ return ($filename, $lsamp_filename);
}