Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> 2012-04-13 18:43:01 +0400
committer Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> 2012-04-13 18:43:01 +0400
commit beb8096d81484249c51469c927f3b932c08613c9 (patch)
tree 126d67e75318e8aaad624bf977388b4f37cf74e6 /scripts
parent b85cf551c920239295219f7d1438cc0dea57f4a5 (diff)
set up pipeline for devtest set (mira tuning)
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/ems/experiment.meta 77
-rwxr-xr-x scripts/ems/experiment.perl 50
2 files changed, 101 insertions, 26 deletions
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index c99f2dd55..f61034de7 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -444,18 +444,36 @@ tokenize-input
default-name: tuning/input.tok
pass-unless: input-tokenizer
template: $input-tokenizer < IN > OUT
+tokenize-input-devtest
+ in: raw-input-devtest
+ out: tokenized-input-devtest
+ default-name: tuning/input.devtest.tok
+ pass-unless: input-tokenizer
+ template: $input-tokenizer < IN > OUT
parse-input
in: tokenized-input
out: parsed-input
default-name: tuning/input.parsed
pass-unless: input-parser
template: $input-parser < IN > OUT
+parse-input-devtest
+ in: tokenized-input-devtest
+ out: parsed-input-devtest
+ default-name: tuning/input.devtest.parsed
+ pass-unless: input-parser
+ template: $input-parser < IN > OUT
parse-relax-input
in: parsed-input
out: parse-relaxed-input
default-name: tuning/input.parse-relaxed
pass-unless: input-parse-relaxer
template: $input-parse-relaxer < IN.$input-extension > OUT.$input-extension
+parse-relax-input-devtest
+ in: parsed-input-devtest
+ out: parse-relaxed-input-devtest
+ default-name: tuning/input.devtest.parse-relaxed
+ pass-unless: input-parse-relaxer
+ template: $input-parse-relaxer < IN.$input-extension > OUT.$input-extension
factorize-input
in: parse-relaxed-input
out: factorized-input
@@ -464,6 +482,14 @@ factorize-input
pass-unless: TRAINING:input-factors
error: can't open
error: incompatible number of words in factor
+factorize-input-devtest
+ in: parse-relaxed-input-devtest
+ out: factorized-input-devtest
+ default-name: tuning/input.devtest.factorized
+ rerun-on-change: TRAINING:input-factors
+ pass-unless: TRAINING:input-factors
+ error: can't open
+ error: incompatible number of words in factor
lowercase-input
in: factorized-input
out: cased-input
@@ -471,6 +497,13 @@ lowercase-input
pass-unless: input-lowercaser
ignore-if: input-truecaser
template: $input-lowercaser < IN > OUT
+lowercase-input-devtest
+ in: factorized-input-devtest
+ out: cased-input-devtest
+ default-name: tuning/input.devtest.lc
+ pass-unless: input-lowercaser
+ ignore-if: input-truecaser
+ template: $input-lowercaser < IN > OUT
truecase-input
in: factorized-input TRUECASER:truecase-model
out: cased-input
@@ -478,6 +511,13 @@ truecase-input
default-name: tuning/input.tc
ignore-unless: input-truecaser
template: $input-truecaser -model IN1.$input-extension < IN > OUT
+truecase-input-devtest
+ in: factorized-input-devtest TRUECASER:truecase-model
+ out: cased-input-devtest
+ rerun-on-change: input-truecaser
+ default-name: tuning/input.devtest.tc
+ ignore-unless: input-truecaser
+ template: $input-truecaser -model IN1.$input-extension < IN > OUT
split-input
in: cased-input SPLITTER:splitter-model
out: input
@@ -485,6 +525,13 @@ split-input
default-name: tuning/input.split
pass-unless: input-splitter
template: $input-splitter -model IN1.$input-extension $input-extension < IN > OUT
+split-input-devtest
+ in: cased-input-devtest SPLITTER:splitter-model
+ out: input-devtest
+ rerun-on-change: input-splitter
+ default-name: tuning/input.devtest.split
+ pass-unless: input-splitter
+ template: $input-splitter -model IN1.$input-extension $input-extension < IN > OUT
reference-from-sgm
in: reference-sgm input-sgm
out: raw-reference
@@ -497,6 +544,13 @@ tokenize-reference
pass-unless: output-tokenizer
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-tokenizer < IN > OUT
+tokenize-reference-devtest
+ in: raw-reference-devtest
+ out: tokenized-reference-devtest
+ default-name: tuning/reference.devtest.tok
+ pass-unless: output-tokenizer
+ multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+ template: $output-tokenizer < IN > OUT
lowercase-reference
in: tokenized-reference
out: cased-reference
@@ -505,6 +559,14 @@ lowercase-reference
ignore-if: output-truecaser
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-lowercaser < IN > OUT
+lowercase-reference-devtest
+ in: tokenized-reference-devtest
+ out: cased-reference-devtest
+ default-name: tuning/reference.devtest.lc
+ pass-unless: output-lowercaser
+ ignore-if: output-truecaser
+ multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+ template: $output-lowercaser < IN > OUT
truecase-reference
in: tokenized-reference TRUECASER:truecase-model
out: cased-reference
@@ -513,6 +575,14 @@ truecase-reference
ignore-unless: output-truecaser
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-truecaser -model IN1.$output-extension < IN > OUT
+truecase-reference-devtest
+ in: tokenized-reference-devtest TRUECASER:truecase-model
+ out: cased-reference-devtest
+ rerun-on-change: output-truecaser
+ default-name: tuning/reference.devtest.tc
+ ignore-unless: output-truecaser
+ multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+ template: $output-truecaser -model IN1.$output-extension < IN > OUT
split-reference
in: cased-reference SPLITTER:splitter-model
out: reference
@@ -520,6 +590,13 @@ split-reference
pass-unless: output-splitter
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-splitter -model IN1.$output-extension < IN > OUT
+split-reference-devtest
+ in: cased-reference-devtest SPLITTER:splitter-model
+ out: reference-devtest
+ default-name: tuning/reference.devtest.split
+ pass-unless: output-splitter
+ multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+ template: $output-splitter -model IN1.$output-extension < IN > OUT
filter
in: TRAINING:config input
out: filtered-config
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index ffecae43e..ef137434c 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -946,7 +946,7 @@ sub define_step {
&define_tuningevaluation_factorize($i);
}
elsif ($DO_STEP[$i] eq 'TUNING:filter') {
- &define_tuningevaluation_filter(undef,$i,"dev");
+ &define_tuningevaluation_filter(undef,$i);
}
elsif ($DO_STEP[$i] eq 'TUNING:filter-devtest') {
&define_tuningevaluation_filter(undef,$i,"devtest");
@@ -1548,26 +1548,11 @@ sub factorize_one_language {
sub define_tuning_tune {
my ($step_id) = @_;
my $dir = &check_and_get("GENERAL:working-dir");
-
- # the last variable only apply for mira tuning (devtest input and reference are read out later)
- my ($tuned_config,$config,$input,$reference,$config_devtest) = &get_output_and_input($step_id);
-
my $tuning_script = &check_and_get("TUNING:tuning-script");
- my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
- my $nbest_size = &check_and_get("TUNING:nbest");
- my $lambda = &backoff_and_get("TUNING:lambda");
- my $tune_continue = &backoff_and_get("TUNING:continue");
- my $tune_inputtype = &backoff_and_get("TUNING:inputtype");
- my $jobs = &backoff_and_get("TUNING:jobs");
- my $decoder = &check_backoff_and_get("TUNING:decoder");
-
- my $decoder_settings = &backoff_and_get("TUNING:decoder-settings");
- $decoder_settings = "" unless $decoder_settings;
- $decoder_settings .= " -v 0 " unless $CLUSTER && $jobs;
-
- my $tuning_settings = &backoff_and_get("TUNING:tuning-settings");
- $tuning_settings = "" unless $tuning_settings;
-
+
+ # the last 3 variables are only used for mira tuning
+ my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest) = &get_output_and_input($step_id);
+
my $use_mira = &backoff_and_get("TUNING:use-mira");
my $cmd = "";
if ($use_mira && $use_mira eq "true") {
@@ -1578,7 +1563,7 @@ sub define_tuning_tune {
my $mira_config_log = $mira_config."log";
$mira_config .= "cfg";
- write_mira_config($mira_config, $experiment_dir, $config, $config_devtest);
+ write_mira_config($mira_config,$experiment_dir,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest);
$cmd = "$tuning_script -config $mira_config -exec >& $mira_config_log";
# write script to select the best set of weights after training for the specified number of epochs -->
@@ -1591,6 +1576,22 @@ sub define_tuning_tune {
$cmd .= "\n$script_filename >& $script_filename_log";
}
else {
+
+ my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
+ my $nbest_size = &check_and_get("TUNING:nbest");
+ my $lambda = &backoff_and_get("TUNING:lambda");
+ my $tune_continue = &backoff_and_get("TUNING:continue");
+ my $tune_inputtype = &backoff_and_get("TUNING:inputtype");
+ my $jobs = &backoff_and_get("TUNING:jobs");
+ my $decoder = &check_backoff_and_get("TUNING:decoder");
+
+ my $decoder_settings = &backoff_and_get("TUNING:decoder-settings");
+ $decoder_settings = "" unless $decoder_settings;
+ $decoder_settings .= " -v 0 " unless $CLUSTER && $jobs;
+
+ my $tuning_settings = &backoff_and_get("TUNING:tuning-settings");
+ $tuning_settings = "" unless $tuning_settings;
+
$cmd = "$tuning_script $input $reference $decoder $config --nbest $nbest_size --working-dir $dir/tuning/tmp.$VERSION --decoder-flags \"$decoder_settings\" --rootdir $scripts $tuning_settings --no-filter-phrase-table";
$cmd .= " --lambdas \"$lambda\"" if $lambda;
$cmd .= " --continue" if $tune_continue;
@@ -1610,18 +1611,14 @@ sub define_tuning_tune {
}
sub write_mira_config {
- my ($config_filename, $expt_dir, $tune_filtered_ini, $devtest_filtered_ini) = @_;
+ my ($config_filename,$expt_dir,$tune_filtered_ini,$input,$reference,$devtest_filtered_ini,$input_devtest,$reference_devtest) = @_;
my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
my $tuning_decoder_settings = &check_and_get("TUNING:decoder-settings");
my $core_weights = &backoff_and_get("TUNING:core-weight-config");
- my $input = &check_and_get("TUNING:input");
- my $reference = &check_and_get("TUNING:reference");
my $tuning_settings = &check_and_get("TUNING:tuning-settings");
my @settings = split(/ /, $tuning_settings);
my $mira_tuning_settings = &check_and_get("TUNING:mira-tuning-settings");
- my $input_devtest = &check_and_get("TUNING:input-devtest");
- my $reference_devtest = &check_and_get("TUNING:reference-devtest");
# convert core weights into format expected by mira
my $core_file = "$expt_dir/core_weights";
@@ -2319,6 +2316,7 @@ sub define_tuningevaluation_filter {
$input_filter = &get("EVALUATION:$set:input-filter") unless $tuning_flag;
$input_filter = &get("TUNING:input-filter") if $tuning_flag;
$input_filter = $input unless $input_filter;
+ print STDERR "$type: input-filter: $input_filter\n";
my $filter_dir;
if ($type) {