diff options
author | phikoehn <pkoehn@inf.ed.ac.uk> | 2013-05-12 11:16:22 +0400 |
---|---|---|
committer | phikoehn <pkoehn@inf.ed.ac.uk> | 2013-05-12 11:16:22 +0400 |
commit | 41da5b27605523be15062f7284a6fccc2ea373af (patch) | |
tree | 797a33e75e9abe0ae24a65b069c08921682482d1 /scripts/ems | |
parent | d19a28ae211dfe3a5bedb2fd95feded9967b82ed (diff) | |
parent | 5ece9a17402a3d383b0d7545eae42af9fee83130 (diff) |
Merge branch 'master' of git://github.com/moses-smt/mosesdecoder
Diffstat (limited to 'scripts/ems')
-rw-r--r-- | scripts/ems/example/data/weight.ini | 31 | ||||
-rw-r--r-- | scripts/ems/experiment.machines | 1 | ||||
-rw-r--r-- | scripts/ems/experiment.meta | 6 | ||||
-rwxr-xr-x | scripts/ems/experiment.perl | 68 | ||||
-rwxr-xr-x | scripts/ems/support/reuse-weights.perl | 4 | ||||
-rwxr-xr-x | scripts/ems/support/substitute-filtered-tables.perl | 54 | ||||
-rwxr-xr-x | scripts/ems/support/substitute-weights.perl | 44 |
7 files changed, 119 insertions, 89 deletions
diff --git a/scripts/ems/example/data/weight.ini b/scripts/ems/example/data/weight.ini index 731de5535..e0ab92249 100644 --- a/scripts/ems/example/data/weight.ini +++ b/scripts/ems/example/data/weight.ini @@ -2,29 +2,12 @@ ### MOSES CONFIG FILE ### ######################### -# distortion (reordering) weight -[weight-d] -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 -0.3 +[weight] +Distortion0= 0.3 +UnknownWordPenalty0= 1 +WordPenalty0= -1 +TranslationModel0= 0.2 0.2 0.2 0.2 0.2 +LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3 +LM0= 0.5 -# language model weights -[weight-l] -0.5 - -# translation model weights -[weight-t] -0.2 -0.2 -0.2 -0.2 -0.2 - -# word penalty -[weight-w] --1 diff --git a/scripts/ems/experiment.machines b/scripts/ems/experiment.machines index 6459be2c4..de87bf08d 100644 --- a/scripts/ems/experiment.machines +++ b/scripts/ems/experiment.machines @@ -1,4 +1,5 @@ cluster: townhill seville hermes lion seville sannox lutzow frontend +multicore-4: freddie multicore-8: tyr thor odin crom multicore-16: saxnot vali vili freyja bragi hoenir multicore-24: syn hel skaol saga buri loki sif magni diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index 8381bd6d7..b5803b8cd 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -844,7 +844,7 @@ apply-filter default-name: tuning/moses.filtered.ini pass-if: TRAINING:binarize-all ignore-if: use-hiero - template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT + template: cp IN1/moses.ini OUT apply-filter-devtest in: TRAINING:config filtered-dir-devtest out: filtered-config-devtest @@ -865,7 +865,7 @@ apply-weights out: config-with-reused-weights ignore-if: use-hiero default-name: tuning/moses.tuned.ini - template: $moses-script-dir/ems/support/reuse-weights.perl IN1 < IN > OUT + template: $moses-script-dir/ems/support/substitute-weights.perl IN < IN1 > OUT error: cannot open hiero-tune in: TRAINING:hiero-config input reference @@ -955,7 +955,7 @@ apply-filter default-name: evaluation/filtered.ini pass-if: TRAINING:binarize-all ignore-if: use-hiero - template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT + template: $moses-script-dir/ems/support/substitute-weights.perl IN1/moses.ini < IN > OUT decode in: filtered-config input out: system-output diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index d55913a3c..855d6b96c 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -1614,7 +1614,8 @@ sub define_tuning_tune { my $word_alignment = &backoff_and_get("TRAINING:include-word-alignment-in-rules"); # the last 3 variables are only used for mira tuning - my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest) = &get_output_and_input($step_id); + my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest, $filtered_config) = &get_output_and_input($step_id); + $config = $filtered_config if $filtered_config; my $cmd = ""; if ($use_mira) { @@ -2105,10 +2106,41 @@ sub define_training_create_config { my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) = &get_output_and_input($step_id); - my $cmd = &get_training_setting(9); + my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir"); + my $cmd = "$moses_src_dir/bin/create-ini "; + + my %IN; + my %OUT; + if (&backoff_and_get("TRAINING:input-factors")) { + %IN = &get_factor_id("input"); + } + else { + $IN{"word"} = 0; + } + + if (&backoff_and_get("TRAINING:output-factors")) { + %OUT = &get_factor_id("output"); + } + else { + $OUT{"word"} = 0; + } + + $cmd .= "-input-factor-max ".((scalar keys %IN)-1)." "; + + $cmd .= "-translation-factors ". + &encode_factor_definition("translation-factors",\%IN,\%OUT)." " + if &get("TRAINING:translation-factors"); + $cmd .= "-reordering-factors ". + &encode_factor_definition("reordering-factors",\%IN,\%OUT)." " + if &get("TRAINING:reordering-factors"); + $cmd .= "-generation-factors ". + &encode_factor_definition("generation-factors",\%OUT,\%OUT)." " + if &get("TRAINING:generation-factors"); - # get model, and whether suffix array is used. Determines the pt implementation. + # get model, and whether suffix array is used. Determines the pt implementation. my $hierarchical = &get("TRAINING:hierarchical-rule-set"); + $cmd .= "-hierarchical " if $hierarchical; + my $sa_exec_dir = &get("TRAINING:suffix-array"); my ($ptImpl, $numFF); @@ -2576,31 +2608,8 @@ sub define_tuningevaluation_filter { else { $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION"; $delete_config = 1; - $cmd = &get_training_setting(9); - $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features"); - - my $ptCmd = $phrase_translation_table; - $ptCmd .= ":$ptImpl" if $ptImpl>0; - $ptCmd .= ":$numFF" if defined($numFF); - $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $ptCmd); - $cmd .= &get_table_name_settings("reordering-factors","reordering-table", $reordering_table) - if $reordering_table; - # additional settings for hierarchical models - if (&get("TRAINING:hierarchical-rule-set")) { - my $extract_version = $VERSION; - $extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}] - if defined($STEP_LOOKUP{"TRAINING:extract-phrases"}); - my $glue_grammar_file = &get("TRAINING:glue-grammar"); - $glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version) - unless $glue_grammar_file; - $cmd .= "-glue-grammar-file $glue_grammar_file "; - } - if (&get("TRAINING:score-settings") && - &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) { - $cmd .= "-sparse-translation-table "; - } - $cmd .= "-lm 0:3:$dir "; # dummy - $cmd .= "-config $config\n"; + + $cmd = "cp $dir/model/moses.ini.$VERSION $config \n"; } # filter command @@ -2630,7 +2639,8 @@ sub define_evaluation_decode { my $dir = &check_and_get("GENERAL:working-dir"); my ($system_output, - $config,$input) = &get_output_and_input($step_id); + $config,$input,$filtered_config) = &get_output_and_input($step_id); + $config = $filtered_config if $filtered_config; my $jobs = &backoff_and_get("EVALUATION:$set:jobs"); my $decoder = &check_backoff_and_get("EVALUATION:$set:decoder"); diff --git a/scripts/ems/support/reuse-weights.perl b/scripts/ems/support/reuse-weights.perl index 71ac3a0b9..0b9668a34 100755 --- a/scripts/ems/support/reuse-weights.perl +++ b/scripts/ems/support/reuse-weights.perl @@ -19,8 +19,8 @@ while(<WEIGHT>) { $weights_file_spec = "\n".$_; $weights_file_flag = 1; } - elsif (/^\[weight\-(\S+)\]/) { - $current_weight = $1; + elsif (/^\[weight]/) { + $current_weight = $1; } elsif ($current_weight && /^(([\-\d\.]+)([Ee][+-]?[\d]+)?)$/) { push @{$WEIGHT{$current_weight}},$1; diff --git a/scripts/ems/support/substitute-filtered-tables.perl b/scripts/ems/support/substitute-filtered-tables.perl index 723c140b0..530130aa8 100755 --- a/scripts/ems/support/substitute-filtered-tables.perl +++ b/scripts/ems/support/substitute-filtered-tables.perl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl -w # experiment.perl support script # get filtered rule and reordering tables and place them into a configuration file @@ -8,42 +8,34 @@ if (scalar @ARGV < 1 || ! -e $ARGV[0]) { } # read config sections about filtered tables -my %CONFIG; -my $status = 0; -my $section; +my @arr; open(FILTERED, $ARGV[0]) or die "Cannot open: $!"; -while(<FILTERED>) { - if (/^\[(.+)\]\s*$/) { - if ($1 eq "ttable-file" || $1 eq "distortion-file") { - $section = $1; - $status = 1; - print STDERR "found $section\n"; - } - else { - $status = 0; - } - } - elsif ($status) { - $CONFIG{$section} .= $_; +while(my $line = <FILTERED>) { + chomp($line); + if ($line =~ /PhraseModel /) { + print STDERR "pt:$line \n"; + push(@arr, $line); } + elsif ($line =~ /LexicalReordering /) { + print STDERR "ro:$line \n"; + push(@arr, $line); + } } close(FILTERED); # pass through master config file and replace table sections -($status,$section) = (0); -while(<STDIN>) { - if (/^\[(.+)\]\s*$/) { - print $_; - if ($1 eq "ttable-file" || $1 eq "distortion-file") { - print STDERR "replacing $1\n"; - print $CONFIG{$1}; - $status = 1; - } - else { - $status = 0; - } +my $ind = 0; +while(my $line = <STDIN>) { + chomp($line); + if ($line =~ /PhraseModel /) { + print $arr[$ind]."\n"; + ++$ind; } - elsif (!$status) { - print $_; + elsif ($line =~ /LexicalReordering /) { + print $arr[$ind]."\n"; + ++$ind; + } + else { + print "$line\n"; } } diff --git a/scripts/ems/support/substitute-weights.perl b/scripts/ems/support/substitute-weights.perl new file mode 100755 index 000000000..d76ae2f8c --- /dev/null +++ b/scripts/ems/support/substitute-weights.perl @@ -0,0 +1,44 @@ +#!/usr/bin/perl -w + +# experiment.perl support script +# get filtered rule and reordering tables and place them into a configuration file + +if (scalar @ARGV < 1 || ! -e $ARGV[0]) { + die("ERROR: could not find base ini file"); +} + +# read initial ini file +my @arr; +my $inWeightSection = 0; +open(BASEINI, $ARGV[0]) or die "Cannot open: $!"; +while(my $line = <BASEINI>) { + chomp($line); + if ($line =~ /\[weight\]/) { + $inWeightSection = 1; + } + elsif ($line =~ /\[[a-zA-Z0-0]*\]/) { + $inWeightSection = 0; + } + + if (!$inWeightSection) { + print "$line\n"; + } +} +close(BASEINI); + +# read tuned ini file +$inWeightSection = 0; +my $ind = 0; +while(my $line = <STDIN>) { + chomp($line); + if ($line =~ /\[weight\]/) { + $inWeightSection = 1; + } + elsif ($line =~ /\[[a-zA-Z0-0]*\]/) { + $inWeightSection = 0; + } + + if ($inWeightSection) { + print "$line\n"; + } +} |