Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-12 11:16:22 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-12 11:16:22 +0400
commit 41da5b27605523be15062f7284a6fccc2ea373af (patch)
tree 797a33e75e9abe0ae24a65b069c08921682482d1 /scripts/ems
parent d19a28ae211dfe3a5bedb2fd95feded9967b82ed (diff)
parent 5ece9a17402a3d383b0d7545eae42af9fee83130 (diff)
Merge branch 'master' of git://github.com/moses-smt/mosesdecoder
Diffstat (limited to 'scripts/ems')
-rw-r--r-- scripts/ems/example/data/weight.ini 31
-rw-r--r-- scripts/ems/experiment.machines 1
-rw-r--r-- scripts/ems/experiment.meta 6
-rwxr-xr-x scripts/ems/experiment.perl 68
-rwxr-xr-x scripts/ems/support/reuse-weights.perl 4
-rwxr-xr-x scripts/ems/support/substitute-filtered-tables.perl 54
-rwxr-xr-x scripts/ems/support/substitute-weights.perl 44
7 files changed, 119 insertions, 89 deletions
diff --git a/scripts/ems/example/data/weight.ini b/scripts/ems/example/data/weight.ini
index 731de5535..e0ab92249 100644
--- a/scripts/ems/example/data/weight.ini
+++ b/scripts/ems/example/data/weight.ini
@@ -2,29 +2,12 @@
### MOSES CONFIG FILE ###
#########################
-# distortion (reordering) weight
-[weight-d]
-0.3
-0.3
-0.3
-0.3
-0.3
-0.3
-0.3
+[weight]
+Distortion0= 0.3
+UnknownWordPenalty0= 1
+WordPenalty0= -1
+TranslationModel0= 0.2 0.2 0.2 0.2 0.2
+LexicalReordering0= 0.3 0.3 0.3 0.3 0.3 0.3
+LM0= 0.5
-# language model weights
-[weight-l]
-0.5
-
-# translation model weights
-[weight-t]
-0.2
-0.2
-0.2
-0.2
-0.2
-
-# word penalty
-[weight-w]
--1
diff --git a/scripts/ems/experiment.machines b/scripts/ems/experiment.machines
index 6459be2c4..de87bf08d 100644
--- a/scripts/ems/experiment.machines
+++ b/scripts/ems/experiment.machines
@@ -1,4 +1,5 @@
cluster: townhill seville hermes lion seville sannox lutzow frontend
+multicore-4: freddie
multicore-8: tyr thor odin crom
multicore-16: saxnot vali vili freyja bragi hoenir
multicore-24: syn hel skaol saga buri loki sif magni
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 8381bd6d7..b5803b8cd 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -844,7 +844,7 @@ apply-filter
default-name: tuning/moses.filtered.ini
pass-if: TRAINING:binarize-all
ignore-if: use-hiero
- template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT
+ template: cp IN1/moses.ini OUT
apply-filter-devtest
in: TRAINING:config filtered-dir-devtest
out: filtered-config-devtest
@@ -865,7 +865,7 @@ apply-weights
out: config-with-reused-weights
ignore-if: use-hiero
default-name: tuning/moses.tuned.ini
- template: $moses-script-dir/ems/support/reuse-weights.perl IN1 < IN > OUT
+ template: $moses-script-dir/ems/support/substitute-weights.perl IN < IN1 > OUT
error: cannot open
hiero-tune
in: TRAINING:hiero-config input reference
@@ -955,7 +955,7 @@ apply-filter
default-name: evaluation/filtered.ini
pass-if: TRAINING:binarize-all
ignore-if: use-hiero
- template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT
+ template: $moses-script-dir/ems/support/substitute-weights.perl IN1/moses.ini < IN > OUT
decode
in: filtered-config input
out: system-output
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index d55913a3c..855d6b96c 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -1614,7 +1614,8 @@ sub define_tuning_tune {
my $word_alignment = &backoff_and_get("TRAINING:include-word-alignment-in-rules");
# the last 3 variables are only used for mira tuning
- my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest) = &get_output_and_input($step_id);
+ my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest, $filtered_config) = &get_output_and_input($step_id);
+ $config = $filtered_config if $filtered_config;
my $cmd = "";
if ($use_mira) {
@@ -2105,10 +2106,41 @@ sub define_training_create_config {
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
= &get_output_and_input($step_id);
- my $cmd = &get_training_setting(9);
+ my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
+ my $cmd = "$moses_src_dir/bin/create-ini ";
+
+ my %IN;
+ my %OUT;
+ if (&backoff_and_get("TRAINING:input-factors")) {
+ %IN = &get_factor_id("input");
+ }
+ else {
+ $IN{"word"} = 0;
+ }
+
+ if (&backoff_and_get("TRAINING:output-factors")) {
+ %OUT = &get_factor_id("output");
+ }
+ else {
+ $OUT{"word"} = 0;
+ }
+
+ $cmd .= "-input-factor-max ".((scalar keys %IN)-1)." ";
+
+ $cmd .= "-translation-factors ".
+ &encode_factor_definition("translation-factors",\%IN,\%OUT)." "
+ if &get("TRAINING:translation-factors");
+ $cmd .= "-reordering-factors ".
+ &encode_factor_definition("reordering-factors",\%IN,\%OUT)." "
+ if &get("TRAINING:reordering-factors");
+ $cmd .= "-generation-factors ".
+ &encode_factor_definition("generation-factors",\%OUT,\%OUT)." "
+ if &get("TRAINING:generation-factors");
- # get model, and whether suffix array is used. Determines the pt implementation.
+ # get model, and whether suffix array is used. Determines the pt implementation.
my $hierarchical = &get("TRAINING:hierarchical-rule-set");
+ $cmd .= "-hierarchical " if $hierarchical;
+
my $sa_exec_dir = &get("TRAINING:suffix-array");
my ($ptImpl, $numFF);
@@ -2576,31 +2608,8 @@ sub define_tuningevaluation_filter {
else {
$config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
$delete_config = 1;
- $cmd = &get_training_setting(9);
- $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");
-
- my $ptCmd = $phrase_translation_table;
- $ptCmd .= ":$ptImpl" if $ptImpl>0;
- $ptCmd .= ":$numFF" if defined($numFF);
- $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $ptCmd);
- $cmd .= &get_table_name_settings("reordering-factors","reordering-table", $reordering_table)
- if $reordering_table;
- # additional settings for hierarchical models
- if (&get("TRAINING:hierarchical-rule-set")) {
- my $extract_version = $VERSION;
- $extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}]
- if defined($STEP_LOOKUP{"TRAINING:extract-phrases"});
- my $glue_grammar_file = &get("TRAINING:glue-grammar");
- $glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version)
- unless $glue_grammar_file;
- $cmd .= "-glue-grammar-file $glue_grammar_file ";
- }
- if (&get("TRAINING:score-settings") &&
- &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
- $cmd .= "-sparse-translation-table ";
- }
- $cmd .= "-lm 0:3:$dir "; # dummy
- $cmd .= "-config $config\n";
+
+ $cmd = "cp $dir/model/moses.ini.$VERSION $config \n";
}
# filter command
@@ -2630,7 +2639,8 @@ sub define_evaluation_decode {
my $dir = &check_and_get("GENERAL:working-dir");
my ($system_output,
- $config,$input) = &get_output_and_input($step_id);
+ $config,$input,$filtered_config) = &get_output_and_input($step_id);
+ $config = $filtered_config if $filtered_config;
my $jobs = &backoff_and_get("EVALUATION:$set:jobs");
my $decoder = &check_backoff_and_get("EVALUATION:$set:decoder");
diff --git a/scripts/ems/support/reuse-weights.perl b/scripts/ems/support/reuse-weights.perl
index 71ac3a0b9..0b9668a34 100755
--- a/scripts/ems/support/reuse-weights.perl
+++ b/scripts/ems/support/reuse-weights.perl
@@ -19,8 +19,8 @@ while(<WEIGHT>) {
$weights_file_spec = "\n".$_;
$weights_file_flag = 1;
}
- elsif (/^\[weight\-(\S+)\]/) {
- $current_weight = $1;
+ elsif (/^\[weight]/) {
+ $current_weight = $1;
}
elsif ($current_weight && /^(([\-\d\.]+)([Ee][+-]?[\d]+)?)$/) {
push @{$WEIGHT{$current_weight}},$1;
diff --git a/scripts/ems/support/substitute-filtered-tables.perl b/scripts/ems/support/substitute-filtered-tables.perl
index 723c140b0..530130aa8 100755
--- a/scripts/ems/support/substitute-filtered-tables.perl
+++ b/scripts/ems/support/substitute-filtered-tables.perl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl -w
# experiment.perl support script
# get filtered rule and reordering tables and place them into a configuration file
@@ -8,42 +8,34 @@ if (scalar @ARGV < 1 || ! -e $ARGV[0]) {
}
# read config sections about filtered tables
-my %CONFIG;
-my $status = 0;
-my $section;
+my @arr;
open(FILTERED, $ARGV[0]) or die "Cannot open: $!";
-while(<FILTERED>) {
- if (/^\[(.+)\]\s*$/) {
- if ($1 eq "ttable-file" || $1 eq "distortion-file") {
- $section = $1;
- $status = 1;
- print STDERR "found $section\n";
- }
- else {
- $status = 0;
- }
- }
- elsif ($status) {
- $CONFIG{$section} .= $_;
+while(my $line = <FILTERED>) {
+ chomp($line);
+ if ($line =~ /PhraseModel /) {
+ print STDERR "pt:$line \n";
+ push(@arr, $line);
}
+ elsif ($line =~ /LexicalReordering /) {
+ print STDERR "ro:$line \n";
+ push(@arr, $line);
+ }
}
close(FILTERED);
# pass through master config file and replace table sections
-($status,$section) = (0);
-while(<STDIN>) {
- if (/^\[(.+)\]\s*$/) {
- print $_;
- if ($1 eq "ttable-file" || $1 eq "distortion-file") {
- print STDERR "replacing $1\n";
- print $CONFIG{$1};
- $status = 1;
- }
- else {
- $status = 0;
- }
+my $ind = 0;
+while(my $line = <STDIN>) {
+ chomp($line);
+ if ($line =~ /PhraseModel /) {
+ print $arr[$ind]."\n";
+ ++$ind;
}
- elsif (!$status) {
- print $_;
+ elsif ($line =~ /LexicalReordering /) {
+ print $arr[$ind]."\n";
+ ++$ind;
+ }
+ else {
+ print "$line\n";
}
}
diff --git a/scripts/ems/support/substitute-weights.perl b/scripts/ems/support/substitute-weights.perl
new file mode 100755
index 000000000..d76ae2f8c
--- /dev/null
+++ b/scripts/ems/support/substitute-weights.perl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+
+# experiment.perl support script
+# usage: substitute-weights.perl BASE.ini < TUNED.ini > OUT.ini
+# prints BASE.ini without its [weight] section, followed by the [weight]
+# section of the ini file read from STDIN
+# (section headers are recognised at the start of a line)
+
+use strict;
+
+if (scalar @ARGV < 1 || ! -e $ARGV[0]) {
+    die("ERROR: could not find base ini file");
+}
+
+# Copy lines from $fh to STDOUT: the lines inside the [weight] section when
+# $want_weights is 1, the lines outside of it when $want_weights is 0.
+# The "[weight]" header line itself counts as part of the weight section.
+sub print_sections {
+    my ($fh, $want_weights) = @_;
+    my $inWeightSection = 0;
+    while (my $line = <$fh>) {
+        chomp($line);
+        if ($line =~ /^\s*\[weight\]/) {
+            $inWeightSection = 1;
+        }
+        # any other "[name]" header line closes the weight section; the name
+        # may hold digits or hyphens, so match everything up to the "]"
+        elsif ($line =~ /^\s*\[[^\]]*\]/) {
+            $inWeightSection = 0;
+        }
+
+        # chomp + "\n" also terminates a last line that lacks a newline
+        print "$line\n" if $inWeightSection == $want_weights;
+    }
+    return;
+}
+
+# read initial ini file: keep everything except its weight section
+open(my $base_ini, '<', $ARGV[0]) or die "Cannot open: $!";
+print_sections($base_ini, 0);
+close($base_ini);
+
+# read tuned ini file: keep only its weight section (header included)
+print_sections(\*STDIN, 1);