Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-17 11:37:29 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-17 11:37:29 +0400
commit 4cdffc8a891a3004a0102aa77ae26ceb91a1e881 (patch)
tree 815350e8007c863a098d04689bd35177cc61569b /scripts/ems
parent 13991fc88fc6184139db46aa306789d855ef54cd (diff)
fixes for sparse feature handling
Diffstat (limited to 'scripts/ems')
-rw-r--r-- scripts/ems/experiment.meta 4
-rwxr-xr-x scripts/ems/experiment.perl 49
-rwxr-xr-x scripts/ems/support/substitute-filtered-tables.perl 35
3 files changed, 53 insertions, 35 deletions
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index c82195b1a..d10bf2dbe 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -844,13 +844,13 @@ apply-filter
default-name: tuning/moses.filtered.ini
pass-if: TRAINING:binarize-all
ignore-if: use-hiero
- template: cp IN1/moses.ini OUT
+ template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT
apply-filter-devtest
in: TRAINING:config filtered-dir-devtest
out: filtered-config-devtest
default-name: tuning/moses.filtered.devtest.ini
pass-if: TRAINING:binarize-all
- ignore-unless: use-mira
+ ignore-unless: use-mira
template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT
tune
in: filtered-config input reference filtered-config-devtest input-devtest reference-devtest
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index b4ebe161a..a4d94b00e 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2100,15 +2100,12 @@ sub define_training_sigtest_filter {
&create_step($step_id,$cmd);
}
-sub define_training_create_config {
- my ($step_id) = @_;
-
- my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
- = &get_output_and_input($step_id);
+sub get_config_tables {
+ my ($config,$reordering_table,$phrase_translation_table,$generation_table,$domains) = @_;
my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
- #my $cmd = "$moses_src_dir/bin/create-ini ";
my $cmd = &backoff_and_get("TRAINING:create-ini");
+ $cmd = "$moses_src_dir/bin/create-ini" unless defined($cmd);
my %IN;
my %OUT;
@@ -2188,8 +2185,29 @@ sub define_training_create_config {
my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""),$extract_version);
$cmd .= "-unknown-word-label $unknown_word_label ";
}
+ my $additional_ini = &get("TRAINING:additional-ini");
+ if (&get("TRAINING:score-settings") &&
+ &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
+ $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>";
+ $cmd .= "-sparse-translation-table ";
+ }
+ $cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini);
+ $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");
+
+ return $cmd;
+}
+
+sub define_training_create_config {
+ my ($step_id) = @_;
+
+ my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
+ = &get_output_and_input($step_id);
+
+ my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
+
+ # sparse lexical features provide additional content for config file
+ $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
- # find out which language model files have been built
my @LM_SETS = &get_sets("LM");
my %INTERPOLATED_AWAY;
my %OUTPUT_FACTORS;
@@ -2257,18 +2275,6 @@ sub define_training_create_config {
$cmd .= "-lm $factor:$order:$lm_file:$type ";
}
- my $additional_ini = &get("TRAINING:additional-ini");
- if (&get("TRAINING:score-settings") &&
- &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
- $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>";
- $cmd .= "-sparse-translation-table ";
- }
- $cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini);
-
- # sparse lexical features provide additional content for config file
- $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
- $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");
-
&create_step($step_id,$cmd);
}
@@ -2608,9 +2614,12 @@ sub define_tuningevaluation_filter {
# create pseudo-config file
else {
$config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
+ $cmd = "touch $config\n";
$delete_config = 1;
- $cmd = "cp $dir/model/moses.ini.$VERSION $config \n";
+ $cmd .= &get_config_tables($config,$reordering_table,$phrase_translation_table,undef,$domains);
+
+ $cmd .= "-lm 0:3:$config:8\n"; # dummy kenlm 3-gram model on factor 0
}
# filter command
diff --git a/scripts/ems/support/substitute-filtered-tables.perl b/scripts/ems/support/substitute-filtered-tables.perl
index 530130aa8..3efb243d7 100755
--- a/scripts/ems/support/substitute-filtered-tables.perl
+++ b/scripts/ems/support/substitute-filtered-tables.perl
@@ -10,30 +10,39 @@ if (scalar @ARGV < 1 || ! -e $ARGV[0]) {
# read config sections about filtered tables
my @arr;
open(FILTERED, $ARGV[0]) or die "Cannot open: $!";
+my $feature_section = 0;
while(my $line = <FILTERED>) {
chomp($line);
- if ($line =~ /PhraseModel /) {
- print STDERR "pt:$line \n";
- push(@arr, $line);
+ if ($line =~ /^\[(.+)\]/) {
+ $feature_section = ($1 eq "feature");
+ }
+ next unless $feature_section;
+ if ($line =~ /PhraseDictionary/) {
+ print STDERR "pt:$line \n";
+ push(@arr, $line);
+ }
+ elsif ($line =~ /LexicalReordering/) {
+ print STDERR "ro:$line \n";
+ push(@arr, $line);
}
- elsif ($line =~ /LexicalReordering /) {
- print STDERR "ro:$line \n";
- push(@arr, $line);
- }
}
close(FILTERED);
# pass through master config file and replace table sections
my $ind = 0;
+$feature_section = 0;
while(my $line = <STDIN>) {
chomp($line);
- if ($line =~ /PhraseModel /) {
- print $arr[$ind]."\n";
- ++$ind;
+ if ($line =~ /^\[(.+)\]/) {
+ $feature_section = ($1 eq "feature");
+ }
+ if ($feature_section && $line =~ /PhraseDictionary/) {
+ print $arr[$ind]."\n";
+ ++$ind;
}
- elsif ($line =~ /LexicalReordering /) {
- print $arr[$ind]."\n";
- ++$ind;
+ elsif ($feature_section && $line =~ /LexicalReordering/) {
+ print $arr[$ind]."\n";
+ ++$ind;
}
else {
print "$line\n";