Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-19 06:28:02 +0400
committer: phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-19 06:28:02 +0400
commit: 542cd72c63f359e3d0c4c0e04dabb4e8138a66f2 (patch)
tree: bf01447ba7bc8ba79f564c2f5923a93f1e7b2629 /scripts/ems
parent: 668bda8623ae6186550696286a67eb769854b9cf (diff)
moved config creation back into train-model.perl
Diffstat (limited to 'scripts/ems')
-rw-r--r-- scripts/ems/experiment.meta | 16
-rwxr-xr-x scripts/ems/experiment.perl | 68
-rwxr-xr-x scripts/ems/support/build-sparse-features.perl (renamed from scripts/ems/support/build-sparse-lexical-features.perl) | 0
-rwxr-xr-x scripts/ems/support/substitute-filtered-tables-and-weights.perl | 2
-rwxr-xr-x scripts/ems/support/substitute-weights.perl | 32
5 files changed, 45 insertions, 73 deletions
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 0731e2480..c8d1a4625 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -570,18 +570,18 @@ build-generation-custom
rerun-on-change: generation-factors generation-type training-options script generation-corpus
ignore-unless: AND generation-factors generation-corpus
default-name: model/generation-table
-build-sparse-lexical
+build-sparse
in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus
- out: sparse-lexical
- ignore-unless: sparse-lexical-features
- rerun-on-change: sparse-lexical-features
+ out: sparse
+ ignore-unless: sparse-features
+ rerun-on-change: sparse-features
default-name: model/most-frequent-words
- template: $moses-script-dir/ems/support/build-sparse-lexical-features.perl IN $input-extension $output-extension OUT "$sparse-lexical-features"
+ template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
create-config
- in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse-lexical corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm
+ in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
- rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini create-ini
+ rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
default-name: model/moses.ini
error: Unknown option
binarize-config
@@ -865,7 +865,7 @@ apply-weights
out: config-with-reused-weights
ignore-if: use-hiero
default-name: tuning/moses.tuned.ini
- template: $moses-script-dir/ems/support/substitute-weights.perl IN < IN1 > OUT
+ template: $moses-script-dir/ems/support/substitute-weights.perl IN IN1 OUT
error: cannot open
hiero-tune
in: TRAINING:hiero-config input reference
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index a4d94b00e..1f1b37e35 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2029,7 +2029,7 @@ sub define_domain_feature_score_option {
$restricted_to_table = $1 if $spec =~ /( table \S+)/;
die("ERROR: faulty TRAINING:domain-features spec (no method): $spec\n") unless defined($method);
if ($spec =~ /sparse/) {
- return "-sparse-translation-table -score-options '--SparseDomain$method $domains$restricted_to_table' -additional-ini '<br>[report-sparse-features]<br>stm<br><br>' ";
+ return "-score-options '--SparseDomain$method $domains$restricted_to_table' ";
}
else {
return "-score-options '--Domain$method $domains' ";
@@ -2104,57 +2104,24 @@ sub get_config_tables {
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$domains) = @_;
my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
- my $cmd = &backoff_and_get("TRAINING:create-ini");
- $cmd = "$moses_src_dir/bin/create-ini" unless defined($cmd);
-
- my %IN;
- my %OUT;
- if (&backoff_and_get("TRAINING:input-factors")) {
- %IN = &get_factor_id("input");
- }
- else {
- $IN{"word"} = 0;
- }
-
- if (&backoff_and_get("TRAINING:output-factors")) {
- %OUT = &get_factor_id("output");
- }
- else {
- $OUT{"word"} = 0;
- }
-
- $cmd .= " -input-factor-max ".((scalar keys %IN)-1)." ";
-
- $cmd .= "-translation-factors ".
- &encode_factor_definition("translation-factors",\%IN,\%OUT)." "
- if &get("TRAINING:translation-factors");
- $cmd .= "-reordering-factors ".
- &encode_factor_definition("reordering-factors",\%IN,\%OUT)." "
- if &get("TRAINING:reordering-factors");
- $cmd .= "-generation-factors ".
- &encode_factor_definition("generation-factors",\%OUT,\%OUT)." "
- if &get("TRAINING:generation-factors");
+ my $cmd = &get_training_setting(9);
# get model, and whether suffix array is used. Determines the pt implementation.
my $hierarchical = &get("TRAINING:hierarchical-rule-set");
$cmd .= "-hierarchical " if $hierarchical;
my $sa_exec_dir = &get("TRAINING:suffix-array");
-
- my ($ptImpl, $numFF);
- if ($hierarchical) {
- if ($sa_exec_dir) {
- $ptImpl = 10; # suffix array
- $numFF = 7;
- }
- else {
- $ptImpl = 6; # in-mem SCFG
- }
- }
- else {
- $ptImpl = 0; # phrase-based
- }
-
+ my ($ptImpl, $numFF) = (0);
+ if ($hierarchical) {
+ if ($sa_exec_dir) {
+ $ptImpl = 10; # suffix array
+ $numFF = 7;
+ }
+ else {
+ $ptImpl = 6; # in-mem SCFG
+ }
+ }
+
# additional settings for factored models
my $ptCmd = $phrase_translation_table;
$ptCmd .= ":$ptImpl" if $ptImpl>0;
@@ -2185,14 +2152,11 @@ sub get_config_tables {
my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""),$extract_version);
$cmd .= "-unknown-word-label $unknown_word_label ";
}
+ # configuration due to domain features
+ $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");
+ # additional specified items from config
my $additional_ini = &get("TRAINING:additional-ini");
- if (&get("TRAINING:score-settings") &&
- &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
- $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>";
- $cmd .= "-sparse-translation-table ";
- }
$cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini);
- $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");
return $cmd;
}
diff --git a/scripts/ems/support/build-sparse-lexical-features.perl b/scripts/ems/support/build-sparse-features.perl
index 2bda46722..2bda46722 100755
--- a/scripts/ems/support/build-sparse-lexical-features.perl
+++ b/scripts/ems/support/build-sparse-features.perl
diff --git a/scripts/ems/support/substitute-filtered-tables-and-weights.perl b/scripts/ems/support/substitute-filtered-tables-and-weights.perl
index a914248cf..886408125 100755
--- a/scripts/ems/support/substitute-filtered-tables-and-weights.perl
+++ b/scripts/ems/support/substitute-filtered-tables-and-weights.perl
@@ -26,7 +26,7 @@ $cmd = "$RealBin/substitute-filtered-tables.perl $filteredPath < $origPath > $te
print STDERR "cmd=$cmd \n";
system($cmd);
-$cmd = "$RealBin/substitute-weights.perl $tempPath < $tunedPath > $outPath ";
+$cmd = "$RealBin/substitute-weights.perl $tempPath $tunedPath $outPath ";
print STDERR "cmd=$cmd \n";
system($cmd);
diff --git a/scripts/ems/support/substitute-weights.perl b/scripts/ems/support/substitute-weights.perl
index 86c715a3e..1b964ad80 100755
--- a/scripts/ems/support/substitute-weights.perl
+++ b/scripts/ems/support/substitute-weights.perl
@@ -3,17 +3,15 @@
# experiment.perl support script
# get filtered rule and reordering tables and place them into a configuration file
-if (scalar @ARGV < 1 || ! -e $ARGV[0]) {
- die("ERROR: could not find base ini file");
-}
+die("ERROR: syntax: base-ini weight-ini out-ini\n") unless scalar @ARGV == 3;
# read initial ini file
-my @arr;
my $inWeightSection = 0;
-open(BASEINI, $ARGV[0]) or die "Cannot open: $!";
+open(OUT, ">".$ARGV[2]) or die "ERROR cannot open out-ini '$ARGV[2]': $!";
+open(BASEINI, $ARGV[0]) or die "ERROR cannot open base-ini '$ARGV[0]': $!";
while(my $line = <BASEINI>) {
chomp($line);
- if ($line =~ /\[weight\]/) {
+ if ($line =~ /\[weight\]/ || $line =~ /\[weight-file\]/) {
$inWeightSection = 1;
}
elsif ($line =~ /\[[a-zA-Z0-0]*\]/) {
@@ -21,27 +19,37 @@ while(my $line = <BASEINI>) {
}
if (!$inWeightSection) {
- print "$line\n";
+ print OUT "$line\n" unless $line =~ /dense weights for feature functions/;
}
}
close(BASEINI);
# read tuned ini file
$inWeightSection = 0;
-my $ind = 0;
-while(my $line = <STDIN>) {
+open(WEIGHTINI, $ARGV[1]) or die "ERROR cannot open weight-ini '$ARGV[1]': $!";
+while(my $line = <WEIGHTINI>) {
chomp($line);
if ($line =~ /\[weight\]/) {
$inWeightSection = 1;
+ print OUT "# core weights\n";
}
elsif ($line =~ /\[weight-file\]/) {
- $inWeightSection = 1;
+ print OUT "# sparse weights\n";
+ print OUT "$line\n";
+ my $sparse_weight_file = <WEIGHTINI>;
+ chop($sparse_weight_file);
+ # copy sparse feature file
+ `cp $sparse_weight_file $ARGV[2].sparse`;
+ print OUT "$ARGV[2].sparse\n\n";
+ $inWeightSection = 0;
}
elsif ($line =~ /\[[a-zA-Z0-0]*\]/) {
+ print OUT "\n" if $inWeightSection;
$inWeightSection = 0;
}
- if ($inWeightSection) {
- print "$line\n";
+ if ($inWeightSection && $line !~ /^\s*$/) {
+ print OUT "$line\n";
}
}
+close(WEIGHTINI);