Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-12 11:16:22 +0400
committer phikoehn <pkoehn@inf.ed.ac.uk> 2013-05-12 11:16:22 +0400
commit 41da5b27605523be15062f7284a6fccc2ea373af (patch)
tree 797a33e75e9abe0ae24a65b069c08921682482d1 /scripts/training
parent d19a28ae211dfe3a5bedb2fd95feded9967b82ed (diff)
parent 5ece9a17402a3d383b0d7545eae42af9fee83130 (diff)
Merge branch 'master' of git://github.com/moses-smt/mosesdecoder
Diffstat (limited to 'scripts/training')
-rwxr-xr-x scripts/training/binarize-model.perl 62
-rwxr-xr-x scripts/training/filter-model-given-input.pl 303
-rwxr-xr-x scripts/training/mert-moses.pl 148
-rwxr-xr-x scripts/training/train-model.perl 12
4 files changed, 335 insertions, 190 deletions
diff --git a/scripts/training/binarize-model.perl b/scripts/training/binarize-model.perl
new file mode 100755
index 000000000..15ad23ac4
--- /dev/null
+++ b/scripts/training/binarize-model.perl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+
+#
+# Binarize a Moses model
+#
+
+use strict;
+
+use Getopt::Long "GetOptions";
+use FindBin qw($RealBin);
+
+$ENV{"LC_ALL"} = "C";
+
+# Locate the root of the scripts tree. FindBin normally supplies $RealBin;
+# dirname(__FILE__) is only a fallback. dirname() lives in File::Basename,
+# which was never loaded, so the fallback used to die with
+# "Undefined subroutine &main::dirname". Load it here.
+use File::Basename qw(dirname);
+my $SCRIPTS_ROOTDIR = $RealBin;
+if ($SCRIPTS_ROOTDIR eq '') {
+  $SCRIPTS_ROOTDIR = dirname(__FILE__);
+}
+$SCRIPTS_ROOTDIR =~ s/\/training$//;
+
+# Command-line options: -Hierarchical (flag) and -Binarizer <program>.
+my ($binarizer, $input_config, $output_config);
+my $opt_hierarchical = 0;
+$binarizer = "$SCRIPTS_ROOTDIR/../bin/processPhraseTable";
+GetOptions(
+  "Hierarchical" => \$opt_hierarchical,
+  "Binarizer=s" => \$binarizer
+) or exit(1);
+
+# Positional arguments: the ini file to read and the ini file to create.
+$input_config = shift;
+$output_config = shift;
+
+if (!defined $input_config || !defined $output_config) {
+  print STDERR "usage: binarize-model.perl input-config output-config [-Hierarchical] [-Binarizer binarizer]\n";
+  exit 1;
+}
+
+my $hierarchical = "";
+$hierarchical = "-Hierarchical" if $opt_hierarchical;
+my $targetdir = "$output_config.tables";
+
+# Delegate the work to filter-model-given-input.pl: with -nofilter and
+# /dev/null as the input text it is asked only to run the binarizer.
+safesystem("$RealBin/filter-model-given-input.pl $targetdir $input_config /dev/null $hierarchical -nofilter -Binarizer $binarizer") || die "binarising failed";
+# Expose the generated moses.ini under the requested output name.
+safesystem("rm -f $output_config; ln -s $targetdir/moses.ini $output_config") || die "failed to link new ini file";
+
+#FIXME: Why isn't this in a module?
+# Run a command through system(), echoing it to STDERR first.
+# Returns true when the command exits with status 0 and false for any other
+# exit status. Exits the script with status 1 if the command could not be
+# started or was terminated by a signal.
+sub safesystem {
+  my @command = @_;
+  print STDERR "Executing: @command\n";
+  system(@command);
+  my $status = $?;
+
+  # -1 means the command could not be launched at all; $! holds the reason.
+  if ($status == -1) {
+    print STDERR "Failed to execute: @command\n $!\n";
+    exit(1);
+  }
+
+  # The low 7 bits carry the terminating signal, bit 128 the core-dump flag.
+  my $signal = $status & 127;
+  if ($signal) {
+    my $core = ($status & 128) ? 'with' : 'without';
+    printf STDERR "Execution of: @command\n died with signal %d, %s coredump\n",
+      $signal, $core;
+    exit(1);
+  }
+
+  # Normal termination: the exit code is in the high byte.
+  my $exitcode = $status >> 8;
+  print STDERR "Exit code: $exitcode\n" if $exitcode;
+  return ! $exitcode;
+}
+
+
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
index 6323096be..586e7efdf 100755
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl -w
# $Id$
# Given a moses.ini file and an input text prepare minimized translation
@@ -95,104 +95,161 @@ safesystem("mkdir -p $dir") or die "Can't mkdir $dir";
# get tables to be filtered (and modify config file)
my (@TABLE,@TABLE_FACTORS,@TABLE_NEW_NAME,%CONSIDER_FACTORS,%KNOWN_TTABLE,@TABLE_WEIGHTS,%TABLE_NUMBER);
+
my %new_name_used = ();
open(INI_OUT,">$dir/moses.ini") or die "Can't write $dir/moses.ini";
open(INI,$config) or die "Can't read $config";
-while(<INI>) {
- print INI_OUT $_;
- if (/ttable-file\]/) {
- while(1) {
- my $table_spec = <INI>;
- if ($table_spec !~ /^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)( \S+)?$/) {
- print INI_OUT $table_spec;
- last;
- }
- my ($phrase_table_impl,$source_factor,$t,$w,$file,$table_flag) = ($1,$2,$3,$4,$5,$6);
- $table_flag = "" if (!defined($table_flag));
-
- if (($phrase_table_impl ne "0" && $phrase_table_impl ne "6") || $file =~ /glue-grammar/) {
- # Only Memory ("0") and NewFormat ("6") can be filtered.
- print INI_OUT $table_spec;
- next;
- }
-
- chomp($file);
- push @TABLE, $file;
- push @TABLE_WEIGHTS,$w;
- $KNOWN_TTABLE{$#TABLE}++;
-
- my $new_name = "$dir/phrase-table.$source_factor-$t.".(++$TABLE_NUMBER{"$source_factor-$t"});
- my $cnt = 1;
- $cnt ++ while (defined $new_name_used{"$new_name.$cnt"});
- $new_name .= ".$cnt";
- $new_name_used{$new_name} = 1;
- if ($binarizer && $phrase_table_impl == 6) {
- print INI_OUT "2 $source_factor $t $w $new_name.bin$table_flag\n";
- }
- elsif ($binarizer && $phrase_table_impl == 0) {
- if ($binarizer =~ /processPhraseTableMin/) {
- print INI_OUT "12 $source_factor $t $w $new_name$table_flag\n";
- } else {
- print INI_OUT "1 $source_factor $t $w $new_name$table_flag\n";
- }
- } else {
- $new_name .= ".gz" if $opt_gzip;
- print INI_OUT "$phrase_table_impl $source_factor $t $w $new_name$table_flag\n";
- }
- push @TABLE_NEW_NAME,$new_name;
-
- $CONSIDER_FACTORS{$source_factor} = 1;
- print STDERR "Considering factor $source_factor\n";
- push @TABLE_FACTORS, $source_factor;
- }
- }
- elsif (/distortion-file/) {
- while(1) {
- my $table_spec = <INI>;
- if ($table_spec !~ /^([\d\,\-]+) (\S+) (\d+) (\S+)$/) {
- print INI_OUT $table_spec;
- last;
- }
- my ($factors,$t,$w,$file) = ($1,$2,$3,$4);
- my $source_factor = $factors;
- $source_factor =~ s/\-[\d,]+$//;
-
- chomp($file);
- push @TABLE,$file;
-
- $file =~ s/^.*\/+([^\/]+)/$1/g;
- my $new_name = "$dir/$file";
- $new_name =~ s/\.gz//;
- print INI_OUT "$factors $t $w $new_name\n";
- push @TABLE_NEW_NAME,$new_name;
-
- $CONSIDER_FACTORS{$source_factor} = 1;
- print STDERR "Considering factor $source_factor\n";
- push @TABLE_FACTORS,$source_factor;
- }
- }
-}
+while(my $line = <INI>) {
+ chomp($line);
+ my @toks = split(/ /, $line);
+ if ($line =~ /PhraseDictionaryMemory /
+   || $line =~ /PhraseDictionaryTreeAdaptor /
+   || $line =~ /PhraseDictionaryOnDisk /
+   || $line =~ /PhraseDictionarySCFG /
+   ) {
+   # Phrase-table feature line of the form
+   #   <Implementation> key=value key=value ...
+   # Record the table for filtering and rewrite its path (and, when a
+   # binarizer is given, its implementation name) in the output moses.ini.
+   print STDERR "pt:$line\n";
+
+   my ($phrase_table_impl,$source_factor,$t,$w,$file,$table_flag);
+   $table_flag = "";   # nothing on the line sets a table flag; it stays empty
+   $phrase_table_impl = $toks[0];
+
+   for (my $i = 1; $i < scalar(@toks); ++$i) {
+     # Split on the first '=' only, so a value that itself contains '='
+     # (e.g. a path) is kept whole. Tokens without '=' are ignored.
+     # $line was chomped already, so the pieces need no further chomp.
+     my ($key, $value) = split(/=/, $toks[$i], 2);
+     next if !defined($key) || !defined($value);
+
+     if ($key eq "num-features") {
+       $w = $value;
+     }
+     elsif ($key eq "input-factor") {
+       $source_factor = $value;
+     }
+     elsif ($key eq "output-factor") {
+       $t = $value;
+     }
+     elsif ($key eq "path") {
+       $file = $value;
+     }
+   }
+
+   if (($phrase_table_impl ne "PhraseDictionaryMemory" && $phrase_table_impl ne "PhraseDictionarySCFG") || $file =~ /glue-grammar/) {
+     # Only PhraseDictionaryMemory and PhraseDictionarySCFG tables can be
+     # filtered; any other implementation, and any table whose path contains
+     # "glue-grammar", is copied to the new ini unchanged.
+     print INI_OUT "$line\n";
+     next;
+   }
+
+   push @TABLE, $file;
+   push @TABLE_WEIGHTS,$w;
+   $KNOWN_TTABLE{$#TABLE}++;
+
+   # Build a unique name for the filtered table inside $dir.
+   my $new_name = "$dir/phrase-table.$source_factor-$t.".(++$TABLE_NUMBER{"$source_factor-$t"});
+   my $cnt = 1;
+   $cnt ++ while (defined $new_name_used{"$new_name.$cnt"});
+   $new_name .= ".$cnt";
+   $new_name_used{$new_name} = 1;
+
+   # Choose the implementation name and path written to the new ini,
+   # depending on whether (and with which program) the table is binarized.
+   if ($binarizer && $phrase_table_impl eq "PhraseDictionarySCFG") {
+     $phrase_table_impl = "PhraseDictionaryOnDisk";
+     @toks = set_value(\@toks, "path", "$new_name.bin$table_flag");
+   }
+   elsif ($binarizer && $phrase_table_impl eq "PhraseDictionaryMemory") {
+     if ($binarizer =~ /processPhraseTableMin/) {
+       $phrase_table_impl = "PhraseDictionaryCompact";
+       @toks = set_value(\@toks, "path", "$new_name$table_flag");
+     }
+     elsif ($binarizer =~ /CreateOnDiskPt/) {
+       $phrase_table_impl = "PhraseDictionaryOnDisk";
+       @toks = set_value(\@toks, "path", "$new_name.bin$table_flag");
+     }
+     else {
+       $phrase_table_impl = "PhraseDictionaryTreeAdaptor";
+       @toks = set_value(\@toks, "path", "$new_name$table_flag");
+     }
+   }
+   else {
+     $new_name .= ".gz" if $opt_gzip;
+     @toks = set_value(\@toks, "path", "$new_name$table_flag");
+   }
+
+   $toks[0] = $phrase_table_impl;
+
+   print INI_OUT join_array(\@toks)."\n";
+
+   push @TABLE_NEW_NAME,$new_name;
+
+   $CONSIDER_FACTORS{$source_factor} = 1;
+   print STDERR "Considering factor $source_factor\n";
+   push @TABLE_FACTORS, $source_factor;
+
+ } # end of phrase-table branch
+ elsif ($line =~ /LexicalReordering /) {
+   # Lexicalized-reordering feature line of the form
+   #   LexicalReordering key=value key=value ...
+   # Record the table for filtering and point its path= entry at the copy
+   # that will be written into $dir.
+   print STDERR "ro:$line\n";
+   my ($source_factor, $file);
+
+   for (my $i = 1; $i < scalar(@toks); ++$i) {
+     # Split on the first '=' only so a value containing '=' stays whole;
+     # tokens without '=' are ignored.
+     my ($key, $value) = split(/=/, $toks[$i], 2);
+     next if !defined($key) || !defined($value);
+
+     if ($key eq "input-factor") {
+       # Store the value itself. The previous "chomp(...)" assignment stored
+       # chomp's return value (the number of characters removed, here 0)
+       # instead of the factor string.
+       $source_factor = $value;
+     }
+     elsif ($key eq "path") {
+       $file = $value;
+     }
+   }
+
+   push @TABLE, $file;
+
+   # The filtered table keeps its base name, minus ".gz", inside $dir.
+   $file =~ s/^.*\/+([^\/]+)/$1/g;
+   my $new_name = "$dir/$file";
+   $new_name =~ s/\.gz//;
+
+   @toks = set_value(\@toks, "path", "$new_name");
+   print INI_OUT join_array(\@toks)."\n";
+
+   push @TABLE_NEW_NAME,$new_name;
+
+   $CONSIDER_FACTORS{$source_factor} = 1;
+   print STDERR "Considering factor $source_factor\n";
+   push @TABLE_FACTORS,$source_factor;
+
+ } # end of LexicalReordering branch
+ else {
+ print INI_OUT "$line\n";
+ }
+} # while(<INI>) {
close(INI);
close(INI_OUT);
my %TMP_INPUT_FILENAME;
-if ($opt_hierarchical)
-{
- # Write a separate, temporary input file for each combination of source
- # factors
- foreach my $key (keys %CONSIDER_FACTORS) {
- my $filename = "$dir/input-$key";
- open(FILEHANDLE,">$filename") or die "Can't open $filename for writing";
- $TMP_INPUT_FILENAME{$key} = $filename;
- my @FACTOR = split(/,/, $key);
- open(PIPE,"$SCRIPTS_ROOTDIR/training/reduce_combine.pl $input @FACTOR |");
- while (my $line = <PIPE>) {
- print FILEHANDLE $line
- }
- close(FILEHANDLE);
- }
-}
+if ($opt_hierarchical) {
+ # Write a separate, temporary input file for each combination of source
+ # factors
+ foreach my $key (keys %CONSIDER_FACTORS) {
+ my $filename = "$dir/input-$key";
+ open(FILEHANDLE,">$filename") or die "Can't open $filename for writing";
+ $TMP_INPUT_FILENAME{$key} = $filename;
+ my @FACTOR = split(/,/, $key);
+ open(PIPE,"$SCRIPTS_ROOTDIR/training/reduce_combine.pl $input @FACTOR |");
+ while (my $line = <PIPE>) {
+ print FILEHANDLE $line
+ }
+ close(FILEHANDLE);
+ } # foreach my $key (keys %CONSIDER_FACTORS) {
+} #if ($opt_hierarchical) {
my %PHRASE_USED;
if (!$opt_hierarchical) {
@@ -223,22 +280,6 @@ if (!$opt_hierarchical) {
close(INPUT);
}
-sub mk_open_string {
- my $file = shift;
- my $openstring;
- if ($file !~ /\.gz$/ && -e "$file.gz") {
- $openstring = "$ZCAT $file.gz |";
- } elsif ($file =~ /\.gz$/) {
- $openstring = "$ZCAT $file |";
- } elsif ($opt_hierarchical) {
- $openstring = "cat $file |";
- } else {
- $openstring = "< $file";
- }
- return $openstring;
-}
-
-
# filter files
for(my $i=0;$i<=$#TABLE;$i++) {
my ($used,$total) = (0,0);
@@ -350,6 +391,23 @@ close(INFO);
print "To run the decoder, please call:
moses -f $dir/moses.ini -i $input\n";
+# functions
+# Build the second argument for a two-argument open() that reads $file:
+# a "$ZCAT ... |" pipe when a gzipped version is involved, a "cat ... |"
+# pipe in hierarchical mode, and a plain "< file" otherwise.
+sub mk_open_string {
+  my $path = shift;
+
+  # Name has no .gz suffix, but a compressed sibling exists: read that one.
+  return "$ZCAT $path.gz |" if $path !~ /\.gz$/ && -e "$path.gz";
+
+  # Name itself ends in .gz.
+  return "$ZCAT $path |" if $path =~ /\.gz$/;
+
+  # Uncompressed file: pipe through cat in hierarchical mode, else open directly.
+  return "cat $path |" if $opt_hierarchical;
+  return "< $path";
+}
+
+
sub safesystem {
print STDERR "Executing: @_\n";
system(@_);
@@ -368,6 +426,7 @@ sub safesystem {
return ! $exitcode;
}
}
+
sub ensure_full_path {
my $PATH = shift;
return $PATH if $PATH =~ /^\//;
@@ -388,3 +447,33 @@ sub ensure_full_path {
return $PATH;
}
+# Concatenate the elements of the referenced array, appending a single space
+# after every element (so a non-empty result ends with a trailing space).
+sub join_array {
+  my ($tokens_ref) = @_;
+  return join("", map { "$_ " } @{$tokens_ref});
+}
+
+# Return a copy of the referenced token list in which the first "key=value"
+# token (searching from index 1; index 0 is never examined) whose key equals
+# $keySought has been replaced by "key=$newValue". If no token matches, the
+# copy is returned unchanged. The caller's array is not modified.
+sub set_value {
+  my ($tokens_ref, $keySought, $newValue) = @_;
+  my @result = @{$tokens_ref};
+
+  foreach my $idx (1 .. $#result) {
+    my ($key) = split(/=/, $result[$idx]);
+    if ($key eq $keySought) {
+      $result[$idx] = "$key=$newValue";
+      last;
+    }
+  }
+
+  return @result;
+}
+
+
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 23b61e2cf..fbb2d0620 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -58,29 +58,6 @@ my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
-## We preserve this bit of comments to keep the traditional weight ranges.
-# "w" => [ [ 0.0, -1.0, 1.0 ] ], # word penalty
-# "d" => [ [ 1.0, 0.0, 2.0 ] ], # lexicalized reordering model
-# "lm" => [ [ 1.0, 0.0, 2.0 ] ], # language model
-# "g" => [ [ 1.0, 0.0, 2.0 ], # generation model
-# [ 1.0, 0.0, 2.0 ] ],
-# "tm" => [ [ 0.3, 0.0, 0.5 ], # translation model
-# [ 0.2, 0.0, 0.5 ],
-# [ 0.3, 0.0, 0.5 ],
-# [ 0.2, 0.0, 0.5 ],
-# [ 0.0,-1.0, 1.0 ] ], # ... last weight is phrase penalty
-# "lex"=> [ [ 0.1, 0.0, 0.2 ] ], # global lexical model
-# "I" => [ [ 0.0,-1.0, 1.0 ] ], # input lattice scores
-
-
-
-# moses.ini file uses FULL names for lambdas, while this training script
-# internally (and on the command line) uses ABBR names.
-my @ABBR_FULL_MAP = qw(d=weight-d lm=weight-l tm=weight-t w=weight-w
- g=weight-generation lex=weight-lex I=weight-i dlm=weight-dlm pp=weight-pp wt=weight-wt pb=weight-pb lex=weight-lex glm=weight-glm);
-my %ABBR2FULL = map { split /=/, $_, 2 } @ABBR_FULL_MAP;
-my %FULL2ABBR = map { my ($a, $b) = split /=/, $_, 2; ($b, $a); } @ABBR_FULL_MAP;
-
my $minimum_required_change_in_weights = 0.00001;
# stop if no lambda changes more than this
@@ -727,7 +704,7 @@ while (1) {
# Create an ini file for the interpolated phrase table
$interpolated_config ="moses.interpolated.ini";
- substitute_ttable($uninterpolated_config, $interpolated_config, $interpolated_phrase_table, "97");
+ substitute_ttable($uninterpolated_config, $interpolated_config, $interpolated_phrase_table, "99");
# Append the multimodel weights
open(ITABLE,">>$interpolated_config") || die "Failed to append weights to $interpolated_config";
@@ -1201,11 +1178,11 @@ sub run_decoder {
my %model_weights;
for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
- $model_weights{$name} = "-$name" if !defined $model_weights{$name};
+ $model_weights{$name} = "$name=" if !defined $model_weights{$name};
$model_weights{$name} .= sprintf " %.6f", $vals[$i];
}
my $decoder_config = "";
- $decoder_config = join(" ", values %model_weights) unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
+ $decoder_config = "-weight-overwrite '" . join(" ", values %model_weights) ."'" unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
$decoder_config .= " -weight-file run$run.sparse-weights" if -e "run$run.sparse-weights";
$decoder_config .= " -report-segmentation" if $__PROMIX_TRAINING;
print STDERR "DECODER_CFG = $decoder_config\n";
@@ -1245,8 +1222,6 @@ sub insert_ranges_to_featlist {
if ($namedpair =~ /^(.*?):/) {
$name = $1;
$namedpair =~ s/^.*?://;
- die "Unrecognized name '$name' in --range=$range"
- if !defined $ABBR2FULL{$name};
}
my ($min, $max) = split /\.\./, $namedpair;
die "Bad min '$min' in --range=$range" if $min !~ /^-?[0-9.]+$/;
@@ -1309,15 +1284,17 @@ sub get_featlist_from_file {
while (<$fh>) {
$nr++;
chomp;
- /^(.+) (\S+) (\S+)$/ || die "invalid feature: $_";
- my ($longname, $feature, $value) = ($1, $2, $3);
- next if $value eq "sparse";
- push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
- if $value !~ /^[+-]?[0-9.\-e]+$/;
- push @errs, "$featlistfn:$nr:Unknown feature '$feature', please add it to \@ABBR_FULL_MAP\n"
- if !defined $ABBR2FULL{$feature};
- push @names, $feature;
- push @startvalues, $value;
+ /^(\S+)= (.+)$/ || die "invalid feature: $_";
+ my ($longname, $valuesStr) = ($1, $2);
+ next if $valuesStr eq "sparse";
+
+ my @values = split(/ /, $valuesStr);
+ foreach my $value (@values) {
+ push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
+ if $value !~ /^[+-]?[0-9.\-e]+$/;
+ push @names, $longname;
+ push @startvalues, $value;
+ }
}
close $fh;
@@ -1348,7 +1325,7 @@ sub get_order_of_scores_from_nbestlist {
foreach my $tok (split /\s+/, $scores) {
if ($tok =~ /.+_.+:/) {
$sparse = 1;
- } elsif ($tok =~ /^([a-z][0-9a-z]*):/i) {
+ } elsif ($tok =~ /^([a-z][0-9a-z]*)=/i) {
$label = $1;
} elsif ($tok =~ /^-?[-0-9.\-e]+$/) {
if (!$sparse) {
@@ -1375,6 +1352,13 @@ sub create_config {
my $bleu_achieved = shift; # just for verbosity
my $sparse_weights_file = shift; # only defined when optimizing sparse features
+ for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
+ my $name = $featlist->{"names"}->[$i];
+ my $val = $featlist->{"values"}->[$i];
+ # ensure long name
+ print STDERR "featlist: $name=$val \n";
+ }
+
my %P; # the hash of all parameters we wish to override
# first convert the command line parameters to the hash
@@ -1387,32 +1371,15 @@ sub create_config {
foreach (split(/ /, $___DECODER_FLAGS)) {
if (/^\-([^\d].*)$/) {
$parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
} else {
- die "Found value with no -paramname before it: $_"
+ my $value = $_;
+ die "Found value with no -paramname before it: $value"
if !defined $parameter;
- push @{$P{$parameter}}, $_;
+ push @{$P{$parameter}}, $value;
}
}
}
- # First delete all weights params from the input, we're overwriting them.
- # Delete both short and long-named version.
- for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
- my $name = $featlist->{"names"}->[$i];
- delete($P{$name});
- delete($P{$ABBR2FULL{$name}});
- }
-
- # Convert weights to elements in P
- for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
- my $name = $featlist->{"names"}->[$i];
- my $val = $featlist->{"values"}->[$i];
- $name = defined $ABBR2FULL{$name} ? $ABBR2FULL{$name} : $name;
- # ensure long name
- push @{$P{$name}}, $val;
- }
-
if (defined($sparse_weights_file)) {
push @{$P{"weight-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
}
@@ -1442,31 +1409,28 @@ sub create_config {
# parameter name
my $parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
- print $out "[$parameter]\n";
-
- # change parameter, if new values
- if (defined($P{$parameter})) {
- # write new values
- foreach (@{$P{$parameter}}) {
- print $out $_ . "\n";
- }
- delete($P{$parameter});
- # skip until new parameter, only write comments
- while ($line = <$ini_fh>) {
- print $out $line if $line =~ /^\#/ || $line =~ /^\s+$/;
- last if $line =~ /^\[/;
- last unless $line;
- }
- next;
- }
- # unchanged parameter, write old
- while ($line = <$ini_fh>) {
- last if $line =~ /^\[/;
- print $out $line;
- }
- }
+ if ($parameter eq "weight") {
+ # leave weights 'til last. We're changing it
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ }
+ }
+ elsif (defined($P{$parameter})) {
+ # found a param (thread, verbose etc) that we're overriding. Leave to the end
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ }
+ }
+ else {
+ # unchanged parameter, write old
+ print $out "[$parameter]\n";
+ while ($line = <$ini_fh>) {
+ last if $line =~ /^\[/;
+ print $out $line;
+ }
+ }
+ }
# write all additional parameters
foreach my $parameter (keys %P) {
@@ -1476,6 +1440,26 @@ sub create_config {
}
}
+ # write all weights
+ print $out "[weight]\n";
+
+ my $prevName = "";
+ my $outStr = "";
+ for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
+ my $name = $featlist->{"names"}->[$i];
+ my $val = $featlist->{"values"}->[$i];
+
+ if ($prevName eq $name) {
+ $outStr .= " $val";
+ }
+ else {
+ print $out "$outStr\n";
+ $outStr = "$name= $val";
+ $prevName = $name;
+ }
+ }
+ print $out "$outStr\n";
+
close $ini_fh;
close $out;
print STDERR "Saved: $outfn\n";
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 680495602..503cb0a46 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -39,7 +39,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
@_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
$_SPARSE_TRANSLATION_TABLE, @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
- $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $IGNORE);
+ $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $_NUM_LATTICE_FEATURES, $IGNORE);
my $_BASELINE_CORPUS = "";
my $_CORES = 1;
@@ -136,6 +136,7 @@ $_HELP = 1
'cores=i' => \$_CORES,
'instance-weights-file=s' => \$_INSTANCE_WEIGHTS_FILE,
'lmodel-oov-feature' => \$_LMODEL_OOV_FEATURE,
+ 'num-lattice-features=i' => \$_NUM_LATTICE_FEATURES,
);
if ($_HELP) {
@@ -2028,6 +2029,14 @@ sub create_ini {
print INI "\n# no generation models, no weight-generation section\n";
}
+ if ($_NUM_LATTICE_FEATURES) {
+   # Write a [weight-i] section with one initial weight of 0.1 per
+   # lattice / confusion-net feature requested via --num-lattice-features.
+   print INI "\n\n#lattice or confusion net weights\n[weight-i]\n";
+   for (1..$_NUM_LATTICE_FEATURES) {
+     print INI "0.1\n";
+   }
+   # Terminate the section in the ini file; this newline previously went
+   # to STDOUT because the INI filehandle was missing.
+   print INI "\n";
+ }
+
print INI "\n# word penalty\n[weight-w]\n-1\n\n";
if ($_HIERARCHICAL) {
@@ -2116,3 +2125,4 @@ sub open_or_zcat {
return $hdl;
}
+