diff options
-rw-r--r-- | mert/Data.cpp | 2 | ||||
-rw-r--r-- | mert/FeatureDataIterator.cpp | 2 | ||||
-rw-r--r-- | mert/FeatureStats.cpp | 4 | ||||
-rw-r--r-- | scripts/ems/experiment.meta | 4 | ||||
-rwxr-xr-x | scripts/ems/experiment.perl | 49 | ||||
-rwxr-xr-x | scripts/ems/support/substitute-filtered-tables.perl | 35 | ||||
-rwxr-xr-x | scripts/training/mert-moses.pl | 21 |
7 files changed, 68 insertions, 49 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp index 3f91c1376..1efa080a2 100644 --- a/mert/Data.cpp +++ b/mert/Data.cpp @@ -212,7 +212,7 @@ void Data::InitFeatureMap(const string& str) { while (!buf.empty()) { getNextPound(buf, substr); - // string ending with ":" are skipped, because they are the names of the features + // string ending with "=" are skipped, because they are the names of the features if (!EndsWith(substr, "=")) { stringstream ss; ss << tmp_name << "_" << tmp_index << " "; diff --git a/mert/FeatureDataIterator.cpp b/mert/FeatureDataIterator.cpp index a22112bb4..471da07ee 100644 --- a/mert/FeatureDataIterator.cpp +++ b/mert/FeatureDataIterator.cpp @@ -89,7 +89,7 @@ void FeatureDataIterator::readNext() { StringPiece line = m_in->ReadLine(); m_next.push_back(FeatureDataItem()); for (TokenIter<AnyCharacter, true> token(line, AnyCharacter(" \t")); token; ++token) { - TokenIter<AnyCharacterLast,false> value(*token,AnyCharacterLast(":")); + TokenIter<AnyCharacterLast,false> value(*token,AnyCharacterLast("=")); if (!value) throw FileFormatException(m_in->FileName(), line.as_string()); StringPiece first = *value; ++value; diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp index 22f62e234..242d3fbd0 100644 --- a/mert/FeatureStats.cpp +++ b/mert/FeatureStats.cpp @@ -220,12 +220,12 @@ void FeatureStats::set(string &theString, const SparseVector& sparseWeights ) while (!theString.empty()) { getNextPound(theString, substring); // regular feature - if (substring.find(":") == string::npos) { + if (substring.find("=") == string::npos) { add(ConvertStringToFeatureStatsType(substring)); } // sparse feature else { - size_t separator = substring.find_last_of(":"); + size_t separator = substring.find_last_of("="); addSparse(substring.substr(0,separator), atof(substring.substr(separator+1).c_str()) ); } } diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index c82195b1a..d10bf2dbe 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -844,13 +844,13 @@ apply-filter default-name: tuning/moses.filtered.ini pass-if: TRAINING:binarize-all ignore-if: use-hiero - template: cp IN1/moses.ini OUT + template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT apply-filter-devtest in: TRAINING:config filtered-dir-devtest out: filtered-config-devtest default-name: tuning/moses.filtered.devtest.ini pass-if: TRAINING:binarize-all - ignore-unless: use-mira + ignore-unless: use-mira template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT tune in: filtered-config input reference filtered-config-devtest input-devtest reference-devtest diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index b4ebe161a..a4d94b00e 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2100,15 +2100,12 @@ sub define_training_sigtest_filter { &create_step($step_id,$cmd); } -sub define_training_create_config { - my ($step_id) = @_; - - my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) - = &get_output_and_input($step_id); +sub get_config_tables { + my ($config,$reordering_table,$phrase_translation_table,$generation_table,$domains) = @_; my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir"); - #my $cmd = "$moses_src_dir/bin/create-ini "; my $cmd = &backoff_and_get("TRAINING:create-ini"); + $cmd = "$moses_src_dir/bin/create-ini" unless defined($cmd); my %IN; my %OUT; @@ -2188,8 +2185,29 @@ sub define_training_create_config { my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""),$extract_version); $cmd .= "-unknown-word-label $unknown_word_label "; } + my $additional_ini = &get("TRAINING:additional-ini"); + if (&get("TRAINING:score-settings") && + &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) { + $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>"; + $cmd .= "-sparse-translation-table "; + } + $cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini); + $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features"); + + return $cmd; +} + +sub define_training_create_config { + my ($step_id) = @_; + + my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) + = &get_output_and_input($step_id); + + my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains); + + # sparse lexical features provide additional content for config file + $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features; - # find out which language model files have been built my @LM_SETS = &get_sets("LM"); my %INTERPOLATED_AWAY; my %OUTPUT_FACTORS; @@ -2257,18 +2275,6 @@ sub define_training_create_config { $cmd .= "-lm $factor:$order:$lm_file:$type "; } - my $additional_ini = &get("TRAINING:additional-ini"); - if (&get("TRAINING:score-settings") && - &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) { - $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>"; - $cmd .= "-sparse-translation-table "; - } - $cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini); - - # sparse lexical features provide additional content for config file - $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features; - $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features"); - &create_step($step_id,$cmd); } @@ -2608,9 +2614,12 @@ sub define_tuningevaluation_filter { # create pseudo-config file else { $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION"; + $cmd = "touch $config\n"; $delete_config = 1; - $cmd = "cp $dir/model/moses.ini.$VERSION $config \n"; + $cmd .= &get_config_tables($config,$reordering_table,$phrase_translation_table,undef,$domains); + + $cmd .= "-lm 0:3:$config:8\n"; # dummy kenlm 3-gram model on factor 0 } # filter command diff --git a/scripts/ems/support/substitute-filtered-tables.perl b/scripts/ems/support/substitute-filtered-tables.perl index 530130aa8..3efb243d7 100755 --- a/scripts/ems/support/substitute-filtered-tables.perl +++ b/scripts/ems/support/substitute-filtered-tables.perl @@ -10,30 +10,39 @@ if (scalar @ARGV < 1 || ! -e $ARGV[0]) { # read config sections about filtered tables my @arr; open(FILTERED, $ARGV[0]) or die "Cannot open: $!"; +my $feature_section = 0; while(my $line = <FILTERED>) { chomp($line); - if ($line =~ /PhraseModel /) { - print STDERR "pt:$line \n"; - push(@arr, $line); + if ($line =~ /^\[(.+)\]/) { + $feature_section = ($1 eq "feature"); + } + next unless $feature_section; + if ($line =~ /PhraseDictionary/) { + print STDERR "pt:$line \n"; + push(@arr, $line); + } + elsif ($line =~ /LexicalReordering/) { + print STDERR "ro:$line \n"; + push(@arr, $line); } - elsif ($line =~ /LexicalReordering /) { - print STDERR "ro:$line \n"; - push(@arr, $line); - } } close(FILTERED); # pass through master config file and replace table sections my $ind = 0; +$feature_section = 0; while(my $line = <STDIN>) { chomp($line); - if ($line =~ /PhraseModel /) { - print $arr[$ind]."\n"; - ++$ind; + if ($line =~ /^\[(.+)\]/) { + $feature_section = ($1 eq "feature"); + } + if ($feature_section && $line =~ /PhraseDictionary/) { + print $arr[$ind]."\n"; + ++$ind; } - elsif ($line =~ /LexicalReordering /) { - print $arr[$ind]."\n"; - ++$ind; + elsif ($feature_section && $line =~ /LexicalReordering/) { + print $arr[$ind]."\n"; + ++$ind; } else { print "$line\n"; diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl index f4797f868..e9f46de26 100755 --- a/scripts/training/mert-moses.pl +++ b/scripts/training/mert-moses.pl @@ -1284,16 +1284,17 @@ sub get_featlist_from_file { while (<$fh>) { $nr++; chomp; - /^(\S+)= (.+)$/; - my ($longname, $valuesStr) = ($1, $2); - next if (!defined($valuesStr)); + if (/^(\S+)= (.+)$/) { # only for feature functions with dense features + my ($longname, $valuesStr) = ($1, $2); + next if (!defined($valuesStr)); - my @values = split(/ /, $valuesStr); - foreach my $value (@values) { - push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n" - if $value !~ /^[+-]?[0-9.\-e]+$/; - push @names, $longname; - push @startvalues, $value; + my @values = split(/ /, $valuesStr); + foreach my $value (@values) { + push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n" + if $value !~ /^[+-]?[0-9.\-e]+$/; + push @names, $longname; + push @startvalues, $value; + } } } close $fh; @@ -1323,7 +1324,7 @@ sub get_order_of_scores_from_nbestlist { my $label = undef; my $sparse = 0; # we ignore sparse features here foreach my $tok (split /\s+/, $scores) { - if ($tok =~ /.+_.+:/) { + if ($tok =~ /.+_.+=/) { $sparse = 1; } elsif ($tok =~ /^([a-z][0-9a-z]*)=/i) { $label = $1; |