Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: phikoehn <pkoehn@inf.ed.ac.uk> 2013-06-09 23:00:19 +0400
committer: phikoehn <pkoehn@inf.ed.ac.uk> 2013-06-09 23:00:19 +0400
commit: 54f2ea07bdcf52d3e66c3ffb206b09614c2dc1ff (patch)
tree: 5ab63d24626ca47838a6aa4cf66dc127cd74409c /scripts/training
parent: 416e3c435b962f98822b6142eb0cc254d68f36dd (diff)
handle sparse features in translation table
Diffstat (limited to 'scripts/training')
-rwxr-xr-x scripts/training/convert-moses-ini-to-v2.perl 32
1 file changed, 32 insertions, 0 deletions
diff --git a/scripts/training/convert-moses-ini-to-v2.perl b/scripts/training/convert-moses-ini-to-v2.perl
index 50e2ee78f..01e1c818b 100755
--- a/scripts/training/convert-moses-ini-to-v2.perl
+++ b/scripts/training/convert-moses-ini-to-v2.perl
@@ -13,6 +13,7 @@ my %LM_IMPLEMENTATION = ( 0 => "SRILM",
my (%FEATURE,%WEIGHT);
my $i=0;
+my ($has_sparse_ttable_features,$sparse_weight_file) = (0);
for(; $i<scalar(@INI); $i++) {
my $line = $INI[$i];
@@ -29,6 +30,19 @@ for(; $i<scalar(@INI); $i++) {
$section eq "phrase-length-feature") {
$FEATURE{$section} = &get_data();
}
+ elsif ($section eq "weight-file") {
+ print $header.$line;
+ my $WEIGHT_FILE = &get_data();
+ $sparse_weight_file = $$WEIGHT_FILE[0];
+ $has_sparse_ttable_features = `cat $sparse_weight_file | grep ^stm | wc -l`;
+ if ($has_sparse_ttable_features) {
+ print STDERR "sparse weight feature file has translaton model features\n -> creating new sparse weight file '$sparse_weight_file.new'\n";
+ print "$sparse_weight_file.new\n";
+ }
+ else {
+ print "$sparse_weight_file\n";
+ }
+ }
elsif ($section =~ /weight-(.+)/ && $section ne "weight-file") {
$WEIGHT{$1} = &get_data();
}
@@ -49,6 +63,24 @@ for(; $i<scalar(@INI); $i++) {
}
print $header;
+if ($has_sparse_ttable_features) {
+ open(SPARSE,$sparse_weight_file);
+ open(NEW,">$sparse_weight_file.new");
+ while(<SPARSE>) {
+ if (!/^stm/) {
+ print NEW $_;
+ }
+ else {
+ s/^stm//;
+ for (my $i=0;$i<scalar@{$FEATURE{"ttable-file"}};$i++) {
+ print NEW "TranslationModel$i$_";
+ }
+ }
+ }
+ close(NEW);
+ close(SPARSE);
+}
+
my ($feature,$weight) = ("","");
$feature .= "UnknownWordPenalty\n";
$weight .= "UnknownWordPenalty0= 1\n";