Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ondrej Bojar <bojar@ufal.mff.cuni.cz> 2013-05-02 02:33:09 +0400
committer: Ondrej Bojar <bojar@ufal.mff.cuni.cz> 2013-05-02 02:33:09 +0400
commit: b57f5a530e152abdb0aaee08afca59a1c67cfd64 (patch)
tree: 6bbd509ba08799570a3ef2c50ee04c34846de8ec /scripts/training
parent: 3e2b83444d20f240c45c29bd4d100c581c5fe677 (diff)
allow disabling distortion model binarization
Diffstat (limited to 'scripts/training')
-rwxr-xr-x scripts/training/filter-model-given-input.pl 31
1 file changed, 20 insertions, 11 deletions
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
index 07d883898..6f2c4500c 100755
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@@ -37,6 +37,7 @@ my $ZCAT = "gzip -cd";
my $opt_hierarchical = 0;
my $tempdir = undef;
my $binarizer = undef;
+my $binarize_distortion_model = 1; # if some binarizer is given!
my $opt_min_non_initial_rule_count = undef;
my $opt_gzip = 1; # gzip output files (so far only phrase-based ttable until someone tests remaining models and formats)
@@ -45,6 +46,7 @@ GetOptions(
"gzip!" => \$opt_gzip,
"Hierarchical" => \$opt_hierarchical,
"Binarizer=s" => \$binarizer,
+ "binarize-distortion-model!" => \$binarize_distortion_model, # (dis)allow (the given) binarizer for distortion models
"MinNonInitialRuleCount=i" => \$opt_min_non_initial_rule_count
) or exit(1);
@@ -158,7 +160,12 @@ while(<INI>) {
$file =~ s/^.*\/+([^\/]+)/$1/g;
my $new_name = "$dir/$file";
- $new_name =~ s/\.gz//;
+ $new_name =~ s/\.gz//;
+ if ($binarizer && $binarize_distortion_model) {
+ # the filename should not include .gz for binarized models
+ } else {
+ $new_name .= ".gz" if $opt_gzip;
+ }
print INI_OUT "$factors $t $w $new_name\n";
push @TABLE_NEW_NAME,$new_name;
@@ -309,17 +316,19 @@ for(my $i=0;$i<=$#TABLE;$i++) {
}
# reordering model
else {
- my $lexbin = $binarizer;
- $lexbin =~ s/PhraseTable/LexicalTable/;
- my $cmd;
- if ($lexbin =~ /processLexicalTableMin/) {
- $cmd = "LC_ALL=C sort -T $tempdir $mid_file > $mid_file.sorted; $lexbin -in $mid_file.sorted -out $new_file; rm $mid_file.sorted";
- } else {
- $lexbin =~ s/^\s*(\S+)\s.+/$1/; # no options
- $cmd = "$lexbin -in $mid_file -out $new_file";
+ if ($binarize_distortion_model) {
+ my $lexbin = $binarizer;
+ $lexbin =~ s/PhraseTable/LexicalTable/;
+ my $cmd;
+ if ($lexbin =~ /processLexicalTableMin/) {
+ $cmd = "LC_ALL=C sort -T $tempdir $mid_file > $mid_file.sorted; $lexbin -in $mid_file.sorted -out $new_file; rm $mid_file.sorted";
+ } else {
+ $lexbin =~ s/^\s*(\S+)\s.+/$1/; # no options
+ $cmd = "$lexbin -in $mid_file -out $new_file";
+ }
+ print STDERR $cmd."\n";
+ print STDERR `$cmd`;
}
- print STDERR $cmd."\n";
- print STDERR `$cmd`;
}
}