diff options
author | alvations <alvations@gmail.com> | 2015-04-26 21:25:15 +0300 |
---|---|---|
committer | alvations <alvations@gmail.com> | 2015-04-26 21:25:15 +0300 |
commit | c01b0a6262fbf92e1908ebc31c5f0894c489a2cf (patch) | |
tree | 2e16071917f6cd991c42c363dfa6240110ea34a7 /scripts | |
parent | dda3ddd80b5ff81879a4cd640a8279aad1de6004 (diff) |
merging the filter-model-given-input.pl with alvations-master branch
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/training/filter-model-given-input.pl | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl index 7dec0762c..84dbbe879 100755 --- a/scripts/training/filter-model-given-input.pl +++ b/scripts/training/filter-model-given-input.pl @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#!/usr/bin/perl -w # $Id$ # Given a moses.ini file and an input text prepare minimized translation @@ -8,7 +8,6 @@ # changes by Ondrej Bojar # adapted for hierarchical models by Phil Williams -use warnings; use strict; use FindBin qw($RealBin); @@ -37,6 +36,7 @@ my $ZCAT = "gzip -cd"; # get optional parameters my $opt_hierarchical = 0; my $binarizer = undef; +my $threads = 1; # Default is single-thread, i.e. $threads=1 my $syntax_filter_cmd = "$SCRIPTS_ROOTDIR/../bin/filter-rule-table hierarchical"; my $min_score = undef; my $opt_min_non_initial_rule_count = undef; @@ -54,6 +54,7 @@ GetOptions( "SyntaxFilterCmd=s" => \$syntax_filter_cmd, "tempdir=s" => \$tempdir, "MinScore=s" => \$min_score, + "threads=i" => \$threads, "MinNonInitialRuleCount=i" => \$opt_min_non_initial_rule_count, # DEPRECATED ) or exit(1); @@ -63,7 +64,7 @@ my $config = shift; my $input = shift; if (!defined $dir || !defined $config || !defined $input) { - print STDERR "usage: filter-model-given-input.pl targetdir moses.ini input.text [-Binarizer binarizer] [-Hierarchical] [-MinScore id:threshold[,id:threshold]*] [-SyntaxFilterCmd cmd]\n"; + print STDERR "usage: filter-model-given-input.pl targetdir moses.ini input.text [-Binarizer binarizer] [-Hierarchical] [-MinScore id:threshold[,id:threshold]*] [-SyntaxFilterCmd cmd] [-threads num]\n"; exit 1; } $dir = ensure_full_path($dir); @@ -405,7 +406,8 @@ for(my $i=0;$i<=$#TABLE;$i++) { # ... phrase translation model elsif ($binarizer =~ /processPhraseTableMin/) { #compact phrase table - my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] && rm $mid_file.sorted"; + ##my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads && rm $mid_file.sorted"; + my $cmd = "$binarizer -in <($catcmd $mid_file | LC_ALL=C sort -T $tempdir) -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads -encoding None"; safesystem($cmd) or die "Can't binarize"; } elsif ($binarizer =~ /CreateOnDiskPt/) { my $cmd = "$binarizer $mid_file $new_file.bin"; @@ -426,7 +428,7 @@ for(my $i=0;$i<=$#TABLE;$i++) { $lexbin =~ s/PhraseTable/LexicalTable/; my $cmd; if ($lexbin =~ /processLexicalTableMin/) { - $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file && rm $mid_file.sorted"; + $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file -threads $threads && rm $mid_file.sorted"; } else { $lexbin =~ s/^\s*(\S+)\s.+/$1/; # no options $cmd = "$lexbin -in $mid_file -out $new_file"; |