Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author alvations <alvations@gmail.com> 2015-04-26 21:25:15 +0300
committer alvations <alvations@gmail.com> 2015-04-26 21:25:15 +0300
commit c01b0a6262fbf92e1908ebc31c5f0894c489a2cf (patch)
tree 2e16071917f6cd991c42c363dfa6240110ea34a7 /scripts
parent dda3ddd80b5ff81879a4cd640a8279aad1de6004 (diff)
merging the filter-model-given-input.pl with alvations-master branch
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/training/filter-model-given-input.pl 12
1 file changed, 7 insertions, 5 deletions
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
index 7dec0762c..84dbbe879 100755
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/env perl
+#!/usr/bin/perl -w
# $Id$
# Given a moses.ini file and an input text prepare minimized translation
@@ -8,7 +8,6 @@
# changes by Ondrej Bojar
# adapted for hierarchical models by Phil Williams
-use warnings;
use strict;
use FindBin qw($RealBin);
@@ -37,6 +36,7 @@ my $ZCAT = "gzip -cd";
# get optional parameters
my $opt_hierarchical = 0;
my $binarizer = undef;
+my $threads = 1; # Default is single-thread, i.e. $threads=1
my $syntax_filter_cmd = "$SCRIPTS_ROOTDIR/../bin/filter-rule-table hierarchical";
my $min_score = undef;
my $opt_min_non_initial_rule_count = undef;
@@ -54,6 +54,7 @@ GetOptions(
"SyntaxFilterCmd=s" => \$syntax_filter_cmd,
"tempdir=s" => \$tempdir,
"MinScore=s" => \$min_score,
+ "threads=i" => \$threads,
"MinNonInitialRuleCount=i" => \$opt_min_non_initial_rule_count, # DEPRECATED
) or exit(1);
@@ -63,7 +64,7 @@ my $config = shift;
my $input = shift;
if (!defined $dir || !defined $config || !defined $input) {
- print STDERR "usage: filter-model-given-input.pl targetdir moses.ini input.text [-Binarizer binarizer] [-Hierarchical] [-MinScore id:threshold[,id:threshold]*] [-SyntaxFilterCmd cmd]\n";
+ print STDERR "usage: filter-model-given-input.pl targetdir moses.ini input.text [-Binarizer binarizer] [-Hierarchical] [-MinScore id:threshold[,id:threshold]*] [-SyntaxFilterCmd cmd] [-threads num]\n";
exit 1;
}
$dir = ensure_full_path($dir);
@@ -405,7 +406,8 @@ for(my $i=0;$i<=$#TABLE;$i++) {
# ... phrase translation model
elsif ($binarizer =~ /processPhraseTableMin/) {
#compact phrase table
- my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] && rm $mid_file.sorted";
+ ##my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads && rm $mid_file.sorted";
+ my $cmd = "$binarizer -in <($catcmd $mid_file | LC_ALL=C sort -T $tempdir) -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads -encoding None";
safesystem($cmd) or die "Can't binarize";
} elsif ($binarizer =~ /CreateOnDiskPt/) {
my $cmd = "$binarizer $mid_file $new_file.bin";
@@ -426,7 +428,7 @@ for(my $i=0;$i<=$#TABLE;$i++) {
$lexbin =~ s/PhraseTable/LexicalTable/;
my $cmd;
if ($lexbin =~ /processLexicalTableMin/) {
- $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file && rm $mid_file.sorted";
+ $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file -threads $threads && rm $mid_file.sorted";
} else {
$lexbin =~ s/^\s*(\S+)\s.+/$1/; # no options
$cmd = "$lexbin -in $mid_file -out $new_file";