Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/generic/binarize4moses2.perl')
-rwxr-xr-x scripts/generic/binarize4moses2.perl 85
1 file changed, 85 insertions, 0 deletions
diff --git a/scripts/generic/binarize4moses2.perl b/scripts/generic/binarize4moses2.perl
new file mode 100755
index 000000000..a703cc241
--- /dev/null
+++ b/scripts/generic/binarize4moses2.perl
@@ -0,0 +1,85 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Basename;
+use File::Path qw(make_path remove_tree);
+use FindBin qw($RealBin);
+
+sub systemCheck($);
+
+# Option targets; those without a default must be supplied on the command line.
+my $mosesDir = "$RealBin/../..";  # two levels up from this script's directory
+my $ptPath;
+my $lexRoPath;
+my $outPath;
+my $numScores = 4;
+my $numLexScores;
+my $pruneNum = 0;
+my $scfg = 0;
+
+GetOptions("phrase-table=s" => \$ptPath,
+           "lex-ro=s" => \$lexRoPath,
+           "output-dir=s" => \$outPath,
+           "num-scores=i" => \$numScores,
+           "num-lex-scores=i" => \$numLexScores,
+           "prune=i" => \$pruneNum,
+           "scfg" => \$scfg
+          ) or exit 1;
+
+# Validate up front; --lex-ro is optional but requires --num-lex-scores.
+die("ERROR: please set --phrase-table") unless defined($ptPath);
+die("ERROR: please set --output-dir") unless defined($outPath);
+die("ERROR: please set --num-lex-scores") if defined($lexRoPath) && !defined($numLexScores);
+
+my $cmd;
+
+# Scratch directory beside the output dir; the PID ($$) keeps concurrent runs apart.
+# NOTE: paths are interpolated unquoted into shell commands; avoid spaces/metacharacters.
+my $tempPath = dirname($outPath) ."/tmp.$$";
+make_path($tempPath);  # croaks on failure, unlike the unchecked `mkdir -p`
+
+# Step 1: run the phrase table through filter-pt (-n $pruneNum).
+$cmd = "gzip -dc $ptPath | $mosesDir/contrib/sigtest-filter/filter-pt -n $pruneNum | gzip -c > $tempPath/pt.gz";
+systemCheck($cmd);
+
+# Step 2: optionally merge the lex-ro table in; pt.txt.gz ends up pointing at
+# whichever table CreateProbingPT2 should read.
+if (defined($lexRoPath)) {
+  $cmd = "$mosesDir/bin/processLexicalTableMin -in $lexRoPath -out $tempPath/lex-ro -T . -threads all";
+  systemCheck($cmd);
+
+  $cmd = "$mosesDir/bin/addLexROtoPT $tempPath/pt.gz $tempPath/lex-ro.minlexr | gzip -c > $tempPath/pt.withLexRO.gz";
+  systemCheck($cmd);
+
+  $cmd = "ln -s pt.withLexRO.gz $tempPath/pt.txt.gz";
+  systemCheck($cmd);
+}
+else {
+  $cmd = "ln -s pt.gz $tempPath/pt.txt.gz";
+  systemCheck($cmd);
+}
+
+# Step 3: run CreateProbingPT2 on pt.txt.gz, writing to $outPath.
+$cmd = "$mosesDir/bin/CreateProbingPT2 --num-scores $numScores --log-prob --input-pt $tempPath/pt.txt.gz --output-dir $outPath";
+$cmd .= " --num-lex-scores $numLexScores" if defined($lexRoPath);
+$cmd .= " --scfg" if $scfg;
+systemCheck($cmd);
+
+# The scratch files were only inputs to CreateProbingPT2; remove them now.
+remove_tree($tempPath);
+
+exit(0);
+
+#####################################################
+sub systemCheck($)
+{
+  # Echo $cmd to STDERR, run it with system(), and exit(1) if it fails.
+  my $cmd = shift;
+  print STDERR "Executing: $cmd\n";
+  return if system($cmd) == 0;
+  # $? is -1 when the command could not be started, else the raw wait status.
+  my $why = ($? == -1) ? "could not run: $!" : "wait status $?";
+  print STDERR "ERROR: command failed ($why): $cmd\n";
+  exit(1);
+}