Welcome to mirror list, hosted at ThFree Co, Russian Federation.

binarize4moses2.perl « generic « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5765c370573a45e46882d5c926c513435bd702f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env perl

use strict;

use Getopt::Long;
use File::Basename;
use FindBin qw($RealBin);

sub systemCheck($);

# Directory two levels above this script; every external tool invoked
# below is looked up underneath it (bin/ and contrib/).
my $mosesDir = "$RealBin/../..";

# Command-line settings, filled in by GetOptions.
my $ptPath;          # --phrase-table   : file fed to 'gzip -dc' (required)
my $lexRoPath;       # --lex-ro         : input of processLexicalTableMin (optional)
my $outPath;         # --output-dir     : output dir of CreateProbingPT2 (required)
my $numLexScores;    # --num-lex-scores : required whenever --lex-ro is given
my $pruneNum = 0;    # --prune          : value passed to 'filter-pt -n'
my $scfg = 0;        # --scfg           : forwards --scfg to CreateProbingPT2

GetOptions("phrase-table=s"  => \$ptPath,
           "lex-ro=s"   => \$lexRoPath,
           "output-dir=s" => \$outPath,
           "num-lex-scores=i" => \$numLexScores,
           "prune=i" => \$pruneNum,
           "scfg" => \$scfg
	   ) or exit 1;

# Mandatory arguments.
die("ERROR: please set --phrase-table") unless defined($ptPath);
die("ERROR: please set --output-dir") unless defined($outPath);

# --lex-ro is optional, but when it is supplied the number of lexical
# reordering scores is needed too. Check this here, before any work is
# started, so that a bad invocation fails immediately instead of after
# the phrase table has already been filtered.
die("ERROR: please set --num-lex-scores")
  if defined($lexRoPath) && !defined($numLexScores);

my $cmd;

# Scratch directory created next to the output directory. The process id
# in the name keeps concurrent runs from sharing the same directory.
my $tempPath = dirname($outPath)  ."/tmp.$$";

# Create it through systemCheck so that a failure (e.g. no write
# permission) stops the script here rather than surfacing later as a
# confusing redirect error.
systemCheck("mkdir -p $tempPath");

# Step 1: decompress the phrase table, run it through filter-pt and
# re-compress the result into the scratch directory.
# NOTE(review): the status returned for a shell pipeline is that of its
# last command, so a failure of gzip -dc or filter-pt may go unnoticed.
$cmd = "gzip -dc $ptPath |  $mosesDir/contrib/sigtest-filter/filter-pt -n $pruneNum | gzip -c > $tempPath/pt.gz";
systemCheck($cmd);

# Step 2 (only with --lex-ro): build the compact reordering table and merge
# it into the phrase table. $ptFile names the file that will be binarized.
my $ptFile = "pt.gz";

if (defined($lexRoPath)) {
  die("ERROR: please set --num-lex-scores") unless defined($numLexScores);

  $cmd = "$mosesDir/bin/processLexicalTableMin  -in $lexRoPath -out $tempPath/lex-ro -T . -threads all";
  systemCheck($cmd);

  $cmd = "$mosesDir/bin/addLexROtoPT $tempPath/pt.gz $tempPath/lex-ro.minlexr  | gzip -c > $tempPath/pt.withLexRO.gz";
  systemCheck($cmd);

  $ptFile = "pt.withLexRO.gz";
}

# Give the chosen table a fixed name; the link target is relative to the
# scratch directory, where both files live.
$cmd = "ln -s $ptFile $tempPath/pt.txt.gz";
systemCheck($cmd);

# Step 3: binarize into the requested output directory.
$cmd = "$mosesDir/bin/CreateProbingPT2 --log-prob --input-pt $tempPath/pt.txt.gz --output-dir $outPath";

if (defined($lexRoPath)) {
    $cmd .= " --num-lex-scores $numLexScores";
}

if ($scfg) {
    $cmd .= " --scfg";
}

systemCheck($cmd);

# Everything succeeded: remove the scratch directory so that the
# intermediate copies of the phrase table do not pile up on disk. When a
# step fails, systemCheck exits first and the directory is kept, which
# leaves the intermediate files available for inspection.
systemCheck("rm -rf $tempPath");

exit(0);

#####################################################
# Run a command through system(), echoing it to STDERR first.
#
# Argument: the command line as a single string.
# Returns normally only when the command succeeds. Otherwise a message
# describing the failure is written to STDERR and the whole script
# terminates with exit status 1.
sub systemCheck($)
{
  my $cmd = shift;
  print STDERR "Executing: $cmd\n";

  my $retVal = system($cmd);
  return if $retVal == 0;

  # Decode the wait status so the user can see why the step failed
  # instead of the script just stopping silently.
  if ($retVal == -1)
  {
    # system() could not start the command at all; $! holds the reason.
    print STDERR "ERROR: failed to execute command: $!\n";
  }
  elsif ($retVal & 127)
  {
    # Low 7 bits: number of the signal that terminated the command.
    printf STDERR "ERROR: command died with signal %d\n", $retVal & 127;
  }
  else
  {
    # High byte: exit status of the command.
    printf STDERR "ERROR: command exited with status %d\n", $retVal >> 8;
  }

  exit(1);
}