Welcome to mirror list, hosted at ThFree Co, Russian Federation.

snt2cooc.pl « scripts « mgizapp - github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5bbefab3aefdbfd5a65e7b67203e703555da0537 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/perl -w 

# Usage: snt2cooc.pl [-sort-buffer-size 200M] [-sort-batch-size 253] [-sort-compress gzip] output vcb1 vcb2 snt12

use strict;
use File::Basename;
use FindBin qw($Bin);

sub systemCheck($);

# Main script body: parse the command line, choose a sort binary, then run
#   snt2coocrmp vcb1 vcb2 snt12 | sort ... | uniq > output
# through systemCheck().
#
# The last four arguments are always the positional ones
# (output vcb1 vcb2 snt12); everything before them is a sort tuning option
# followed by its value.
my $usage = "Usage: $0 [-sort-buffer-size 200M] [-sort-batch-size 253] "
          . "[-sort-compress gzip] output vcb1 vcb2 snt12\n";

# Without four positional arguments the end-relative indexing below would
# pick up the wrong (or undefined) elements, so refuse to continue.
if (@ARGV < 4)
{
  print STDERR $usage;
  exit(1);
}

# Wrap a value in single quotes for /bin/sh so that spaces and shell
# metacharacters in paths or option values reach the programs literally.
my $shellQuote = sub
{
  my ($text) = @_;
  $text =~ s/'/'\\''/g;
  return "'$text'";
};

# Script option => flag handed to the sort program.
my %sortFlagFor = (
  "-sort-buffer-size" => "-S",
  "-sort-batch-size"  => "--batch-size",
  "-sort-compress"    => "--compress-program",
);

my ($out, $vcb1, $vcb2, $snt12) = @ARGV[-4 .. -1];
my @options = @ARGV[0 .. $#ARGV - 4];

my $sortArgs = "";
while (@options)
{
  my $arg = shift @options;
  my $sortFlag = $sortFlagFor{$arg};

  if (!defined $sortFlag)
  {
    # Unknown leading arguments were always skipped; keep doing so, but
    # say it out loud so typos do not go unnoticed.
    print STDERR "WARNING: ignoring unrecognized option $arg\n";
    next;
  }

  # An option value must come from the option area, never from the four
  # positional arguments.
  if (!@options)
  {
    print STDERR "ERROR: option $arg requires a value\n", $usage;
    exit(1);
  }

  $sortArgs .= " $sortFlag " . $shellQuote->(shift @options);
}

# Use gsort when it is installed (it produces --help output), otherwise
# fall back to plain sort.
my $SORT_EXEC = `gsort --help 2>/dev/null` ? 'gsort' : 'sort';

# sort's temporary files go next to the output file.
my $TMPDIR = dirname($out);

# NOTE: the shell reports only the exit status of the last pipeline stage
# (uniq), so a failure inside snt2coocrmp or sort is not seen by
# systemCheck().
my $cmd = join(" ", map { $shellQuote->($_) }
                        ("$Bin/snt2coocrmp", $vcb1, $vcb2, $snt12));
$cmd .= " | $SORT_EXEC$sortArgs -T " . $shellQuote->($TMPDIR)
      . " -nk 1 -nk 2 | uniq > " . $shellQuote->($out);
systemCheck($cmd);

#############################

# Run a shell command and abort the script if it does not succeed.
#
# The command is echoed to STDERR before it is executed.  When system()
# reports anything other than 0, the reason (could not execute, killed by
# a signal, or non-zero exit status) is printed to STDERR and the script
# terminates with exit status 1.
#
# Parameters:
#   $cmd - the command line, passed to system() as a single string
#
# Returns nothing on success; does not return on failure.
sub systemCheck($)
{
  my ($cmd) = @_;
  print STDERR "Executing $cmd \n";

  my $retVal = system($cmd);
  return if $retVal == 0;

  # Decode the wait status so the log shows why the run stopped.
  if ($retVal == -1)
  {
    print STDERR "ERROR: failed to execute command: $!\n";
  }
  elsif ($retVal & 127)
  {
    printf STDERR "ERROR: command died with signal %d\n", $retVal & 127;
  }
  else
  {
    printf STDERR "ERROR: command exited with status %d\n", $retVal >> 8;
  }

  exit(1);
}