Welcome to mirror list, hosted at ThFree Co, Russian Federation.

generic-parallel.perl « generic « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
blob: 653912c5c7b7788154c20f9929a3c735acbe37fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env perl 

use warnings;
use strict;
use utf8;

# Read and write all three standard streams through the :utf8 layer.
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# Forward declaration; the definition is at the bottom of the file.
sub NumStr($);

# Command line:
#   $ARGV[0]   number of input lines per chunk (handed to `split -l`)
#   $ARGV[1]   parent directory for the per-process scratch directory
#   $ARGV[2..] the command (and its arguments) to run on every chunk
# Without a command there is nothing to run, so refuse to start instead of
# continuing with undefined values.
die "Usage: $0 NUM_SPLIT_LINES TMPDIR COMMAND [ARG ...]\n"
  if scalar(@ARGV) < 3;

my $NUM_SPLIT_LINES = $ARGV[0];

# The scratch directory name includes the process id ($$).  A failed mkdir
# (missing parent, no permission, left-over directory of the same name) is
# fatal: continuing would either fail later or pick up stale chunk files.
my $TMPDIR = $ARGV[1];
$TMPDIR = "$TMPDIR/tmp.$$";
mkdir $TMPDIR or die "Cannot create directory $TMPDIR: $!\n";
print STDERR "TMPDIR=$TMPDIR \n";

# Every command word is followed by one space (so the string ends in a
# space); it is later interpolated into shell command lines as-is.
my $cmd = join("", map { "$_ " } @ARGV[2 .. $#ARGV]);

# split input file
# STDIN is copied to $TMPDIR/input.all with every line (including the last
# one) terminated by a newline, then split(1) cuts that file into chunks of
# $NUM_SPLIT_LINES lines named $TMPDIR/x0000000, $TMPDIR/x0000001, ...
open (INPUT_ALL, ">", "$TMPDIR/input.all")
  or die "Cannot write $TMPDIR/input.all: $!\n";
binmode INPUT_ALL, ":utf8";
while (my $line = <STDIN>)
{
  chomp($line);
  print INPUT_ALL $line."\n";
}
# Buffered write errors only surface at close, so check it.
close(INPUT_ALL) or die "Cannot close $TMPDIR/input.all: $!\n";

# The numeric suffix length (-a 7) must equal the width of the strings
# NumStr() returns; with a shorter suffix the chunk files are never found
# again and the script produces no output at all.
# The list form of system() bypasses the shell, so the paths are passed to
# split as single arguments whatever characters they contain.
my @splitArgs = ("split", "-l", $NUM_SPLIT_LINES, "-a", "7", "-d",
                 "$TMPDIR/input.all", "$TMPDIR/x");
my $cmd2 = join(" ", @splitArgs);
system(@splitArgs) == 0
  or die "Command failed (status $?): $cmd2\n";

# create exec file
# $TMPDIR/exec receives one shell command line per chunk; each line feeds
# the chunk to the user command on stdin and redirects its stdout to
# "<chunk>.out".
open (EXEC, ">", "$TMPDIR/exec")
  or die "Cannot write $TMPDIR/exec: $!\n";
binmode EXEC, ":utf8";

# execute in parallel
print STDERR "executing\n";

# Chunks are numbered consecutively from 0; the first missing file ends
# the scan.
my $i = 0;
my $filePath = "$TMPDIR/x" .NumStr($i);
while (-f $filePath) 
{
  print EXEC "$cmd < $filePath > $filePath.out\n";

  ++$i;
  $filePath = "$TMPDIR/x" .NumStr($i);
}
# The job list must be completely on disk before parallel reads it.
close (EXEC) or die "Cannot close $TMPDIR/exec: $!\n";

# parallel reads the job list from stdin.  Backticks are kept so that
# anything parallel itself prints on stdout is still discarded.  A non-zero
# status is reported but not fatal: whatever output the jobs did produce is
# still concatenated and printed below.
$cmd2 = "parallel < $TMPDIR/exec";
`$cmd2`;
warn "Command exited with status $?: $cmd2\n" if $? != 0;

# concatenate
# The outputs of chunks 1, 2, ... are appended, in order, to the output of
# chunk 0.  The copy is done in-process and byte-for-byte (no I/O layer),
# which avoids starting one shell per chunk and interpolating paths into a
# shell command line.
print STDERR "concatenating\n";

$i = 1;
my $firstPath = "$TMPDIR/x" .NumStr(0) .".out";
$filePath = "$TMPDIR/x" .NumStr($i) .".out";

# Opened lazily so that nothing is created when there is nothing to append.
my $firstOut;
while (-f $filePath) 
{
  if (!defined $firstOut)
  {
    open ($firstOut, ">>", $firstPath)
      or die "Cannot append to $firstPath: $!\n";
    binmode $firstOut;
  }

  open (my $chunkOut, "<", $filePath)
    or die "Cannot read $filePath: $!\n";
  binmode $chunkOut;

  # read() returns 0 at end of file and undef on error.
  my $buffer;
  my $bytesRead;
  while ($bytesRead = read($chunkOut, $buffer, 65536))
  {
    print {$firstOut} $buffer
      or die "Cannot append to $firstPath: $!\n";
  }
  die "Cannot read $filePath: $!\n" unless defined $bytesRead;
  close ($chunkOut);

  ++$i;
  $filePath = "$TMPDIR/x" .NumStr($i) .".out";
}

# Flush everything to disk before the file is read back.
if (defined $firstOut)
{
  close ($firstOut) or die "Cannot close $firstPath: $!\n";
}

# output
# Copy the combined result to STDOUT, terminating every line (including the
# last one) with a newline.  When the input was empty no chunk was created
# and $firstPath does not exist; in that case there is nothing to print.
if (-e $firstPath)
{
  open (OUTPUT_ALL, "<", $firstPath)
    or die "Cannot read $firstPath: $!\n";
  binmode OUTPUT_ALL, ":utf8";
  while (my $line = <OUTPUT_ALL>)
  { 
    chomp($line);
    print "$line\n";
  }
  close(OUTPUT_ALL);
}

# Remove the scratch directory and everything in it.  The list form of
# system() bypasses the shell, so the path reaches rm as one argument even
# if it contains spaces or shell metacharacters.  A failure only leaves
# temporary files behind, so it is reported without aborting.
$cmd = "rm -rf $TMPDIR/";
system("rm", "-rf", "$TMPDIR/") == 0
  or warn "Command failed (status $?): $cmd\n";

###########################################
# NumStr($i)
#
# Returns $i prefixed with zeros: six zeros for values below 10, one zero
# fewer each time the value reaches the next power of ten, and none from
# 1000000 upwards.  Non-negative integers below 1000000 therefore come out
# seven characters wide (0 -> "0000000", 42 -> "0000042").
sub NumStr($)
{
    my ($number) = @_;

    # Start from the widest padding and drop one zero for every limit the
    # value is not below.
    my $zeroCount = 6;
    foreach my $limit (10, 100, 1000, 10000, 100000, 1000000) {
        last if $number < $limit;
        --$zeroCount;
    }

    return ("0" x $zeroCount) . $number;
}