Welcome to mirror list, hosted at ThFree Co, Russian Federation.

generic-multicore-parallelizer.perl « support « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d821aa11454021a674151b6ac6128ee36e91a4bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use warnings;
use strict;

# Command-line settings. -cores and -serial have defaults; the rest are
# filled in from -in, -out, -cmd and -tmpdir.
my ($infile,$outfile,$cmd,$tmpdir);
my $serial = 1;
my $cores = 8;
# PID of this process; it is embedded in the name of every temporary file.
my $parent = $$;

use Getopt::Long qw(:config pass_through no_ignore_case);

# Option spec => destination pairs. With pass_through, arguments that are
# not listed here stay in @ARGV; no_ignore_case makes option names
# case-sensitive.
my @option_spec = (
    'cores=i'  => \$cores,
    'tmpdir=s' => \$tmpdir,
    'in=s'     => \$infile,
    'out=s'    => \$outfile,
    'cmd=s'    => \$cmd,
    'serial=i' => \$serial,
);
exit(1) unless GetOptions(@option_spec);

# Check the required arguments before anything is written to disk.
die("ERROR: specify command with -cmd") unless $cmd;
die("ERROR: specify infile with -in") unless $infile;
die("ERROR: specify outfile with -out") unless $outfile;
die("ERROR: did not find infile '$infile'") unless -e $infile;
die("ERROR: you need to specify a tempdir with -tmpdir") unless $tmpdir;
# The chunk size is obtained by dividing by $cores*$serial, so neither may
# be zero or negative.
die("ERROR: -cores must be at least 1") unless $cores >= 1;
die("ERROR: -serial must be at least 1") unless $serial >= 1;
# set up directory
# List-form system() does not go through the shell, so the path is handed to
# mkdir verbatim; a failure to create the directory is fatal.
system('mkdir', '-p', $tmpdir) == 0
    or die("ERROR: could not create tempdir '$tmpdir'");

# create split input files
# Lines are counted in Perl rather than with `wc -l`: wc counts newline
# characters, so a final line without a trailing newline would be missed.
my $sentenceN = 0;
open(my $count_fh, '<', $infile)
    or die("ERROR: cannot read infile '$infile': $!");
while (defined(my $line = <$count_fh>)) {
    $sentenceN++;
}
close($count_fh);

# Lines per chunk: ceiling of (line count / number of chunks).
my $chunkN = $cores * $serial;
my $splitN = int(($sentenceN + $chunkN - 1) / $chunkN);
# split rejects a line count of 0, which is what an empty infile yields.
$splitN = 1 if $splitN < 1;

# Log exactly the command that is run. The list form of system() bypasses
# the shell, so the file names are passed to split verbatim.
my @split_cmd = ('split', '-a', '4', '-l', $splitN, $infile, "$tmpdir/in-$parent-");
print STDERR "@split_cmd\n";
system(@split_cmd) == 0
    or die("ERROR: split of '$infile' failed");

# find out the names of the processes
# Each listed chunk file is reduced to the alphabetic suffix that follows
# "in-<digits>-"; a name that does not match the pattern is kept unchanged.
my @CORE = map {
    chomp(my $name = $_);
    $name =~ s/.+in\-\d+\-([a-z]+)$/$1/e;
    $name;
} `ls $tmpdir/in-$parent-*`;

# create core scripts
# One bash script is written per group of up to $serial consecutive chunks.
# The script is named after the first chunk of its group and contains one
# command line per chunk. $cmd is used as a printf format and receives the
# chunk's input path and output path, in that order.
for (my $i = 0; $i < scalar(@CORE); ) {
    my $script = "$tmpdir/core-$parent-$CORE[$i].bash";
    open(my $bash, '>', $script) or die "Cannot open: $!";
    print {$bash} "#bash\n\n";
#    print {$bash} "export PATH=$ENV{PATH}\n\n";
    my $written = 0;
    do {
        my $core = $CORE[$i++];
        printf {$bash} $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
        $written++;
        # Stop at the end of @CORE: the last group may hold fewer than
        # $serial chunks when the chunk count is not a multiple of $serial.
    } while ($written < $serial && $i < scalar(@CORE));
    # Buffered write errors only surface when the handle is closed.
    close($bash) or die "Cannot close $script: $!";
}

# fork processes
# One child is started per core script; the PIDs are collected in @CHILDREN.
my (@CHILDREN);
foreach my $core (@CORE){
    # Only chunks that have a script of their own get a process.
    next unless -e "$tmpdir/core-$parent-$core.bash";
    my $child = fork();
    if (! defined $child) {
	# fork() returns undef on failure. Without this check the parent
	# would fall into the child branch below and exit with status 0.
	my $reason = $!;
	waitpid( $_, 0 ) foreach @CHILDREN; # reap what was already started
	die("ERROR: could not fork child $core: $reason");
    }
    if ($child == 0) { # I am child
	print STDERR "running child $core\n";
	# stdout and stderr of the script are redirected to per-core files.
	system("bash $tmpdir/core-$parent-$core.bash 1> $tmpdir/core-$parent-$core.stdout 2> $tmpdir/core-$parent-$core.stderr");
	exit 0;
    }
    push @CHILDREN,$child;
    print "adding child $core to children\n";
    # Start the children one second apart.
    sleep(1);
}

# Block until every forked child has exited, in the order they were started.
print "waiting on children\n";
waitpid( $_, 0 ) for @CHILDREN;
# NOTE(review): the reason for this extra one-second pause is not evident
# from the code.
sleep(1);

# merge outfile
# The per-chunk outputs are concatenated in @CORE order. The copy is done in
# Perl, so no file name is interpreted by a shell and nothing is removed
# recursively.
# Drop a stale output file (or symlink) first; a failure here is reported by
# the open() that follows.
unlink($outfile) if -f $outfile || -l $outfile;
open(my $merged, '>', $outfile)
    or die("ERROR: cannot write outfile '$outfile': $!");
binmode($merged);
foreach my $core (@CORE){
    my $part = "$tmpdir/out-$parent-$core";
    my $part_fh;
    unless (open($part_fh, '<', $part)) {
	# Best effort: a missing chunk output is reported and skipped, and
	# the remaining chunks are still merged.
	warn("WARNING: cannot read '$part': $!\n");
	next;
    }
    binmode($part_fh);
    local $/ = \65536; # read fixed-size blocks instead of lines
    while (defined(my $buf = <$part_fh>)) {
	print {$merged} $buf or die("ERROR: cannot write to '$outfile': $!");
    }
    close($part_fh);
}
close($merged) or die("ERROR: cannot close outfile '$outfile': $!");