Welcome to mirror list, hosted at ThFree Co, Russian Federation.

qsub-wrapper.pl « generic « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 622323bdbd16f0bd9537c0f2e65a7d8a229e3895 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#!/usr/bin/env perl 

# $Id$
use warnings;
use strict;

#######################
#Default parameters 
#parameters for submiiting processes through SGE
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
my $queueparameters="";

# look for the correct pwdcmd 
my $pwdcmd = getPwdCmd();

my $workingdir = `$pwdcmd`; chomp $workingdir;
my $tmpdir="$workingdir/tmp$$";
my $jobscript="$workingdir/job$$.sh";
my $qsubout="$workingdir/out.job$$";
my $qsuberr="$workingdir/err.job$$";


$SIG{INT} = \&kill_all_and_quit; # catch exception for CTRL-C

my $help="";
my $dbg="";
my $version="";
my $qsubname="WR$$";
my $cmd="";
my $cmdout=undef;
my $cmderr=undef;
my $parameters="";
my $old_sge = 0; # assume grid engine < 6.0

sub init(){
  use Getopt::Long qw(:config pass_through);
  GetOptions('version'=>\$version,
             'help'=>\$help,
             'debug'=>\$dbg,
             'qsub-prefix=s'=> \$qsubname,
             'command=s'=> \$cmd,
             'stdout=s'=> \$cmdout,
             'stderr=s'=> \$cmderr,
             'queue-parameter=s'=> \$queueparameters,
             'old-sge' => \$old_sge,
            ) or exit(1);
  $parameters="@ARGV";
  
  version() if $version;
  usage() if $help;
  print_parameters() if $dbg;
}

#######################
##print version
sub version(){
#    print STDERR "version 1.0 (29-07-2006)\n";
    print STDERR "version 1.1 (31-07-2006)\n";
    exit(1);
}

#usage
sub usage(){
  print STDERR "qsub-wrapper.pl [options]\n";
  print STDERR "Options:\n";
  print STDERR "-command <file> command to run\n";
  print STDERR "-stdout <file> file to save stdout of cmd (optional)\n";
  print STDERR "-stderr <file> file to save stderr of cmd (optional)\n";
  print STDERR "-qsub-prefix <string> name for sumbitted jobs (optional)\n";
  print STDERR "-queue-parameters <string>  parameter for the queue (optional)\n";
  print STDERR "-old-sge ... assume Sun Grid Engine < 6.0\n";
  print STDERR "-debug debug\n";
  print STDERR "-version print version of the script\n";
  print STDERR "-help this help\n";
  exit(1);
}

#printparameters
sub print_parameters(){
  print STDERR "command: $cmd\n";
  if (defined($cmdout)){ print STDERR "file for stdout: $cmdout\n"; }
  else { print STDERR "file for stdout is not defined, stdout is discarded\n"; }
  if (defined($cmderr)){ print STDERR "file for stdout: $cmderr\n"; }
  else { print STDERR "file for stderr is not defined, stderr is discarded\n"; }
  print STDERR "Qsub name: $qsubname\n";
  print STDERR "Queue parameters: $queueparameters\n";
  print STDERR "parameters directly passed to cmd: $parameters\n";
  exit(1);
}

#script creation
sub preparing_script(){
  my $scriptheader="\#\!/bin/bash\n# the above line is ignored by qsub, unless parameter \"-b yes\" is set!\n\n";
  $scriptheader.="uname -a\n\n";

  $scriptheader.="cd $workingdir\n\n";
    
  open (OUT, "> $jobscript");
  print OUT $scriptheader;

  print OUT "if $cmd $parameters > $tmpdir/cmdout$$ 2> $tmpdir/cmderr$$ ; then
    echo 'succeeded'
else
    echo failed with exit status \$\?
    die=1
fi
";

  if (defined $cmdout){
    print OUT "mv -f $tmpdir/cmdout$$ $cmdout || echo failed to preserve the log: $tmpdir/cmdout$$\n\n";
  }
  else{
    print OUT "rm -f $tmpdir/cmdout$$\n\n";
  }

  if (defined $cmderr){
    print OUT "mv -f $tmpdir/cmderr$$ $cmderr || echo failed to preserve the log: $tmpdir/cmderr$$\n\n";
  }
  else{
    print OUT "rm -f $tmpdir/cmderr$$\n\n";
  }
  print OUT "if [ x\$die == 1 ]; then exit 1; fi\n";
  close(OUT);

  #setting permissions of the script
  chmod(oct(755),$jobscript);
}

#######################
#Script starts here

init();

usage() if $cmd eq "";

safesystem("mkdir -p $tmpdir") or die;

preparing_script();

my $maysync = $old_sge ? "" : "-sync y";

# create the qsubcmd to submit to the queue with the parameter "-b yes"
my $qsubcmd="qsub $queueparameters $maysync -V -o $qsubout -e $qsuberr -N $qsubname -b yes $jobscript > $jobscript.log 2>&1";

#run the qsubcmd 
safesystem($qsubcmd) or die;

#getting id of submitted job
my $res;
open (IN,"$jobscript.log") or die "Can't read main job id: $jobscript.log";
chomp($res=<IN>);
my @arrayStr = split(/\s+/,$res);
my $id=$arrayStr[2];
die "Failed to get job id from $jobscript.log, got: $res"
  if $id !~ /^[0-9]+$/;
close(IN);

print STDERR " res:$res\n";
print STDERR " id:$id\n";

if ($old_sge) {
  # need to workaround -sync, add another job that will wait for the main one
  # prepare a fake waiting script
  my $syncscript = "$jobscript.sync_workaround_script.sh";
  safesystem("echo 'date' > $syncscript") or die;

  my $checkpointfile = "$jobscript.sync_workaround_checkpoint";

  # ensure checkpoint does not exist
  safesystem("\\rm -f $checkpointfile") or die;

  # start the 'hold' job, i.e. the job that will wait
  $cmd="qsub -cwd $queueparameters -hold_jid $id -o $checkpointfile -e /dev/null -N $qsubname.W $syncscript >& $qsubname.W.log";
  safesystem($cmd) or die;
  
  # and wait for checkpoint file to appear
  my $nr=0;
  while (!-e $checkpointfile) {
    sleep(10);
    $nr++;
    print STDERR "w" if $nr % 3 == 0;
  }
  safesystem("\\rm -f $checkpointfile $syncscript") or die();
  print STDERR "End of waiting workaround.\n";
}


my $failure=&check_exit_status();
print STDERR "check_exit_status returned $failure\n";

&kill_all_and_quit() if $failure;

&remove_temporary_files() if !$dbg;

sub check_exit_status(){
  my $failure=0;

  print STDERR "check_exit_status of submitted job $id\n";
  open(IN,"$qsubout") or die "Can't read $qsubout";
  while (<IN>){
    $failure=1 if (/failed with exit status/);
  }
  close(IN);
  return $failure;
}

sub kill_all_and_quit(){
  print STDERR "kill_all_and_quit\n";
  print STDERR "qdel $id\n";
  safesystem("qdel $id");

  print STDERR "The submitted jobs died not correctly\n";
  print STDERR "Send qdel signal to the submitted jobs\n";

  exit(1);
}

sub remove_temporary_files(){
  #removing temporary files

  unlink("${jobscript}");
  unlink("${jobscript}.log");
  unlink("$qsubout");
  unlink("$qsuberr");
  rmdir("$tmpdir");
}

sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  if ($? == -1) {
    print STDERR "Failed to execute: @_\n  $!\n";
    exit(1);
  }
  elsif ($? & 127) {
    printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
      ($? & 127),  ($? & 128) ? 'with' : 'without';
    exit(1);
  }
  else {
    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return ! $exitcode;
  }
}

# look for the correct pwdcmd (pwd by default, pawd if it exists)
# I assume that pwd always exists
sub getPwdCmd(){
	my $pwdcmd="pwd";
	my $a;
	chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
	if ($a && -e $a){	$pwdcmd=$a;	}
	return $pwdcmd;
}