scripts/babysitter


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409

#!/usr/bin/env python

# Mimics GNU timeout, but does some fancy tracking based on custom features in mono nunit24.

import argparse
import subprocess
import re
import signal
import time
import sys
import os.path
import copy
import tempfile
import calendar
import json

### Constants

# Here is how the communication with nunit works. It has to work with two constraints:
# - We don't invoke nunit directly. We invoke some Makefile which invokes some other Makefile
#   and at some point down the line someone calls nunit.
# - nunit has to be able to report back to us even if (especially if) it terminates improperly.
# To deal with all this, communication babysitter->nunit is done by environment variables,
# and communication nunit->babysitter is done by leaving behind files in known locations.

# Filenames

CURRENT_TEST_FILE = "babysitter_report_current_test_file.txt"
RAN_TEST_FILE     = "babysitter_report_ran_test_file.txt"
FAILED_TEST_FILE  = "babysitter_report_failed_test_file.txt"
LOGGING_FILE      = "babysitter_report.json_lines"

# Environment keys

# Keys used for Babysitter<->Nunit IPC
CURRENT_TEST_KEY  = 'MONO_BABYSITTER_NUNIT_CURRENT_TEST_FILE' # Tell nunit where to leave files
RAN_TEST_KEY      = 'MONO_BABYSITTER_NUNIT_RAN_TEST_FILE'
FAILED_TEST_KEY   = 'MONO_BABYSITTER_NUNIT_FAILED_TEST_FILE'
RUN_KEY           = 'MONO_BABYSITTER_NUNIT_RUN_TEST' # Semicolon-separated list of test names
RUN_MODE_KEY      = 'MONO_BABYSITTER_NUNIT_RUN_MODE' # Equal to either RUN or AFTER

# Keys used for script configuration (see --help text)
LOG_FILE_KEY      = 'MONO_BABYSITTER_LOG_FILE'       # Path
RETRY_KEY         = 'MONO_BABYSITTER_RETRY'          # Equal to an integer
VERBOSE_KEY       = 'MONO_BABYSITTER_VERBOSE'        # "Undocumented"-- used for debugging babysitter

# JSON keys

DATE_JSON       = 'date'          # POSIX timestamp of test suite run
INVOKE_JSON     = 'invocation'
COUNT_JSON      = 'iteration'        # How many times was command executed?
LIMIT_JSON      = 'failure_max'
SUPPORT_JSON    = 'babysitter_protocol' # Was the test suite running with a babysitter-aware nunit?
FINAL_CODE_JSON = 'final_code'
TESTS_JSON      = 'tests'         # Holds dictionary of (test case name)->(dict with TEST_ keys below)
TEST_FAILURES         = 'normal_failures'
TEST_CRASH_FAILURES   = 'crash_failures'
TEST_TIMEOUT_FAILURES = 'timeout_failures'

### Interpret arguments

scriptname = sys.argv[0]

# This is very silly: This program needs to switch from argparse to manual parsing
# after the second positional argument, but argparse doesn't let you do this.
# I work around this with a custom subclass which can selectively swallow errors:
class Hesitate(Exception):
	pass
class HesitantParser(argparse.ArgumentParser):
	def __init__(s, *args, **kwargs):
		s.hesitating = True  # Errors will be swallowed until this is set false
		argparse.ArgumentParser.__init__(s, *args, **kwargs)
	def error(s, *args, **kwargs):
		if s.hesitating:
			raise Hesitate() # Bail out before errors are printed.
		argparse.ArgumentParser.error(s, *args, **kwargs)

# Define args
argparser = HesitantParser(description="""\
Run a test suite with a timeout.\n
Durations are floating point numbers followed by an optional unit:\n
's' for seconds (the default)
'm' for minutes
'h' for hours
'd' for days\n
supported environment variables:
  %s: File to write logs to (as line-delimited JSON)
  %s: If set to a number, failed test cases will be rerun this many times (NUnit test suites only)""" %
		(LOG_FILE_KEY, RETRY_KEY),
	formatter_class=argparse.RawTextHelpFormatter)
argparser.add_argument('-s', '--signal', dest='signal', metavar='signal', default='TERM',
	help="Send this signal to the command on timeout, instead of TERM.")
argparser.add_argument('-k', '--kill-after-duration', dest='kill_after', metavar='duration',
	help="If process continues running after signal, send KILL after this duration.")
argparser.add_argument('duration',
	help="Time to run before sending signal.")
argparser.add_argument('command', nargs="+", help="Command+args to run.")

# Repeatedly parse the given args until we find the shortest prefix for which parsing succeeds.
argc = len(sys.argv)
extra_args = []
for limit in range(1,argc+1):
	try:
		if limit == argc: # On the final pass, parse for real
			argparser.hesitating = False
		args = argparser.parse_args(sys.argv[1:limit])
		# If we're still here, parse_args succeeded.
		# The final parsed arg is the command; remaining argv items are its args.
		extra_args = sys.argv[limit:]
		break
	except Hesitate: # Before the final pass, if parse_args fails, skip
		pass

argparser.hesitating = False # Just act like a normal argparser from here

durationre = re.compile(r'(\d+)([smhd]?)')
def parse_duration(duration): # Accept units
	match = durationre.match(duration)
	if not match:
		argparser.error("Could not understand duration %s" % duration)
	time, units = match.group(1), match.group(2)
	time = int(time)
	if units == 'm':
		time *= 60
	elif units == 'h':
		time *= 60*60
	elif units == 'd':
		time *= 60*60*24
	return time

def parse_signal(sig):        # Accept names
	if sig.isdigit():
		return int(sig)
	for k,v in signal.__dict__.iteritems():
		if k == ("SIG%s" % sig):
			return v
	argparser.error("Could not understand signal name %s" % sig)

# Final interpretation of arguments
duration = parse_duration(args.duration)
kill_after = parse_duration(args.kill_after) if args.kill_after is not None else None
timeout_signal = parse_signal(args.signal)
command = args.command + extra_args

# Process environment
global_env = copy.deepcopy( os.environ )

verbose = VERBOSE_KEY in global_env
logging = LOG_FILE_KEY in global_env
logfile = global_env[LOG_FILE_KEY] if logging else None
crash_resuming = True # TODO: Consider exposing this option, or adding a retry_on_crash option.
failmax = int(global_env[RETRY_KEY]) if RETRY_KEY in global_env else 0
babysitting = True # If false, babysitter becomes a timeout clone with no env manipulation or anything.
if babysitting:
	babysitter_dir = tempfile.mkdtemp()
	global_env[CURRENT_TEST_KEY] = os.path.join(babysitter_dir, CURRENT_TEST_FILE)
	global_env[RAN_TEST_KEY]     = os.path.join(babysitter_dir, RAN_TEST_FILE)
	global_env[FAILED_TEST_KEY]  = os.path.join(babysitter_dir, FAILED_TEST_FILE)

have_unix_process_groups = 'killpg' in os.__dict__
have_windows_process_groups = 'CREATE_NEW_PROCESS_GROUP' in subprocess.__dict__

### Timeout implementation

def wait(proc, duration):
	# TODO: If we detect Python 3.3, Popen objects have a wait(timeout) method we can use
	start = time.time()
	while True:
		code = proc.poll()
		if code is not None:
			return code
		if time.time()-start > duration:
			return None
		time.sleep(0.05)

# Popen and send_signal can't be called in their basic forms because we want to
# send signals to all children, not just to the immediately spawned process.
# Unfortunately the way to do this varies by operating system.
def popen(*args, **kwargs):
	if have_unix_process_groups: # Call function on spawn to become process group leader
		kwargs['preexec_fn'] = os.setsid
	elif have_windows_process_groups: # Set magic flag for Windows process groups
		kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
	return subprocess.Popen(*args, **kwargs)

def send_signal(proc, sig):
	if have_unix_process_groups: # UNIX
		# For compatibility with GNU timeout, pre-send the signal to just the monitored process
		os.kill(proc.pid, sig)
		# Send signal to entire group
		os.killpg(proc.pid, sig)
		# For compatibility with GNU Timeout, send a SIGCONT after the signal
		# (so delivery has a chance to occur even for stopped processes)
		if sig != signal.SIGKILL and sig != signal.SIGCONT:
			os.kill(proc.pid, signal.SIGCONT)
	elif have_windows_process_groups: # Windows with Python 2.7 or better
		os.kill(proc.pid, sig) # Becuase CREATE_NEW_PROCESS_GROUP, will go to entire group
	else: # Windows with Python 2.6-- CREATE_NEW_PROCESS_GROUP not supported
		proc.send_signal(sig) # No way to contact group, just kill process

### Utility functions

def attemptDelete(path):
	try:
		os.remove(path)
	except OSError:
		pass

def attemptLines(path):
	try:
		with open(path) as f:
			return map(lambda s: s.strip('\r\n'), f.readlines())
	except (OSError, IOError):
		return []

def attemptFirstLine(path):
	lines = attemptLines(path)
	if lines:
		return lines[0]
	return None

def posixtime(): # Amazingly, time.time() does not guarantee an epoch in the docs. However this does:
	return calendar.timegm(time.gmtime())

failcount = {}
def failure_may_retry(test):
	if test not in failcount:
		failcount[test] = 0
	failcount[test] += 1
	return failcount[test] < failmax

def verbose_print(arg):
	if (verbose):
		print(arg)

def failure_annotate(test):
	return "%s (failure #%d of %d allowed)" % (test, failcount[test], failmax)

def pluralize(lst):
	return "s" if len(lst) > 1 else ""

### Run command

def run(): # Returns exit code
	resume_after = []
	retry_these = []
	ever_completed = False
	died_politely = False
	proc = None
	code = None

	# Set up logging
	log = {DATE_JSON: posixtime(), COUNT_JSON:0, LIMIT_JSON:failmax, SUPPORT_JSON:False,
		INVOKE_JSON: " ".join(command)}

	def log_value(key, set=None, add=None, target=log): # Call to add toplevel value to log
		if add is not None:
			if key not in target:
				target[key] = 0
			target[key] += add
		else:
			target[key] = set

	def log_test(testname, key, set=None, add=None):   # Call to add test-case-level value to log
		if TESTS_JSON not in log:
			log[TESTS_JSON] = {}
		if testname not in log[TESTS_JSON]:
			log[TESTS_JSON][testname] = {}
		log_value(key, set=set, add=add, target=log[TESTS_JSON][testname])

	# Ready to run tests
	try:
		while True:
			env = copy.copy(global_env)
			if ever_completed:
				retry_next = []
			else: # Persist reported failures list until first non-crash run
				retry_next = retry_these

			log_value(COUNT_JSON, add=1)

			# Prepare environment/filesystem
			if babysitting:
				for key in [CURRENT_TEST_KEY, RAN_TEST_KEY, FAILED_TEST_KEY]:
					attemptDelete(env[key])
				if resume_after:
					env[RUN_KEY] = ";".join(resume_after)
					env[RUN_MODE_KEY] = "EXCLUDE"
				elif retry_these:
					env[RUN_KEY] = ";".join(retry_these)
					env[RUN_MODE_KEY] = "RUN"

			# Run test suite
			try:
				proc = popen(command, env=env)
			except OSError:
				died_politely = True
				sys.stderr.write("%s: Could not execute command `%s`\n" % (scriptname, command[0]))
				sys.exit(127)

			code = wait(proc, duration)
			timed_out = code is None
			if timed_out:                  # Initial timeout
				send_signal(proc, timeout_signal)
				if kill_after is not None: # Kill-after timeout
					code = wait(proc, kill_after)
					if code is None:
						send_signal(proc, signal.SIGKILL)
				code = proc.wait()         # Waits forever
				sys.stderr.write("%s: Command `%s` timed out\n" % (scriptname, command[0]))
			died_politely = True

			# The test suite has now run, and what happens next varies:
			# 1. The suite either completed fully without failures, or timed out: Just quit.
			# 2. The suite crashed (halted without completing):
			#   Remember any failures for later and rerun, using a blacklist of testcases we have completed.
			# 3. The suite completed, but there were failures reported:
			#   Rerun, using a whitelist of only reported-failed testcases.
			# 4. The suite crashed partway through a run with a whitelist:
			#   Rerun, using a whitelist consisting of the previous whitelist minus successful testcases.

			crashed_at = attemptFirstLine(env[CURRENT_TEST_KEY])
			failed_tests = attemptLines(env[FAILED_TEST_KEY])
			ran_tests = attemptLines(env[RAN_TEST_KEY])
			bailout = False

			if crashed_at or failed_tests or ran_tests: # Test suite follows the babysitter protocol
				log_value(SUPPORT_JSON, True)

			if not crashed_at and not ever_completed:  # The resume_after whitelist is only
				resume_after = []                      # used before the first noncrashing run
				ever_completed = True

			if timed_out:       # Currently no retries after timeout
				bailout = True
				code = 124      # See GNU timeout manpage

			if code or crashed_at: # Process failures
				# Handle crash failures
				if crashed_at and not timed_out:
					log_test(crashed_at, TEST_CRASH_FAILURES, add=1)
					if not crash_resuming:
						bailout = True

					if failure_may_retry(crashed_at):
						if ever_completed:           # Rerun with whitelist next time
							for test in retry_these: # Prepopulate with last whitelist minus run testcases
								if test == crashed_at or test not in ran_tests: # (plus crashed testcase)
									retry_next.append(test)
						else:                            # Rerun with blacklist next time
							for test in ran_tests:       # Add testcases we just ran to blacklist
								if test != crashed_at:   # (except for the crashed testcase)
									resume_after.append(test)
					else:
						bailout = True

				# Handle reported failures
				for test in failed_tests:
					log_test(test, TEST_FAILURES, add=1)
					if failure_may_retry(test):
						retry_next.append(test)
					else:
						bailout = True

				# Report human-readable failures for stdout log.
				message = "%s:" % (scriptname)
				if timed_out:
					message += " Saw timeout in test case %s (never allowed)." % (crashed_at)
				elif crashed_at:
					message += " Saw crash in test case %s." % (failure_annotate(crashed_at))
				if failed_tests:
					message += " Saw test failure in test case%s %s." % (pluralize(failed_tests), "; ".join(map(failure_annotate, failed_tests)))
				if not (timed_out or crashed_at or failed_tests):
					message += " Test suite terminated with code %d, " % (code)
					if log[SUPPORT_JSON]:
						message += "but failure did not occur during a test case. Halting."
					else:
						message += "and suite cannot report test case data. Halting."
				elif bailout:
					message += " Will halt testing."
				print(message)

			if bailout or not (resume_after or retry_next): # If not retrying
				return code

			# If we got here, a retry is occurring
			retry_these = retry_next

			# Report human-readable retry notice for stdout log.
			message = "%s: Will rerun test suite" % (scriptname)
			if log[COUNT_JSON] > 1:
				message += " (iteration #%d)" % (log[COUNT_JSON])
			if resume_after:
				message += ", resuming at crashed testcase %s." % (crashed_at)
			else:
				message += ", running previously failed testcase%s: %s." % (pluralize(retry_these), "; ".join(retry_these))
			print(message)
	finally:
		# Emergency: Ensure command does not outlive this script
		if proc is not None and not died_politely:
			send_signal(proc, signal.SIGKILL)

		# Write out logs
		log_value(FINAL_CODE_JSON, "EXCEPTION" if code is None else code)
		if logging:
			with open(logfile, "a") as f:
				f.write(json.dumps(log) + os.linesep)

sys.exit( run() )