diff options
author | Christopher Haster <geky@geky.net> | 2022-10-03 02:35:46 +0300 |
---|---|---|
committer | Christopher Haster <geky@geky.net> | 2022-11-15 22:38:13 +0300 |
commit | 490e1c461645e5905f2d1d8f20f8f14ae7efed8f (patch) | |
tree | 38010bb1409d212cd40681299a1b370d2d43d4fe /scripts | |
parent | ca669938125370bbc93c4189739c55220e099b1c (diff) |
Added perf.py a wrapper around Linux's perf tool for perf sampling
This provides 2 things:
1. perf integration with the bench/test runners - This is a bit tricky
with perf as it doesn't have its own way to combine perf measurements
across multiple processes. perf.py works around this by writing
everything to a zip file, using flock to synchronize. As a plus, free
compression!
2. Parsing and presentation of perf results in a format consistent with
the other CSV-based tools. This actually ran into a surprising number of
issues:
- We need to process raw events to get the information we want, this
ends up being a lot of data (~16MiB at 100Hz uncompressed), so we
parallelize the parsing of each decompressed perf file.
- perf reports raw addresses post-ASLR. It does provide sym+off which
is very useful, but to find the source of static functions we need to
reverse the ASLR by finding the delta that produces the best
symbol<->addr matches.
- This isn't related to perf, but decoding dwarf line-numbers is
really complicated. You basically need to write a tiny VM.
This also turns on perf measurement by default for the bench-runner, but at a
low frequency (100 Hz). This can be decreased or removed in the future
if it causes any slowdown.
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/bench.py | 78 | ||||
-rwxr-xr-x | scripts/code.py | 200 | ||||
-rwxr-xr-x | scripts/coverage.py | 105 | ||||
-rwxr-xr-x | scripts/data.py | 200 | ||||
-rwxr-xr-x | scripts/perf.py | 1263 | ||||
-rwxr-xr-x | scripts/plot.py | 3 | ||||
-rwxr-xr-x | scripts/prettyasserts.py | 3 | ||||
-rwxr-xr-x | scripts/stack.py | 99 | ||||
-rwxr-xr-x | scripts/struct_.py | 159 | ||||
-rwxr-xr-x | scripts/summary.py | 3 | ||||
-rwxr-xr-x | scripts/tailpipe.py | 3 | ||||
-rwxr-xr-x | scripts/test.py | 78 | ||||
-rwxr-xr-x | scripts/tracebd.py | 3 |
13 files changed, 1972 insertions, 225 deletions
diff --git a/scripts/bench.py b/scripts/bench.py index e401d7c..61db83e 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -27,9 +27,13 @@ import time import toml -RUNNER_PATH = 'runners/bench_runner' +RUNNER_PATH = './runners/bench_runner' HEADER_PATH = 'runners/bench_runner.h' +GDB_TOOL = ['gdb'] +VALGRIND_TOOL = ['valgrind'] +PERF_SCRIPT = ['./scripts/perf.py'] + def openio(path, mode='r', buffering=-1, nb=False): if path == '-': @@ -502,12 +506,25 @@ def find_runner(runner, **args): # run under valgrind? if args.get('valgrind'): - cmd[:0] = filter(None, [ - 'valgrind', + cmd[:0] = args['valgrind_tool'] + [ '--leak-check=full', '--track-origins=yes', '--error-exitcode=4', - '-q']) + '-q'] + + # run under perf? + if args.get('perf'): + cmd[:0] = args['perf_script'] + list(filter(None, [ + '-R', + '--perf-freq=%s' % args['perf_freq'] + if args.get('perf_freq') else None, + '--perf-period=%s' % args['perf_period'] + if args.get('perf_period') else None, + '--perf-events=%s' % args['perf_events'] + if args.get('perf_events') else None, + '--perf-tool=%s' % args['perf_tool'] + if args.get('perf_tool') else None, + '-o%s' % args['perf']])) # other context if args.get('geometry'): @@ -789,9 +806,9 @@ def run_stage(name, runner_, ids, output_, **args): try: line = mpty.readline() except OSError as e: - if e.errno == errno.EIO: - break - raise + if e.errno != errno.EIO: + raise + break if not line: break last_stdout.append(line) @@ -1126,24 +1143,24 @@ def run(runner, bench_ids=[], **args): cmd = runner_ + [failure.id] if args.get('gdb_main'): - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'break main', '-ex', 'run', '--args'] elif args.get('gdb_case'): path, lineno = find_path(runner_, failure.id, **args) - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'break %s:%d' % (path, lineno), '-ex', 'run', '--args'] elif failure.assert_ is not None: - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'run', '-ex', 'frame function raise', 
'-ex', 'up 2', '--args'] else: - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'run', '--args'] @@ -1187,6 +1204,7 @@ if __name__ == "__main__": argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None parser = argparse.ArgumentParser( description="Build and run benches.", + allow_abbrev=False, conflict_handler='ignore') parser.add_argument( '-v', '--verbose', @@ -1316,6 +1334,11 @@ if __name__ == "__main__": help="Drop into gdb on bench failure but stop at the beginning " "of main.") bench_parser.add_argument( + '--gdb-tool', + type=lambda x: x.split(), + default=GDB_TOOL, + help="Path to gdb tool to use. Defaults to %r." % GDB_TOOL) + bench_parser.add_argument( '--exec', type=lambda e: e.split(), help="Run under another executable.") @@ -1324,6 +1347,37 @@ if __name__ == "__main__": action='store_true', help="Run under Valgrind to find memory errors. Implicitly sets " "--isolate.") + bench_parser.add_argument( + '--valgrind-tool', + type=lambda x: x.split(), + default=VALGRIND_TOOL, + help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL) + bench_parser.add_argument( + '--perf', + help="Run under Linux's perf to sample performance counters, writing " + "samples to this file.") + bench_parser.add_argument( + '--perf-freq', + help="perf sampling frequency. This is passed directly to the perf " + "script.") + bench_parser.add_argument( + '--perf-period', + help="perf sampling period. This is passed directly to the perf " + "script.") + bench_parser.add_argument( + '--perf-events', + help="perf events to record. This is passed directly to the perf " + "script.") + bench_parser.add_argument( + '--perf-script', + type=lambda x: x.split(), + default=PERF_SCRIPT, + help="Path to the perf script to use. Defaults to %r." % PERF_SCRIPT) + bench_parser.add_argument( + '--perf-tool', + type=lambda x: x.split(), + help="Path to the perf tool to use. 
This is passed directly to the " + "perf script") # compilation flags comp_parser = parser.add_argument_group('compilation options') @@ -1348,7 +1402,7 @@ if __name__ == "__main__": '-o', '--output', help="Output file.") - # runner + bench_ids overlaps bench_paths, so we need to do some munging here + # runner/bench_paths overlap, so need to do some munging here args = parser.parse_intermixed_args() args.bench_paths = [' '.join(args.runner or [])] + args.bench_ids args.runner = args.runner or [RUNNER_PATH] diff --git a/scripts/code.py b/scripts/code.py index 6b373fc..7e7e960 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -5,7 +5,7 @@ # by Linux's Bloat-O-Meter. # # Example: -# ./scripts/code.py lfs.o lfs_util.o -S +# ./scripts/code.py lfs.o lfs_util.o -Ssize # # Copyright (c) 2022, The littlefs authors. # Copyright (c) 2020, Arm Limited. All rights reserved. @@ -14,6 +14,7 @@ import collections as co import csv +import difflib import glob import itertools as it import math as m @@ -25,7 +26,8 @@ import subprocess as sp OBJ_PATHS = ['*.o'] NM_TOOL = ['nm'] -TYPE = 'tTrRdD' +NM_TYPES = 'tTrRdD' +OBJDUMP_TOOL = ['objdump'] # integer fields @@ -135,21 +137,32 @@ def openio(path, mode='r'): def collect(paths, *, nm_tool=NM_TOOL, - type=TYPE, - build_dir=None, + nm_types=NM_TYPES, + objdump_tool=OBJDUMP_TOOL, + sources=None, everything=False, **args): - results = [] - pattern = re.compile( + size_pattern = re.compile( '^(?P<size>[0-9a-fA-F]+)' + - ' (?P<type>[%s])' % re.escape(type) + + ' (?P<type>[%s])' % re.escape(nm_types) + ' (?P<func>.+?)$') + line_pattern = re.compile( + '^\s+(?P<no>[0-9]+)\s+' + '(?:(?P<dir>[0-9]+)\s+)?' 
+ '.*\s+' + '(?P<path>[^\s]+)$') + info_pattern = re.compile( + '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*' + '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$') + + results = [] for path in paths: - # map to source file - src_path = re.sub('\.o$', '.c', path) - if build_dir: - src_path = re.sub('%s/*' % re.escape(build_dir), '', - src_path) + # guess the source, if we have debug-info we'll replace this later + file = re.sub('(\.o)?$', '.c', path, 1) + + # find symbol sizes + results_ = [] # note nm-tool may contain extra args cmd = nm_tool + ['--size-sort', path] if args.get('verbose'): @@ -158,21 +171,18 @@ def collect(paths, *, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, - errors='replace') + errors='replace', + close_fds=False) for line in proc.stdout: - m = pattern.match(line) + m = size_pattern.match(line) if m: func = m.group('func') # discard internal functions if not everything and func.startswith('__'): continue - # discard .8449 suffixes created by optimizer - func = re.sub('\.[0-9]+', '', func) - - results.append(CodeResult( - src_path, func, + results_.append(CodeResult( + file, func, int(m.group('size'), 16))) - proc.wait() if proc.returncode != 0: if not args.get('verbose'): @@ -180,6 +190,121 @@ def collect(paths, *, sys.stdout.write(line) sys.exit(-1) + + # try to figure out the source file if we have debug-info + dirs = {} + files = {} + # note objdump-tool may contain extra args + cmd = objdump_tool + ['--dwarf=rawline', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # note that files contain references to dirs, which we + # dereference as soon as we see them as each file table follows a + # dir table + m = line_pattern.match(line) + if m: + if 
not m.group('dir'): + # found a directory entry + dirs[int(m.group('no'))] = m.group('path') + else: + # found a file entry + dir = int(m.group('dir')) + if dir in dirs: + files[int(m.group('no'))] = os.path.join( + dirs[dir], + m.group('path')) + else: + files[int(m.group('no'))] = m.group('path') + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # do nothing on error, we don't need objdump to work, source files + # may just be inaccurate + pass + + defs = {} + is_func = False + f_name = None + f_file = None + # note objdump-tool may contain extra args + cmd = objdump_tool + ['--dwarf=info', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # state machine here to find definitions + m = info_pattern.match(line) + if m: + if m.group('tag'): + if is_func: + defs[f_name] = files.get(f_file, '?') + is_func = (m.group('tag') == 'DW_TAG_subprogram') + elif m.group('name'): + f_name = m.group('name') + elif m.group('file'): + f_file = int(m.group('file')) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # do nothing on error, we don't need objdump to work, source files + # may just be inaccurate + pass + + for r in results_: + # find best matching debug symbol, this may be slightly different + # due to optimizations + if defs: + # exact match? 
avoid difflib if we can for speed + if r.function in defs: + file = defs[r.function] + else: + _, file = max( + defs.items(), + key=lambda d: difflib.SequenceMatcher(None, + d[0], + r.function, False).ratio()) + else: + file = r.file + + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(file) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + continue + + # simplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + file = os.path.relpath(file) + else: + file = os.path.abspath(file) + + results.append(CodeResult(file, r.function, r.size)) + return results @@ -437,7 +562,7 @@ def main(obj_paths, *, paths.append(path) if not paths: - print("error: no .obj files found in %r?" % obj_paths) + print("error: no .o files found in %r?" % obj_paths) sys.exit(-1) results = collect(paths, **args) @@ -469,13 +594,16 @@ def main(obj_paths, *, # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: - writer = csv.DictWriter(f, CodeResult._by + writer = csv.DictWriter(f, + (by if by is not None else CodeResult._by) + ['code_'+k for k in CodeResult._fields]) writer.writeheader() for r in results: writer.writerow( - {k: getattr(r, k) for k in CodeResult._by} - | {'code_'+k: getattr(r, k) for k in CodeResult._fields}) + {k: getattr(r, k) + for k in (by if by is not None else CodeResult._by)} + | {'code_'+k: getattr(r, k) + for k in CodeResult._fields}) # find previous results? 
if args.get('diff'): @@ -512,7 +640,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Find code size at the function level.") + description="Find code size at the function level.", + allow_abbrev=False) parser.add_argument( 'obj_paths', nargs='*', @@ -579,23 +708,30 @@ if __name__ == "__main__": action='store_true', help="Only show the total.") parser.add_argument( - '-A', '--everything', + '-F', '--source', + dest='sources', + action='append', + help="Only consider definitions in this file. Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( - '--type', - default=TYPE, + '--nm-types', + default=NM_TYPES, help="Type of symbols to report, this uses the same single-character " - "type-names emitted by nm. Defaults to %r." % TYPE) + "type-names emitted by nm. Defaults to %r." % NM_TYPES) parser.add_argument( '--nm-tool', type=lambda x: x.split(), default=NM_TOOL, help="Path to the nm tool to use. Defaults to %r." % NM_TOOL) parser.add_argument( - '--build-dir', - help="Specify the relative build directory. Used to map object files " - "to the correct source files.") + '--objdump-tool', + type=lambda x: x.split(), + default=OBJDUMP_TOOL, + help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL) sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None})) diff --git a/scripts/coverage.py b/scripts/coverage.py index 7d36a47..f74dda8 100755 --- a/scripts/coverage.py +++ b/scripts/coverage.py @@ -3,7 +3,9 @@ # Script to find coverage info after running tests. # # Example: -# ./scripts/coverage.py lfs.t.a.gcda lfs_util.t.a.gcda -s +# ./scripts/coverage.py \ +# lfs.t.a.gcda lfs_util.t.a.gcda \ +# -Flfs.c -Flfs_util.c -slines # # Copyright (c) 2022, The littlefs authors. # Copyright (c) 2020, Arm Limited. All rights reserved. 
@@ -209,19 +211,13 @@ def openio(path, mode='r'): else: return open(path, mode) -def collect(paths, *, +def collect(gcda_paths, *, gcov_tool=GCOV_TOOL, - build_dir=None, + sources=None, everything=False, **args): results = [] - for path in paths: - # map to source file - src_path = re.sub('\.t\.a\.gcda$', '.c', path) - if build_dir: - src_path = re.sub('%s/*' % re.escape(build_dir), '', - src_path) - + for path in gcda_paths: # get coverage info through gcov's json output # note, gcov-tool may contain extra args cmd = GCOV_TOOL + ['-b', '-t', '--json-format', path] @@ -231,7 +227,8 @@ def collect(paths, *, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, - errors='replace') + errors='replace', + close_fds=False) data = json.load(proc.stdout) proc.wait() if proc.returncode != 0: @@ -242,12 +239,30 @@ def collect(paths, *, # collect line/branch coverage for file in data['files']: - if file['file'] != src_path: - continue + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(file['file']) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(file['file'])]) == os.getcwd(): + continue + + # simplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(file['file'])]) == os.getcwd(): + file_name = os.path.relpath(file['file']) + else: + file_name = os.path.abspath(file['file']) for func in file['functions']: func_name = func.get('name', '(inlined)') - # discard internal function (this includes injected test cases) + # discard internal functions (this includes injected test cases) if not everything: if func_name.startswith('__'): continue @@ -255,7 +270,7 @@ def collect(paths, *, # go ahead and add functions, later folding will merge this if # there are other hits on this line results.append(CoverageResult( - src_path, func_name, func['start_line'], + file_name, func_name, 
func['start_line'], func['execution_count'], 0, Frac(1 if func['execution_count'] > 0 else 0, 1), 0, @@ -271,7 +286,7 @@ def collect(paths, *, # go ahead and add lines, later folding will merge this if # there are other hits on this line results.append(CoverageResult( - src_path, func_name, line['line_number'], + file_name, func_name, line['line_number'], 0, line['count'], 0, Frac(1 if line['count'] > 0 else 0, 1), @@ -519,31 +534,25 @@ def table(Result, results, diff_results=None, *, line[-1])) -def annotate(Result, results, paths, *, +def annotate(Result, results, *, annotate=False, lines=False, branches=False, - build_dir=None, **args): # if neither branches/lines specified, color both if annotate and not lines and not branches: lines, branches = True, True - for path in paths: - # map to source file - src_path = re.sub('\.t\.a\.gcda$', '.c', path) - if build_dir: - src_path = re.sub('%s/*' % re.escape(build_dir), '', - src_path) - + for path in co.OrderedDict.fromkeys(r.file for r in results).keys(): # flatten to line info results = fold(Result, results, by=['file', 'line']) - table = {r.line: r for r in results if r.file == src_path} + table = {r.line: r for r in results if r.file == path} # calculate spans to show if not annotate: spans = [] last = None + func = None for line, r in sorted(table.items()): if ((lines and int(r.hits) == 0) or (branches and r.branches.a < r.branches.b)): @@ -553,27 +562,29 @@ def annotate(Result, results, paths, *, line+1+args['context']) else: if last is not None: - spans.append(last) + spans.append((last, func)) last = range( line-args['context'], line+1+args['context']) + func = r.function if last is not None: - spans.append(last) + spans.append((last, func)) - with open(src_path) as f: + with open(path) as f: skipped = False for i, line in enumerate(f): # skip lines not in spans? 
- if not annotate and not any(i+1 in s for s in spans): + if not annotate and not any(i+1 in s for s, _ in spans): skipped = True continue if skipped: skipped = False - print('%s@@ %s:%d @@%s' % ( + print('%s@@ %s:%d: %s @@%s' % ( '\x1b[36m' if args['color'] else '', - src_path, + path, i+1, + next(iter(f for _, f in spans)), '\x1b[m' if args['color'] else '')) # build line @@ -659,12 +670,14 @@ def main(gcda_paths, *, # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: - writer = csv.DictWriter(f, CoverageResult._by + writer = csv.DictWriter(f, + (by if by is not None else CoverageResult._by) + ['coverage_'+k for k in CoverageResult._fields]) writer.writeheader() for r in results: writer.writerow( - {k: getattr(r, k) for k in CoverageResult._by} + {k: getattr(r, k) + for k in (by if by is not None else CoverageResult._by)} | {'coverage_'+k: getattr(r, k) for k in CoverageResult._fields}) @@ -698,8 +711,7 @@ def main(gcda_paths, *, or args.get('lines') or args.get('branches')): # annotate sources - annotate(CoverageResult, results, paths, - **args) + annotate(CoverageResult, results, **args) else: # print table table(CoverageResult, results, @@ -724,7 +736,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Find coverage info after running tests.") + description="Find coverage info after running tests.", + allow_abbrev=False) parser.add_argument( 'gcda_paths', nargs='*', @@ -791,15 +804,21 @@ if __name__ == "__main__": action='store_true', help="Only show the total.") parser.add_argument( - '-A', '--everything', + '-F', '--source', + dest='sources', + action='append', + help="Only consider definitions in this file. 
Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( - '-H', '--hits', + '--hits', action='store_true', help="Show total hits instead of coverage.") parser.add_argument( - '-l', '--annotate', + '-A', '--annotate', action='store_true', help="Show source files annotated with coverage info.") parser.add_argument( @@ -814,7 +833,7 @@ if __name__ == "__main__": '-c', '--context', type=lambda x: int(x, 0), default=3, - help="Show a additional lines of context. Defaults to 3.") + help="Show n additional lines of context. Defaults to 3.") parser.add_argument( '-W', '--width', type=lambda x: int(x, 0), @@ -838,10 +857,6 @@ if __name__ == "__main__": default=GCOV_TOOL, type=lambda x: x.split(), help="Path to the gcov tool to use. Defaults to %r." % GCOV_TOOL) - parser.add_argument( - '--build-dir', - help="Specify the relative build directory. Used to map object files " - "to the correct source files.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None})) diff --git a/scripts/data.py b/scripts/data.py index 05ef868..f95540f 100755 --- a/scripts/data.py +++ b/scripts/data.py @@ -5,7 +5,7 @@ # by Linux's Bloat-O-Meter. # # Example: -# ./scripts/data.py lfs.o lfs_util.o -S +# ./scripts/data.py lfs.o lfs_util.o -Ssize # # Copyright (c) 2022, The littlefs authors. # Copyright (c) 2020, Arm Limited. All rights reserved. 
@@ -14,6 +14,7 @@ import collections as co import csv +import difflib import glob import itertools as it import math as m @@ -25,7 +26,8 @@ import subprocess as sp OBJ_PATHS = ['*.o'] NM_TOOL = ['nm'] -TYPE = 'dDbB' +NM_TYPES = 'dDbB' +OBJDUMP_TOOL = ['objdump'] # integer fields @@ -135,21 +137,32 @@ def openio(path, mode='r'): def collect(paths, *, nm_tool=NM_TOOL, - type=TYPE, - build_dir=None, + nm_types=NM_TYPES, + objdump_tool=OBJDUMP_TOOL, + sources=None, everything=False, **args): - results = [] - pattern = re.compile( + size_pattern = re.compile( '^(?P<size>[0-9a-fA-F]+)' + - ' (?P<type>[%s])' % re.escape(type) + + ' (?P<type>[%s])' % re.escape(nm_types) + ' (?P<func>.+?)$') + line_pattern = re.compile( + '^\s+(?P<no>[0-9]+)\s+' + '(?:(?P<dir>[0-9]+)\s+)?' + '.*\s+' + '(?P<path>[^\s]+)$') + info_pattern = re.compile( + '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*' + '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$') + + results = [] for path in paths: - # map to source file - src_path = re.sub('\.o$', '.c', path) - if build_dir: - src_path = re.sub('%s/*' % re.escape(build_dir), '', - src_path) + # guess the source, if we have debug-info we'll replace this later + file = re.sub('(\.o)?$', '.c', path, 1) + + # find symbol sizes + results_ = [] # note nm-tool may contain extra args cmd = nm_tool + ['--size-sort', path] if args.get('verbose'): @@ -158,21 +171,18 @@ def collect(paths, *, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, - errors='replace') + errors='replace', + close_fds=False) for line in proc.stdout: - m = pattern.match(line) + m = size_pattern.match(line) if m: func = m.group('func') # discard internal functions if not everything and func.startswith('__'): continue - # discard .8449 suffixes created by optimizer - func = re.sub('\.[0-9]+', '', func) - - results.append(DataResult( - src_path, func, + results_.append(DataResult( + file, func, int(m.group('size'), 16))) 
- proc.wait() if proc.returncode != 0: if not args.get('verbose'): @@ -180,6 +190,121 @@ def collect(paths, *, sys.stdout.write(line) sys.exit(-1) + + # try to figure out the source file if we have debug-info + dirs = {} + files = {} + # note objdump-tool may contain extra args + cmd = objdump_tool + ['--dwarf=rawline', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # note that files contain references to dirs, which we + # dereference as soon as we see them as each file table follows a + # dir table + m = line_pattern.match(line) + if m: + if not m.group('dir'): + # found a directory entry + dirs[int(m.group('no'))] = m.group('path') + else: + # found a file entry + dir = int(m.group('dir')) + if dir in dirs: + files[int(m.group('no'))] = os.path.join( + dirs[dir], + m.group('path')) + else: + files[int(m.group('no'))] = m.group('path') + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # do nothing on error, we don't need objdump to work, source files + # may just be inaccurate + pass + + defs = {} + is_func = False + f_name = None + f_file = None + # note objdump-tool may contain extra args + cmd = objdump_tool + ['--dwarf=info', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # state machine here to find definitions + m = info_pattern.match(line) + if m: + if m.group('tag'): + if is_func: + defs[f_name] = files.get(f_file, '?') + is_func = (m.group('tag') == 'DW_TAG_subprogram') + elif m.group('name'): + f_name = m.group('name') + elif m.group('file'): 
+ f_file = int(m.group('file')) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # do nothing on error, we don't need objdump to work, source files + # may just be inaccurate + pass + + for r in results_: + # find best matching debug symbol, this may be slightly different + # due to optimizations + if defs: + # exact match? avoid difflib if we can for speed + if r.function in defs: + file = defs[r.function] + else: + _, file = max( + defs.items(), + key=lambda d: difflib.SequenceMatcher(None, + d[0], + r.function, False).ratio()) + else: + file = r.file + + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(file) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + continue + + # simplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + file = os.path.relpath(file) + else: + file = os.path.abspath(file) + + results.append(DataResult(file, r.function, r.size)) + return results @@ -437,7 +562,7 @@ def main(obj_paths, *, paths.append(path) if not paths: - print("error: no .obj files found in %r?" % obj_paths) + print("error: no .o files found in %r?" 
% obj_paths) sys.exit(-1) results = collect(paths, **args) @@ -469,13 +594,16 @@ def main(obj_paths, *, # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: - writer = csv.DictWriter(f, DataResult._by + writer = csv.DictWriter(f, + (by if by is not None else DataResult._by) + ['data_'+k for k in DataResult._fields]) writer.writeheader() for r in results: writer.writerow( - {k: getattr(r, k) for k in DataResult._by} - | {'data_'+k: getattr(r, k) for k in DataResult._fields}) + {k: getattr(r, k) + for k in (by if by is not None else DataResult._by)} + | {'data_'+k: getattr(r, k) + for k in DataResult._fields}) # find previous results? if args.get('diff'): @@ -512,7 +640,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Find data size at the function level.") + description="Find data size at the function level.", + allow_abbrev=False) parser.add_argument( 'obj_paths', nargs='*', @@ -579,23 +708,30 @@ if __name__ == "__main__": action='store_true', help="Only show the total.") parser.add_argument( - '-A', '--everything', + '-F', '--source', + dest='sources', + action='append', + help="Only consider definitions in this file. Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( - '--type', - default=TYPE, + '--nm-types', + default=NM_TYPES, help="Type of symbols to report, this uses the same single-character " - "type-names emitted by nm. Defaults to %r." % TYPE) + "type-names emitted by nm. Defaults to %r." % NM_TYPES) parser.add_argument( '--nm-tool', type=lambda x: x.split(), default=NM_TOOL, help="Path to the nm tool to use. Defaults to %r." % NM_TOOL) parser.add_argument( - '--build-dir', - help="Specify the relative build directory. 
Used to map object files " - "to the correct source files.") + '--objdump-tool', + type=lambda x: x.split(), + default=OBJDUMP_TOOL, + help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL) sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None})) diff --git a/scripts/perf.py b/scripts/perf.py new file mode 100755 index 0000000..3eb3dbc --- /dev/null +++ b/scripts/perf.py @@ -0,0 +1,1263 @@ +#!/usr/bin/env python3 +# +# Script to aggregate and report Linux perf results. +# +# Example: +# ./scripts/perf.py -R -obench.perf ./runners/bench_runner +# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles +# +# Copyright (c) 2022, The littlefs authors. +# SPDX-License-Identifier: BSD-3-Clause +# + +import bisect +import collections as co +import csv +import errno +import fcntl +import functools as ft +import glob +import itertools as it +import math as m +import multiprocessing as mp +import os +import re +import shlex +import shutil +import subprocess as sp +import tempfile +import zipfile + + +PERF_PATHS = ['*.perf'] +PERF_TOOL = ['perf'] +PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references' +PERF_FREQ = 100 +OBJDUMP_TOOL = ['objdump'] +THRESHOLD = (0.5, 0.85) + + +# integer fields +class Int(co.namedtuple('Int', 'x')): + __slots__ = () + def __new__(cls, x=0): + if isinstance(x, Int): + return x + if isinstance(x, str): + try: + x = int(x, 0) + except ValueError: + # also accept +-∞ and +-inf + if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x): + x = m.inf + elif re.match('^\s*-\s*(?:∞|inf)\s*$', x): + x = -m.inf + else: + raise + assert isinstance(x, int) or m.isinf(x), x + return super().__new__(cls, x) + + def __str__(self): + if self.x == m.inf: + return '∞' + elif self.x == -m.inf: + return '-∞' + else: + return str(self.x) + + def __int__(self): + assert not m.isinf(self.x) + return self.x + + def __float__(self): + return float(self.x) + + none = '%7s' % '-' + def table(self): + 
return '%7s' % (self,) + + diff_none = '%7s' % '-' + diff_table = table + + def diff_diff(self, other): + new = self.x if self else 0 + old = other.x if other else 0 + diff = new - old + if diff == +m.inf: + return '%7s' % '+∞' + elif diff == -m.inf: + return '%7s' % '-∞' + else: + return '%+7d' % diff + + def ratio(self, other): + new = self.x if self else 0 + old = other.x if other else 0 + if m.isinf(new) and m.isinf(old): + return 0.0 + elif m.isinf(new): + return +m.inf + elif m.isinf(old): + return -m.inf + elif not old and not new: + return 0.0 + elif not old: + return 1.0 + else: + return (new-old) / old + + def __add__(self, other): + return self.__class__(self.x + other.x) + + def __sub__(self, other): + return self.__class__(self.x - other.x) + + def __mul__(self, other): + return self.__class__(self.x * other.x) + +# perf results +class PerfResult(co.namedtuple('PerfResult', [ + 'file', 'function', 'line', + 'self_cycles', + 'self_bmisses', 'self_branches', + 'self_cmisses', 'self_caches', + 'cycles', + 'bmisses', 'branches', + 'cmisses', 'caches', + 'children', 'parents'])): + _by = ['file', 'function', 'line'] + _fields = [ + 'self_cycles', + 'self_bmisses', 'self_branches', + 'self_cmisses', 'self_caches', + 'cycles', + 'bmisses', 'branches', + 'cmisses', 'caches'] + _types = { + 'self_cycles': Int, + 'self_bmisses': Int, 'self_branches': Int, + 'self_cmisses': Int, 'self_caches': Int, + 'cycles': Int, + 'bmisses': Int, 'branches': Int, + 'cmisses': Int, 'caches': Int} + + __slots__ = () + def __new__(cls, file='', function='', line=0, + self_cycles=0, + self_bmisses=0, self_branches=0, + self_cmisses=0, self_caches=0, + cycles=0, + bmisses=0, branches=0, + cmisses=0, caches=0, + children=set(), parents=set()): + return super().__new__(cls, file, function, int(Int(line)), + Int(self_cycles), + Int(self_bmisses), Int(self_branches), + Int(self_cmisses), Int(self_caches), + Int(cycles), + Int(bmisses), Int(branches), + Int(cmisses), Int(caches), + 
children, parents) + + def __add__(self, other): + return PerfResult(self.file, self.function, self.line, + self.self_cycles + other.self_cycles, + self.self_bmisses + other.self_bmisses, + self.self_branches + other.self_branches, + self.self_cmisses + other.self_cmisses, + self.self_caches + other.self_caches, + self.cycles + other.cycles, + self.bmisses + other.bmisses, + self.branches + other.branches, + self.cmisses + other.cmisses, + self.caches + other.caches, + self.children | other.children, + self.parents | other.parents) + + +def openio(path, mode='r'): + if path == '-': + if mode == 'r': + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + +# run perf as a subprocess, storing measurements into a zip file +def record(command, *, + output=None, + perf_freq=PERF_FREQ, + perf_period=None, + perf_events=PERF_EVENTS, + perf_tool=PERF_TOOL, + **args): + if not command: + print('error: no command specified?') + sys.exit(-1) + + if not output: + print('error: no output file specified?') + sys.exit(-1) + + # create a temporary file for perf to write to, as far as I can tell + # this is strictly needed because perf's pipe-mode only works with stdout + with tempfile.NamedTemporaryFile('rb') as f: + # figure out our perf invocation + perf = perf_tool + list(filter(None, [ + 'record', + '-F%s' % perf_freq + if perf_freq is not None + and perf_period is None else None, + '-c%s' % perf_period + if perf_period is not None else None, + '-B', + '-g', + '--all-user', + '-e%s' % perf_events, + '-o%s' % f.name])) + + # run our command + try: + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in perf + command)) + err = sp.call(perf + command, close_fds=False) + + except KeyboardInterrupt: + err = errno.EOWNERDEAD + + # synchronize access + z = os.open(output, os.O_RDWR | os.O_CREAT) + fcntl.flock(z, fcntl.LOCK_EX) + + # copy measurements into our zip file + with 
os.fdopen(z, 'r+b') as z: + with zipfile.ZipFile(z, 'a', + compression=zipfile.ZIP_DEFLATED, + compresslevel=1) as z: + with z.open('perf.%d' % os.getpid(), 'w') as g: + shutil.copyfileobj(f, g) + + # forward the return code + return err + + +def collect_decompressed(path, *, + perf_tool=PERF_TOOL, + everything=False, + depth=0, + **args): + sample_pattern = re.compile( + '(?P<comm>\w+)' + '\s+(?P<pid>\w+)' + '\s+(?P<time>[\w.]+):' + '\s*(?P<period>\w+)' + '\s+(?P<event>[^:]+):') + frame_pattern = re.compile( + '\s+(?P<addr>\w+)' + '\s+(?P<sym>[^\s]+)' + '\s+\((?P<dso>[^\)]+)\)') + events = { + 'cycles': 'cycles', + 'branch-misses': 'bmisses', + 'branches': 'branches', + 'cache-misses': 'cmisses', + 'cache-references': 'caches'} + + # note perf_tool may contain extra args + cmd = perf_tool + [ + 'script', + '-i%s' % path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + + last_filtered = False + last_has_frame = False + last_event = '' + last_period = 0 + results = co.defaultdict(lambda: co.defaultdict(lambda: (0, 0))) + + for line in proc.stdout: + # we need to process a lot of data, so wait to use regex as late + # as possible + if not line: + continue + if not line.startswith('\t'): + m = sample_pattern.match(line) + if m: + last_event = m.group('event') + last_filtered = last_event in events + last_period = int(m.group('period'), 0) + last_has_frame = False + elif last_filtered: + m = frame_pattern.match(line) + if m: + # filter out internal/kernel functions + if not everything and ( + m.group('sym').startswith('__') + or m.group('dso').startswith('/usr/lib') + or not m.group('sym')[:1].isalpha()): + continue + + name = ( + m.group('dso'), + m.group('sym'), + int(m.group('addr'), 16)) + self, total = results[name][last_event] + if not last_has_frame: + 
results[name][last_event] = ( + self + last_period, + total + last_period) + last_has_frame = True + else: + results[name][last_event] = ( + self, + total + last_period) + + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + # rearrange results into result type + results_ = [] + for name, r in results.items(): + results_.append(PerfResult(*name, + **{'self_'+events[e]: s for e, (s, _) in r.items()}, + **{ events[e]: t for e, (_, t) in r.items()})) + results = results_ + + return results + +def collect_job(path, i, **args): + # decompress into a temporary file, this is to work around + # some limitations of perf + with zipfile.ZipFile(path) as z: + with z.open(i) as f: + with tempfile.NamedTemporaryFile('wb') as g: + shutil.copyfileobj(f, g) + g.flush() + + return collect_decompressed(g.name, **args) + +def starapply(args): + f, args, kwargs = args + return f(*args, **kwargs) + +def collect(paths, *, + jobs=None, + objdump_tool=None, + sources=None, + everything=False, + **args): + symbol_pattern = re.compile( + '^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$') + line_pattern = re.compile( + '^\s+(?:' + # matches dir/file table + '(?P<no>[0-9]+)\s+' + '(?:(?P<dir>[0-9]+)\s+)?' + '.*\s+' + '(?P<path>[^\s]+)' + # matches line opcodes + '|' '\[[^\]]*\]\s+' + '(?:' + '(?P<op_special>Special)' + '|' '(?P<op_copy>Copy)' + '|' '(?P<op_end>End of Sequence)' + '|' 'File .*?to (?:entry )?(?P<op_file>\d+)' + '|' 'Line .*?to (?P<op_line>[0-9]+)' + '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)' + '|' '.' 
')*' + ')$', re.IGNORECASE) + + records = [] + for path in paths: + # each .perf file is actually a zip file containing perf files from + # multiple runs + with zipfile.ZipFile(path) as z: + records.extend((path, i) for i in z.infolist()) + + # we're dealing with a lot of data but also surprisingly + # parallelizable + dsos = {} + results = [] + with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p: + for results_ in p.imap_unordered( + starapply, + ((collect_job, (path, i), dict( + everything=everything, + **args)) + for path, i in records)): + + # organize by dso + results__ = {} + for r in results_: + if r.file not in results__: + results__[r.file] = [] + results__[r.file].append(r) + results_ = results__ + + for dso, results_ in results_.items(): + if dso not in dsos: + # find file+line ranges for dsos + # + # do this here so we only process each dso once + syms = {} + sym_at = [] + cmd = objdump_tool + ['-t', dso] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + m = symbol_pattern.match(line) + if m: + name = m.group('name') + addr = int(m.group('addr'), 16) + # note multiple symbols can share a name + if name not in syms: + syms[name] = set() + syms[name].add(addr) + sym_at.append((addr, name)) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # assume no debug-info on failure + pass + + # sort and keep first when duplicates + sym_at.sort() + sym_at_ = [] + for addr, name in sym_at: + if len(sym_at_) == 0 or sym_at_[-1][0] != addr: + sym_at_.append((addr, name)) + sym_at = sym_at_ + + # state machine for dwarf line numbers, note that objdump's + # decodedline seems to have issues with multiple dir/file + # tables, which is why we need this + line_at = [] + dirs = {} + 
files = {} + op_file = 1 + op_line = 1 + op_addr = 0 + cmd = objdump_tool + ['--dwarf=rawline', dso] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + m = line_pattern.match(line) + if m: + if m.group('no') and not m.group('dir'): + # found a directory entry + dirs[int(m.group('no'))] = m.group('path') + elif m.group('no'): + # found a file entry + dir = int(m.group('dir')) + if dir in dirs: + files[int(m.group('no'))] = os.path.join( + dirs[dir], + m.group('path')) + else: + files[int(m.group('no'))] = m.group('path') + else: + # found a state machine update + if m.group('op_file'): + op_file = int(m.group('op_file'), 0) + if m.group('op_line'): + op_line = int(m.group('op_line'), 0) + if m.group('op_addr'): + op_addr = int(m.group('op_addr'), 0) + + if (m.group('op_special') + or m.group('op_copy') + or m.group('op_end')): + line_at.append(( + op_addr, + files.get(op_file, '?'), + op_line)) + + if m.group('op_end'): + op_file = 1 + op_line = 1 + op_addr = 0 + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + # assume no debug-info on failure + pass + + # sort and keep first when duplicates + # + # I think dwarf requires this to be sorted but just in case + line_at.sort() + line_at_ = [] + for addr, file, line in line_at: + if len(line_at_) == 0 or line_at_[-1][0] != addr: + line_at_.append((addr, file, line)) + line_at = line_at_ + + # discard lines outside of the range of the containing + # function, these are introduced by dwarf for inlined + # functions but don't map to elf-level symbols + sym_at_ = [] + for addr, sym in sym_at: + i = bisect.bisect(line_at, addr, key=lambda x: x[0]) + if i > 0: + _, file, line = line_at[i-1] + sym_at_.append((file, line, sym)) + sym_at_.sort() + 
+ line_at_ = [] + for addr, file, line in line_at: + # only keep if sym-at-addr and sym-at-line match + i = bisect.bisect( + sym_at, addr, key=lambda x: x[0]) + j = bisect.bisect( + sym_at_, (file, line), key=lambda x: (x[0], x[1])) + if i > 0 and j > 0 and ( + sym_at[i-1][1] == sym_at_[j-1][2]): + line_at_.append((addr, file, line)) + line_at = line_at_ + + dsos[dso] = (syms, sym_at, line_at) + + syms, _, line_at = dsos[dso] + + # first try to reverse ASLR + def deltas(r, d): + if '+' in r.function: + sym, off = r.function.split('+', 1) + off = int(off, 0) + else: + sym, off = r.function, 0 + addr = r.line - off + d + + for addr_ in syms.get(sym, []): + yield addr_ - addr + + delta = min( + it.chain.from_iterable( + deltas(r, 0) for r in results_), + key=lambda d: sum(it.chain.from_iterable( + deltas(r, d) for r in results_)), + default=0) + + # then try to map addrs -> file+line + for r in results_: + addr = r.line + delta + i = bisect.bisect(line_at, addr, key=lambda x: x[0]) + if i > 0: + _, file, line = line_at[i-1] + else: + file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0 + + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(file) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + continue + + # simplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + file = os.path.relpath(file) + else: + file = os.path.abspath(file) + + function, *_ = r.function.split('+', 1) + results.append(PerfResult(file, function, line, + **{k: getattr(r, k) for k in PerfResult._fields})) + + return results + + +def fold(Result, results, *, + by=None, + defines=None, + **_): + if by is None: + by = Result._by + + for k in it.chain(by or [], (k for k, _ in defines or [])): + if k not in Result._by and k not in Result._fields: + print("error: could not find field %r?" 
% k) + sys.exit(-1) + + # filter by matching defines + if defines is not None: + results_ = [] + for r in results: + if all(getattr(r, k) in vs for k, vs in defines): + results_.append(r) + results = results_ + + # organize results into conflicts + folding = co.OrderedDict() + for r in results: + name = tuple(getattr(r, k) for k in by) + if name not in folding: + folding[name] = [] + folding[name].append(r) + + # merge conflicts + folded = [] + for name, rs in folding.items(): + folded.append(sum(rs[1:], start=rs[0])) + + return folded + +def table(Result, results, diff_results=None, *, + by=None, + fields=None, + sort=None, + summary=False, + all=False, + percent=False, + **_): + all_, all = all, __builtins__.all + + if by is None: + by = Result._by + if fields is None: + fields = Result._fields + types = Result._types + + # fold again + results = fold(Result, results, by=by) + if diff_results is not None: + diff_results = fold(Result, diff_results, by=by) + + # organize by name + table = { + ','.join(str(getattr(r, k) or '') for k in by): r + for r in results} + diff_table = { + ','.join(str(getattr(r, k) or '') for k in by): r + for r in diff_results or []} + names = list(table.keys() | diff_table.keys()) + + # sort again, now with diff info, note that python's sort is stable + names.sort() + if diff_results is not None: + names.sort(key=lambda n: tuple( + types[k].ratio( + getattr(table.get(n), k, None), + getattr(diff_table.get(n), k, None)) + for k in fields), + reverse=True) + if sort: + for k, reverse in reversed(sort): + names.sort(key=lambda n: (getattr(table[n], k),) + if getattr(table.get(n), k, None) is not None else (), + reverse=reverse ^ (not k or k in Result._fields)) + + + # build up our lines + lines = [] + + # header + line = [] + line.append('%s%s' % ( + ','.join(by), + ' (%d added, %d removed)' % ( + sum(1 for n in table if n not in diff_table), + sum(1 for n in diff_table if n not in table)) + if diff_results is not None and not percent else 
'') + if not summary else '') + if diff_results is None: + for k in fields: + line.append(k) + elif percent: + for k in fields: + line.append(k) + else: + for k in fields: + line.append('o'+k) + for k in fields: + line.append('n'+k) + for k in fields: + line.append('d'+k) + line.append('') + lines.append(line) + + # entries + if not summary: + for name in names: + r = table.get(name) + if diff_results is not None: + diff_r = diff_table.get(name) + ratios = [ + types[k].ratio( + getattr(r, k, None), + getattr(diff_r, k, None)) + for k in fields] + if not any(ratios) and not all_: + continue + + line = [] + line.append(name) + if diff_results is None: + for k in fields: + line.append(getattr(r, k).table() + if getattr(r, k, None) is not None + else types[k].none) + elif percent: + for k in fields: + line.append(getattr(r, k).diff_table() + if getattr(r, k, None) is not None + else types[k].diff_none) + else: + for k in fields: + line.append(getattr(diff_r, k).diff_table() + if getattr(diff_r, k, None) is not None + else types[k].diff_none) + for k in fields: + line.append(getattr(r, k).diff_table() + if getattr(r, k, None) is not None + else types[k].diff_none) + for k in fields: + line.append(types[k].diff_diff( + getattr(r, k, None), + getattr(diff_r, k, None))) + if diff_results is None: + line.append('') + elif percent: + line.append(' (%s)' % ', '.join( + '+∞%' if t == +m.inf + else '-∞%' if t == -m.inf + else '%+.1f%%' % (100*t) + for t in ratios)) + else: + line.append(' (%s)' % ', '.join( + '+∞%' if t == +m.inf + else '-∞%' if t == -m.inf + else '%+.1f%%' % (100*t) + for t in ratios + if t) + if any(ratios) else '') + lines.append(line) + + # total + r = next(iter(fold(Result, results, by=[])), None) + if diff_results is not None: + diff_r = next(iter(fold(Result, diff_results, by=[])), None) + ratios = [ + types[k].ratio( + getattr(r, k, None), + getattr(diff_r, k, None)) + for k in fields] + + line = [] + line.append('TOTAL') + if diff_results is None: + 
for k in fields: + line.append(getattr(r, k).table() + if getattr(r, k, None) is not None + else types[k].none) + elif percent: + for k in fields: + line.append(getattr(r, k).diff_table() + if getattr(r, k, None) is not None + else types[k].diff_none) + else: + for k in fields: + line.append(getattr(diff_r, k).diff_table() + if getattr(diff_r, k, None) is not None + else types[k].diff_none) + for k in fields: + line.append(getattr(r, k).diff_table() + if getattr(r, k, None) is not None + else types[k].diff_none) + for k in fields: + line.append(types[k].diff_diff( + getattr(r, k, None), + getattr(diff_r, k, None))) + if diff_results is None: + line.append('') + elif percent: + line.append(' (%s)' % ', '.join( + '+∞%' if t == +m.inf + else '-∞%' if t == -m.inf + else '%+.1f%%' % (100*t) + for t in ratios)) + else: + line.append(' (%s)' % ', '.join( + '+∞%' if t == +m.inf + else '-∞%' if t == -m.inf + else '%+.1f%%' % (100*t) + for t in ratios + if t) + if any(ratios) else '') + lines.append(line) + + # find the best widths, note that column 0 contains the names and column -1 + # the ratios, so those are handled a bit differently + widths = [ + ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1 + for w, i in zip( + it.chain([23], it.repeat(7)), + range(len(lines[0])-1))] + + # print our table + for line in lines: + print('%-*s %s%s' % ( + widths[0], line[0], + ' '.join('%*s' % (w, x) + for w, x in zip(widths[1:], line[1:-1])), + line[-1])) + + +def annotate(Result, results, *, + annotate=None, + threshold=None, + branches=False, + caches=False, + **args): + # figure out the threshold + if threshold is None: + t0, t1 = THRESHOLD + elif len(threshold) == 1: + t0, t1 = threshold[0], threshold[0] + else: + t0, t1 = threshold + t0, t1 = min(t0, t1), max(t0, t1) + + if not branches and not caches: + tk = 'self_cycles' + elif branches: + tk = 'self_bmisses' + else: + tk = 'self_cmisses' + + # find max cycles + max_ = max(it.chain((float(getattr(r, tk)) for r in 
results), [1])) + + for path in co.OrderedDict.fromkeys(r.file for r in results).keys(): + # flatten to line info + results = fold(Result, results, by=['file', 'line']) + table = {r.line: r for r in results if r.file == path} + + # calculate spans to show + if not annotate: + spans = [] + last = None + func = None + for line, r in sorted(table.items()): + if float(getattr(r, tk)) / max_ >= t0: + if last is not None and line - last.stop <= args['context']: + last = range( + last.start, + line+1+args['context']) + else: + if last is not None: + spans.append((last, func)) + last = range( + line-args['context'], + line+1+args['context']) + func = r.function + if last is not None: + spans.append((last, func)) + + with open(path) as f: + skipped = False + for i, line in enumerate(f): + # skip lines not in spans? + if not annotate and not any(i+1 in s for s, _ in spans): + skipped = True + continue + + if skipped: + skipped = False + print('%s@@ %s:%d: %s @@%s' % ( + '\x1b[36m' if args['color'] else '', + path, + i+1, + next(iter(f for _, f in spans)), + '\x1b[m' if args['color'] else '')) + + # build line + if line.endswith('\n'): + line = line[:-1] + + r = table.get(i+1) + if r is not None and ( + float(r.self_cycles) > 0 + if not branches and not caches + else float(r.self_bmisses) > 0 + or float(r.self_branches) > 0 + if branches + else float(r.self_cmisses) > 0 + or float(r.self_caches) > 0): + line = '%-*s // %s' % ( + args['width'], + line, + '%s cycles' % r.self_cycles + if not branches and not caches + else '%s bmisses, %s branches' % ( + r.self_bmisses, r.self_branches) + if branches + else '%s cmisses, %s caches' % ( + r.self_cmisses, r.self_caches)) + + if args['color']: + if float(getattr(r, tk)) / max_ >= t1: + line = '\x1b[1;31m%s\x1b[m' % line + elif float(getattr(r, tk)) / max_ >= t0: + line = '\x1b[35m%s\x1b[m' % line + + print(line) + + +def report(perf_paths, *, + by=None, + fields=None, + defines=None, + sort=None, + self=False, + branches=False, + 
caches=False, + tree=False, + depth=None, + **args): + # figure out what color should be + if args.get('color') == 'auto': + args['color'] = sys.stdout.isatty() + elif args.get('color') == 'always': + args['color'] = True + else: + args['color'] = False + + # it doesn't really make sense to not have a depth with tree, + # so assume depth=inf if tree by default + if args.get('depth') is None: + args['depth'] = m.inf if tree else 1 + elif args.get('depth') == 0: + args['depth'] = m.inf + + # find sizes + if not args.get('use', None): + # find .o files + paths = [] + for path in perf_paths: + if os.path.isdir(path): + path = path + '/*.perf' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print("error: no .perf files found in %r?" % perf_paths) + sys.exit(-1) + + results = collect(paths, **args) + else: + results = [] + with openio(args['use']) as f: + reader = csv.DictReader(f, restval='') + for r in reader: + try: + results.append(PerfResult( + **{k: r[k] for k in PerfResult._by + if k in r and r[k].strip()}, + **{k: r['perf_'+k] for k in PerfResult._fields + if 'perf_'+k in r and r['perf_'+k].strip()})) + except TypeError: + pass + + # fold + results = fold(PerfResult, results, by=by, defines=defines) + + # sort, note that python's sort is stable + results.sort() + if sort: + for k, reverse in reversed(sort): + results.sort(key=lambda r: (getattr(r, k),) + if getattr(r, k) is not None else (), + reverse=reverse ^ (not k or k in PerfResult._fields)) + + # write results to CSV + if args.get('output'): + with openio(args['output'], 'w') as f: + writer = csv.DictWriter(f, + (by if by is not None else PerfResult._by) + + ['perf_'+k for k in PerfResult._fields]) + writer.writeheader() + for r in results: + writer.writerow( + {k: getattr(r, k) + for k in (by if by is not None else PerfResult._by)} + | {'perf_'+k: getattr(r, k) + for k in PerfResult._fields}) + + # find previous results? 
+ if args.get('diff'): + diff_results = [] + try: + with openio(args['diff']) as f: + reader = csv.DictReader(f, restval='') + for r in reader: + try: + diff_results.append(PerfResult( + **{k: r[k] for k in PerfResult._by + if k in r and r[k].strip()}, + **{k: r['perf_'+k] for k in PerfResult._fields + if 'perf_'+k in r and r['perf_'+k].strip()})) + except TypeError: + pass + except FileNotFoundError: + pass + + # fold + diff_results = fold(PerfResult, diff_results, by=by, defines=defines) + + # print table + if not args.get('quiet'): + if args.get('annotate') or args.get('threshold'): + # annotate sources + annotate(PerfResult, results, + branches=branches, + caches=caches, + **args) + else: + # print table + table(PerfResult, results, + diff_results if args.get('diff') else None, + by=by if by is not None else ['function'], + fields=fields if fields is not None else [ + 'self_'+k if self else k + for k in ( + ['cycles'] if not branches and not caches + else ['bmisses', 'branches'] if branches + else ['cmisses', 'caches'])], + sort=sort, + **args) + + +def main(**args): + if args.get('record'): + return record(**args) + else: + return report(**args) + + +if __name__ == "__main__": + import argparse + import sys + + # bit of a hack, but parse_intermixed_args and REMAINDER are + # incompatible, so we need to figure out what we want before running + # argparse + if '-R' in sys.argv or '--record' in sys.argv: + nargs = argparse.REMAINDER + else: + nargs = '*' + + argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None + argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None + parser = argparse.ArgumentParser( + description="Aggregate and report Linux perf results.", + allow_abbrev=False, + conflict_handler='ignore') + parser.add_argument( + 'perf_paths', + nargs=nargs, + help="Description of where to find *.perf files. May be a directory " + "or a list of paths. Defaults to %r." 
% PERF_PATHS) + parser.add_argument( + '-v', '--verbose', + action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument( + '-q', '--quiet', + action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument( + '-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument( + '-u', '--use', + help="Don't parse anything, use this CSV file.") + parser.add_argument( + '-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument( + '-a', '--all', + action='store_true', + help="Show all, not just the ones that changed.") + parser.add_argument( + '-p', '--percent', + action='store_true', + help="Only show percentage change, not a full diff.") + parser.add_argument( + '-b', '--by', + action='append', + choices=PerfResult._by, + help="Group by this field.") + parser.add_argument( + '-f', '--field', + dest='fields', + action='append', + choices=PerfResult._fields, + help="Show this field.") + parser.add_argument( + '-D', '--define', + dest='defines', + action='append', + type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)), + help="Only include results where this field is this value.") + class AppendSort(argparse.Action): + def __call__(self, parser, namespace, value, option): + if namespace.sort is None: + namespace.sort = [] + namespace.sort.append((value, True if option == '-S' else False)) + parser.add_argument( + '-s', '--sort', + action=AppendSort, + help="Sort by this fields.") + parser.add_argument( + '-S', '--reverse-sort', + action=AppendSort, + help="Sort by this fields, but backwards.") + parser.add_argument( + '-Y', '--summary', + action='store_true', + help="Only show the total.") + parser.add_argument( + '-F', '--source', + dest='sources', + action='append', + help="Only consider definitions in this file. 
Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', + action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument( + '--self', + action='store_true', + help="Show samples before propagation up the call-chain.") + parser.add_argument( + '--branches', + action='store_true', + help="Show branches and branch misses.") + parser.add_argument( + '--caches', + action='store_true', + help="Show cache accesses and cache misses.") + parser.add_argument( + '-A', '--annotate', + action='store_true', + help="Show source files annotated with coverage info.") + parser.add_argument( + '-T', '--threshold', + nargs='?', + type=lambda x: tuple(float(x) for x in x.split(',')), + const=THRESHOLD, + help="Show lines wth samples above this threshold as a percent of " + "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD)) + parser.add_argument( + '-c', '--context', + type=lambda x: int(x, 0), + default=3, + help="Show n additional lines of context. Defaults to 3.") + parser.add_argument( + '-W', '--width', + type=lambda x: int(x, 0), + default=80, + help="Assume source is styled with this many columns. Defaults to 80.") + parser.add_argument( + '--color', + choices=['never', 'always', 'auto'], + default='auto', + help="When to use terminal colors. Defaults to 'auto'.") + parser.add_argument( + '-j', '--jobs', + nargs='?', + type=lambda x: int(x, 0), + const=0, + help="Number of processes to use. 0 spawns one process per core.") + parser.add_argument( + '--perf-tool', + type=lambda x: x.split(), + help="Path to the perf tool to use. Defaults to %r." % PERF_TOOL) + parser.add_argument( + '--objdump-tool', + type=lambda x: x.split(), + default=OBJDUMP_TOOL, + help="Path to the objdump tool to use. Defaults to %r." 
% OBJDUMP_TOOL) + + # record flags + record_parser = parser.add_argument_group('record options') + record_parser.add_argument( + 'command', + nargs=nargs, + help="Command to run.") + record_parser.add_argument( + '-R', '--record', + action='store_true', + help="Run a command and aggregate perf measurements.") + record_parser.add_argument( + '-o', '--output', + help="Output file. Uses flock to synchronize. This is stored as a " + "zip-file of multiple perf results.") + record_parser.add_argument( + '--perf-freq', + help="perf sampling frequency. This is passed directly to perf. " + "Defaults to %r." % PERF_FREQ) + record_parser.add_argument( + '--perf-period', + help="perf sampling period. This is passed directly to perf.") + record_parser.add_argument( + '--perf-events', + help="perf events to record. This is passed directly to perf. " + "Defaults to %r." % PERF_EVENTS) + record_parser.add_argument( + '--perf-tool', + type=lambda x: x.split(), + help="Path to the perf tool to use. Defaults to %r." 
% PERF_TOOL) + + # avoid intermixed/REMAINDER conflict, see above + if nargs == argparse.REMAINDER: + args = parser.parse_args() + else: + args = parser.parse_intermixed_args() + + # perf_paths/command overlap, so need to do some munging here + args.command = args.perf_paths + args.perf_paths = args.perf_paths or PERF_PATHS + + sys.exit(main(**{k: v + for k, v in vars(args).items() + if v is not None})) diff --git a/scripts/plot.py b/scripts/plot.py index 15ec846..52faf81 100755 --- a/scripts/plot.py +++ b/scripts/plot.py @@ -727,7 +727,8 @@ if __name__ == "__main__": import sys import argparse parser = argparse.ArgumentParser( - description="Plot CSV files in terminal.") + description="Plot CSV files in terminal.", + allow_abbrev=False) parser.add_argument( 'csv_paths', nargs='*', diff --git a/scripts/prettyasserts.py b/scripts/prettyasserts.py index 73a43a1..f69c5ca 100755 --- a/scripts/prettyasserts.py +++ b/scripts/prettyasserts.py @@ -424,7 +424,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Preprocessor that makes asserts easier to debug.") + description="Preprocessor that makes asserts easier to debug.", + allow_abbrev=False) parser.add_argument( 'input', help="Input C file.") diff --git a/scripts/stack.py b/scripts/stack.py index 6cb20ff..4da0f13 100755 --- a/scripts/stack.py +++ b/scripts/stack.py @@ -4,7 +4,7 @@ # report as infinite stack usage. # # Example: -# ./scripts/stack.py lfs.ci lfs_util.ci -S +# ./scripts/stack.py lfs.ci lfs_util.ci -Slimit # # Copyright (c) 2022, The littlefs authors. 
# SPDX-License-Identifier: BSD-3-Clause @@ -131,6 +131,7 @@ def openio(path, mode='r'): return open(path, mode) def collect(paths, *, + sources=None, everything=False, **args): # parse the vcg format @@ -181,8 +182,9 @@ def collect(paths, *, if (not args.get('quiet') and 'static' not in type and 'bounded' not in type): - print("warning: found non-static stack for %s (%s)" - % (function, type, size)) + print("warning: " + "found non-static stack for %s (%s, %s)" % ( + function, type, size)) _, _, _, targets = callgraph[info['title']] callgraph[info['title']] = ( file, function, int(size), targets) @@ -193,11 +195,48 @@ def collect(paths, *, else: continue + callgraph_ = co.defaultdict(lambda: (None, None, 0, set())) + for source, (s_file, s_function, frame, targets) in callgraph.items(): + # discard internal functions + if not everything and s_function.startswith('__'): + continue + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(s_file) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(s_file)]) == os.getcwd(): + continue + + # smiplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(s_file)]) == os.getcwd(): + s_file = os.path.relpath(s_file) + else: + s_file = os.path.abspath(s_file) + + callgraph_[source] = (s_file, s_function, frame, targets) + callgraph = callgraph_ + if not everything: - for source, (s_file, s_function, _, _) in list(callgraph.items()): + callgraph_ = co.defaultdict(lambda: (None, None, 0, set())) + for source, (s_file, s_function, frame, targets) in callgraph.items(): + # discard filtered sources + if sources is not None and not any( + os.path.abspath(s_file) == os.path.abspath(s) + for s in sources): + continue # discard internal functions - if s_file.startswith('<') or s_file.startswith('/usr/include'): - del callgraph[source] + if s_function.startswith('__'): + continue + 
callgraph_[source] = (s_file, s_function, frame, targets) + callgraph = callgraph_ # find maximum stack size recursively, this requires also detecting cycles # (in case of recursion) @@ -278,7 +317,7 @@ def table(Result, results, diff_results=None, *, all=False, percent=False, tree=False, - depth=None, + depth=1, **_): all_, all = all, __builtins__.all @@ -467,15 +506,8 @@ def table(Result, results, diff_results=None, *, # adjust the name width based on the expected call depth, though # note this doesn't really work with unbounded recursion if not summary: - # it doesn't really make sense to not have a depth with tree, - # so assume depth=inf if tree by default - if depth is None: - depth = m.inf if tree else 0 - elif depth == 0: - depth = m.inf - if not m.isinf(depth): - widths[0] += 4*depth + widths[0] += 4*(depth-1) # print our table with optional call info # @@ -528,7 +560,7 @@ def table(Result, results, diff_results=None, *, prefixes[2+is_last] + "'-> ", prefixes[2+is_last] + "| ", prefixes[2+is_last] + " ")) - recurse(names, depth) + recurse(names, depth-1) if not tree: print('%-*s %s%s' % ( @@ -544,6 +576,13 @@ def main(ci_paths, defines=None, sort=None, **args): + # it doesn't really make sense to not have a depth with tree, + # so assume depth=inf if tree by default + if args.get('depth') is None: + args['depth'] = m.inf if args['tree'] else 1 + elif args.get('depth') == 0: + args['depth'] = m.inf + # find sizes if not args.get('use', None): # find .ci files @@ -588,13 +627,16 @@ def main(ci_paths, # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: - writer = csv.DictWriter(f, StackResult._by + writer = csv.DictWriter(f, + (by if by is not None else StackResult._by) + ['stack_'+k for k in StackResult._fields]) writer.writeheader() for r in results: writer.writerow( - {k: getattr(r, k) for k in StackResult._by} - | {'stack_'+k: getattr(r, k) for k in StackResult._fields}) + {k: getattr(r, k) + for k in (by if by is not None 
else StackResult._by)} + | {'stack_'+k: getattr(r, k) + for k in StackResult._fields}) # find previous results? if args.get('diff'): @@ -636,7 +678,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Find stack usage at the function level.") + description="Find stack usage at the function level.", + allow_abbrev=False) parser.add_argument( 'ci_paths', nargs='*', @@ -703,7 +746,13 @@ if __name__ == "__main__": action='store_true', help="Only show the total.") parser.add_argument( - '-A', '--everything', + '-F', '--source', + dest='sources', + action='append', + help="Only consider definitions in this file. Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( @@ -711,20 +760,16 @@ if __name__ == "__main__": action='store_true', help="Only show the function call tree.") parser.add_argument( - '-L', '--depth', + '-Z', '--depth', nargs='?', type=lambda x: int(x, 0), const=0, - help="Depth of function calls to show. 0 show all calls but may not " + help="Depth of function calls to show. 0 shows all calls but may not " "terminate!") parser.add_argument( '-e', '--error-on-recursion', action='store_true', help="Error if any functions are recursive.") - parser.add_argument( - '--build-dir', - help="Specify the relative build directory. Used to map object files " - "to the correct source files.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None})) diff --git a/scripts/struct_.py b/scripts/struct_.py index ed6584c..76a77ba 100755 --- a/scripts/struct_.py +++ b/scripts/struct_.py @@ -3,7 +3,7 @@ # Script to find struct sizes. # # Example: -# ./scripts/struct_.py lfs.o lfs_util.o -S +# ./scripts/struct_.py lfs.o lfs_util.o -Ssize # # Copyright (c) 2022, The littlefs authors. 
# SPDX-License-Identifier: BSD-3-Clause @@ -11,6 +11,7 @@ import collections as co import csv +import difflib import glob import itertools as it import math as m @@ -128,26 +129,28 @@ def openio(path, mode='r'): else: return open(path, mode) -def collect(paths, *, +def collect(obj_paths, *, objdump_tool=OBJDUMP_TOOL, - build_dir=None, + sources=None, everything=False, + internal=False, **args): - decl_pattern = re.compile( - '^\s+(?P<no>[0-9]+)' - '\s+(?P<dir>[0-9]+)' - '\s+.*' - '\s+(?P<file>[^\s]+)$') - struct_pattern = re.compile( - '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*' + line_pattern = re.compile( + '^\s+(?P<no>[0-9]+)\s+' + '(?:(?P<dir>[0-9]+)\s+)?' + '.*\s+' + '(?P<path>[^\s]+)$') + info_pattern = re.compile( + '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*' '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*' - '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*' '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$') results = [] - for path in paths: - # find decl, we want to filter by structs in .h files - decls = {} + for path in obj_paths: + # find files, we want to filter by structs in .h files + dirs = {} + files = {} # note objdump-tool may contain extra args cmd = objdump_tool + ['--dwarf=rawline', path] if args.get('verbose'): @@ -156,12 +159,26 @@ def collect(paths, *, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, - errors='replace') + errors='replace', + close_fds=False) for line in proc.stdout: - # find file numbers - m = decl_pattern.match(line) + # note that files contain references to dirs, which we + # dereference as soon as we see them as each file table follows a + # dir table + m = line_pattern.match(line) if m: - decls[int(m.group('no'))] = m.group('file') + if not m.group('dir'): + # found a directory entry + dirs[int(m.group('no'))] = m.group('path') + else: + # found a file entry + dir = int(m.group('dir')) + if dir in dirs: + files[int(m.group('no'))] = os.path.join( + 
dirs[dir], + m.group('path')) + else: + files[int(m.group('no'))] = m.group('path') proc.wait() if proc.returncode != 0: if not args.get('verbose'): @@ -170,11 +187,11 @@ def collect(paths, *, sys.exit(-1) # collect structs as we parse dwarf info - found = False - name = None - decl = None - size = None - + results_ = [] + is_struct = False + s_name = None + s_file = None + s_size = None # note objdump-tool may contain extra args cmd = objdump_tool + ['--dwarf=info', path] if args.get('verbose'): @@ -183,38 +200,23 @@ def collect(paths, *, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, universal_newlines=True, - errors='replace') + errors='replace', + close_fds=False) for line in proc.stdout: # state machine here to find structs - m = struct_pattern.match(line) + m = info_pattern.match(line) if m: if m.group('tag'): - if (name is not None - and decl is not None - and size is not None): - file = decls.get(decl, '?') - # map to source file - file = re.sub('\.o$', '.c', file) - if build_dir: - file = re.sub( - '%s/*' % re.escape(build_dir), '', - file) - # only include structs declared in header files in the - # current directory, ignore internal-only structs ( - # these are represented in other measurements) - if everything or file.endswith('.h'): - results.append(StructResult(file, name, size)) - - found = (m.group('tag') == 'structure_type') - name = None - decl = None - size = None - elif found and m.group('name'): - name = m.group('name') - elif found and name and m.group('decl'): - decl = int(m.group('decl')) - elif found and name and m.group('size'): - size = int(m.group('size')) + if is_struct: + file = files.get(s_file, '?') + results_.append(StructResult(file, s_name, s_size)) + is_struct = (m.group('tag') == 'DW_TAG_structure_type') + elif m.group('name'): + s_name = m.group('name') + elif m.group('file'): + s_file = int(m.group('file')) + elif m.group('size'): + s_size = int(m.group('size')) proc.wait() if proc.returncode != 0: if not 
args.get('verbose'): @@ -222,6 +224,34 @@ def collect(paths, *, sys.stdout.write(line) sys.exit(-1) + for r in results_: + # ignore filtered sources + if sources is not None: + if not any( + os.path.abspath(r.file) == os.path.abspath(s) + for s in sources): + continue + else: + # default to only cwd + if not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(r.file)]) == os.getcwd(): + continue + + # limit to .h files unless --internal + if not internal and not r.file.endswith('.h'): + continue + + # simplify path + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(r.file)]) == os.getcwd(): + file = os.path.relpath(r.file) + else: + file = os.path.abspath(r.file) + + results.append(StructResult(r.file, r.struct, r.size)) + return results @@ -479,7 +509,7 @@ def main(obj_paths, *, paths.append(path) if not paths: - print("error: no .obj files found in %r?" % obj_paths) + print("error: no .o files found in %r?" % obj_paths) sys.exit(-1) results = collect(paths, **args) @@ -513,12 +543,14 @@ def main(obj_paths, *, # write results to CSV if args.get('output'): with openio(args['output'], 'w') as f: - writer = csv.DictWriter(f, StructResult._by + writer = csv.DictWriter(f, + (by if by is not None else StructResult._by) + ['struct_'+k for k in StructResult._fields]) writer.writeheader() for r in results: writer.writerow( - {k: getattr(r, k) for k in StructResult._by} + {k: getattr(r, k) + for k in (by if by is not None else StructResult._by)} | {'struct_'+k: getattr(r, k) for k in StructResult._fields}) @@ -559,7 +591,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Find struct sizes.") + description="Find struct sizes.", + allow_abbrev=False) parser.add_argument( 'obj_paths', nargs='*', @@ -626,18 +659,24 @@ if __name__ == "__main__": action='store_true', help="Only show the total.") parser.add_argument( - '-A', '--everything', + '-F', '--source', + dest='sources', + action='append', 
+ help="Only consider definitions in this file. Defaults to anything " + "in the current directory.") + parser.add_argument( + '--everything', action='store_true', help="Include builtin and libc specific symbols.") parser.add_argument( + '--internal', + action='store_true', + help="Also show structs in .c files.") + parser.add_argument( '--objdump-tool', type=lambda x: x.split(), default=OBJDUMP_TOOL, help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL) - parser.add_argument( - '--build-dir', - help="Specify the relative build directory. Used to map object files " - "to the correct source files.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None})) diff --git a/scripts/summary.py b/scripts/summary.py index 9a13703..ba7fd40 100755 --- a/scripts/summary.py +++ b/scripts/summary.py @@ -750,7 +750,8 @@ if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( - description="Summarize measurements in CSV files.") + description="Summarize measurements in CSV files.", + allow_abbrev=False) parser.add_argument( 'csv_paths', nargs='*', diff --git a/scripts/tailpipe.py b/scripts/tailpipe.py index 7e8c454..69e6455 100755 --- a/scripts/tailpipe.py +++ b/scripts/tailpipe.py @@ -115,7 +115,8 @@ if __name__ == "__main__": import sys import argparse parser = argparse.ArgumentParser( - description="Efficiently displays the last n lines of a file/pipe.") + description="Efficiently displays the last n lines of a file/pipe.", + allow_abbrev=False) parser.add_argument( 'path', nargs='?', diff --git a/scripts/test.py b/scripts/test.py index eaab3e3..358f0e8 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -27,9 +27,13 @@ import time import toml -RUNNER_PATH = 'runners/test_runner' +RUNNER_PATH = './runners/test_runner' HEADER_PATH = 'runners/test_runner.h' +GDB_TOOL = ['gdb'] +VALGRIND_TOOL = ['valgrind'] +PERF_SCRIPT = ['./scripts/perf.py'] + def openio(path, mode='r', buffering=-1, 
nb=False): if path == '-': @@ -516,12 +520,25 @@ def find_runner(runner, **args): # run under valgrind? if args.get('valgrind'): - cmd[:0] = filter(None, [ - 'valgrind', + cmd[:0] = args['valgrind_tool'] + [ '--leak-check=full', '--track-origins=yes', '--error-exitcode=4', - '-q']) + '-q'] + + # run under perf? + if args.get('perf'): + cmd[:0] = args['perf_script'] + list(filter(None, [ + '-R', + '--perf-freq=%s' % args['perf_freq'] + if args.get('perf_freq') else None, + '--perf-period=%s' % args['perf_period'] + if args.get('perf_period') else None, + '--perf-events=%s' % args['perf_events'] + if args.get('perf_events') else None, + '--perf-tool=%s' % args['perf_tool'] + if args.get('perf_tool') else None, + '-o%s' % args['perf']])) # other context if args.get('geometry'): @@ -799,9 +816,9 @@ def run_stage(name, runner_, ids, output_, **args): try: line = mpty.readline() except OSError as e: - if e.errno == errno.EIO: - break - raise + if e.errno != errno.EIO: + raise + break if not line: break last_stdout.append(line) @@ -1126,24 +1143,24 @@ def run(runner, test_ids=[], **args): cmd = runner_ + [failure.id] if args.get('gdb_main'): - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'break main', '-ex', 'run', '--args'] elif args.get('gdb_case'): path, lineno = find_path(runner_, failure.id, **args) - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'break %s:%d' % (path, lineno), '-ex', 'run', '--args'] elif failure.assert_ is not None: - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'run', '-ex', 'frame function raise', '-ex', 'up 2', '--args'] else: - cmd[:0] = ['gdb', + cmd[:0] = args['gdb_tool'] + [ '-ex', 'run', '--args'] @@ -1188,6 +1205,7 @@ if __name__ == "__main__": argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None parser = argparse.ArgumentParser( description="Build and run tests.", + allow_abbrev=False, conflict_handler='ignore') parser.add_argument( '-v', '--verbose', @@ -1324,6 +1342,11 @@ if __name__ == 
"__main__": help="Drop into gdb on test failure but stop at the beginning " "of main.") test_parser.add_argument( + '--gdb-tool', + type=lambda x: x.split(), + default=GDB_TOOL, + help="Path to gdb tool to use. Defaults to %r." % GDB_TOOL) + test_parser.add_argument( '--exec', type=lambda e: e.split(), help="Run under another executable.") @@ -1332,6 +1355,37 @@ if __name__ == "__main__": action='store_true', help="Run under Valgrind to find memory errors. Implicitly sets " "--isolate.") + test_parser.add_argument( + '--valgrind-tool', + type=lambda x: x.split(), + default=VALGRIND_TOOL, + help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL) + test_parser.add_argument( + '--perf', + help="Run under Linux's perf to sample performance counters, writing " + "samples to this file.") + test_parser.add_argument( + '--perf-freq', + help="perf sampling frequency. This is passed directly to the perf " + "script.") + test_parser.add_argument( + '--perf-period', + help="perf sampling period. This is passed directly to the perf " + "script.") + test_parser.add_argument( + '--perf-events', + help="perf events to record. This is passed directly to the perf " + "script.") + test_parser.add_argument( + '--perf-script', + type=lambda x: x.split(), + default=PERF_SCRIPT, + help="Path to the perf script to use. Defaults to %r." % PERF_SCRIPT) + test_parser.add_argument( + '--perf-tool', + type=lambda x: x.split(), + help="Path to the perf tool to use. 
This is passed directly to the " + "perf script") # compilation flags comp_parser = parser.add_argument_group('compilation options') @@ -1356,7 +1410,7 @@ if __name__ == "__main__": '-o', '--output', help="Output file.") - # runner + test_ids overlaps test_paths, so we need to do some munging here + # runner/test_paths overlap, so need to do some munging here args = parser.parse_intermixed_args() args.test_paths = [' '.join(args.runner or [])] + args.test_ids args.runner = args.runner or [RUNNER_PATH] diff --git a/scripts/tracebd.py b/scripts/tracebd.py index d69c132..556cd0d 100755 --- a/scripts/tracebd.py +++ b/scripts/tracebd.py @@ -830,7 +830,8 @@ if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="Display operations on block devices based on " - "trace output.") + "trace output.", + allow_abbrev=False) parser.add_argument( 'path', nargs='?', |