Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/llvm/llvm-project.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Florian Mayer <fmayer@google.com> 2022-05-05 21:51:53 +0300
committer: Florian Mayer <fmayer@google.com> 2022-05-06 02:41:11 +0300
commit: 6ca1df61d29c1c46d8d6f51a1091a7651c8b1ab1 (patch)
tree: 938464d185d33d6f24c002c003556f29204efeba /compiler-rt
parent: 4af9392e13a212fe295dc33455bc591b2dc8f859 (diff)
[HWASan] Clean up hwasan_symbolize.
The globals are better expressed as members of the Symbolizer, and all functions operating on it should be methods instead.

Also use the standard idiom of wrapping the main code in `if __name__ == '__main__'`.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D125032
Diffstat (limited to 'compiler-rt')
-rwxr-xr-x compiler-rt/lib/hwasan/scripts/hwasan_symbolize | 369
1 file changed, 186 insertions, 183 deletions
diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index 3b457c202daf..7f36c3983a5e 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -31,9 +31,6 @@ if sys.version_info.major < 3:
import codecs
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
-last_access_address = None
-last_access_tag = None
-
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
# and only parses what is necessary to find the build ids. It uses a memoryview
# into an mmap to avoid copying.
@@ -110,6 +107,8 @@ class Symbolizer:
self.__index = {}
self.__link_prefixes = []
self.__html = False
+ self.__last_access_address = None
+ self.__last_access_tag = None
def enable_html(self, enable):
self.__html = enable
@@ -268,147 +267,81 @@ class Symbolizer:
if bid is not None:
self.__index[bid] = filename
-def symbolize_line(line, symbolizer_path):
- #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
- match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
- r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
- if match:
- frameno = match.group(2)
- binary = match.group(5)
- addr = int(match.group(6), 16)
- buildid = match.group(7)
-
- frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
-
- if len(frames) > 0:
- symbolizer.print(
- symbolizer.maybe_escape(
- "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
- frames[0][0])
- ) + symbolizer.maybe_linkify(frames[0][1]),
- escape=False)
- for i in range(1, len(frames)):
- space1 = ' ' * match.end(1)
- space2 = ' ' * (match.start(4) - match.end(1) - 2)
- symbolizer.print(
- symbolizer.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
- + symbolizer.maybe_linkify(frames[i][1]), escape=False)
+ def symbolize_line(self, line):
+ #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+ match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
+ r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+ if match:
+ frameno = match.group(2)
+ binary = match.group(5)
+ addr = int(match.group(6), 16)
+ buildid = match.group(7)
+
+ frames = list(self.iter_call_stack(binary, buildid, addr))
+
+ if len(frames) > 0:
+ self.print(
+ self.maybe_escape(
+ "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
+ frames[0][0])
+ ) + self.maybe_linkify(frames[0][1]),
+ escape=False)
+ for i in range(1, len(frames)):
+ space1 = ' ' * match.end(1)
+ space2 = ' ' * (match.start(4) - match.end(1) - 2)
+ self.print(
+ self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
+ + self.maybe_linkify(frames[i][1]), escape=False)
+ else:
+ self.print(line.rstrip())
else:
- symbolizer.print(line.rstrip())
- else:
- symbolizer.print(line.rstrip())
-
-def save_access_address(line):
- global last_access_address, last_access_tag
- match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
- if match:
- last_access_address = int(match.group(2), 16)
- match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
- if match:
- last_access_tag = int(match.group(2), 16)
-
-def process_stack_history(line, symbolizer, ignore_tags=False):
- if last_access_address is None or last_access_tag is None:
- return
- if re.match(r'Previously allocated frames:', line, re.UNICODE):
- return True
- pc_mask = (1 << 48) - 1
- fp_mask = (1 << 20) - 1
- # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
- match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
- r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
- if match:
- record_addr = int(match.group(2), 16)
- record = int(match.group(3), 16)
- binary = match.group(4)
- addr = int(match.group(5), 16)
- buildid = match.group(6)
- base_tag = (record_addr >> 3) & 0xFF
- fp = (record >> 48) << 4
- pc = record & pc_mask
-
- for local in symbolizer.iter_locals(binary, addr, buildid):
- frame_offset = local[3]
- size = local[4]
- if frame_offset is None or size is None:
- continue
- obj_offset = (last_access_address - fp - frame_offset) & fp_mask
- if obj_offset >= size:
- continue
- tag_offset = local[5]
- if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
- continue
- symbolizer.print('')
- symbolizer.print('Potentially referenced stack object:')
- symbolizer.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
- symbolizer.print(' at %s' % (local[1],))
- return True
- return False
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-d', action='store_true')
-parser.add_argument('-v', action='store_true')
-parser.add_argument('--ignore-tags', action='store_true')
-parser.add_argument('--symbols', action='append')
-parser.add_argument('--source', action='append')
-parser.add_argument('--index', action='store_true')
-parser.add_argument('--symbolizer')
-parser.add_argument('--linkify', type=str)
-parser.add_argument('--html', action='store_true')
-parser.add_argument('args', nargs=argparse.REMAINDER)
-args = parser.parse_args()
-
-# Unstripped binaries location.
-binary_prefixes = args.symbols or []
-if not binary_prefixes:
- if 'ANDROID_PRODUCT_OUT' in os.environ:
- product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
- binary_prefixes.append(product_out)
- binary_prefixes.append('/')
-
-for p in binary_prefixes:
- if not os.path.isdir(p):
- print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
- sys.exit(1)
-
-# Source location.
-paths_to_cut = args.source or []
-if not paths_to_cut:
- paths_to_cut.append(os.getcwd() + '/')
- if 'ANDROID_BUILD_TOP' in os.environ:
- paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
-
-# llvm-symbolizer binary.
-# 1. --symbolizer flag
-# 2. environment variable
-# 3. unsuffixed binary in the current directory
-# 4. if inside Android platform, prebuilt binary at a known path
-# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
-# highest available version in $PATH
-symbolizer_path = args.symbolizer
-if not symbolizer_path:
- if 'LLVM_SYMBOLIZER_PATH' in os.environ:
- symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
- elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
- symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
-
-if not symbolizer_path:
- s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
- if os.path.exists(s):
- symbolizer_path = s
-
-if not symbolizer_path:
- if 'ANDROID_BUILD_TOP' in os.environ:
- s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
- if os.path.exists(s):
- symbolizer_path = s
-
-if not symbolizer_path:
- for path in os.environ["PATH"].split(os.pathsep):
- p = os.path.join(path, 'llvm-symbolizer')
- if os.path.exists(p):
- symbolizer_path = p
- break
+ self.print(line.rstrip())
+
+ def save_access_address(self, line):
+ match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
+ if match:
+ self.__last_access_address = int(match.group(2), 16)
+ match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
+ if match:
+ self.__last_access_tag = int(match.group(2), 16)
+
+ def process_stack_history(self, line, ignore_tags=False):
+ if self.__last_access_address is None or self.__last_access_tag is None:
+ return
+ if re.match(r'Previously allocated frames:', line, re.UNICODE):
+ return True
+ pc_mask = (1 << 48) - 1
+ fp_mask = (1 << 20) - 1
+ # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
+ match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
+ r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
+ if match:
+ record_addr = int(match.group(2), 16)
+ record = int(match.group(3), 16)
+ binary = match.group(4)
+ addr = int(match.group(5), 16)
+ buildid = match.group(6)
+ base_tag = (record_addr >> 3) & 0xFF
+ fp = (record >> 48) << 4
+ pc = record & pc_mask
+
+ for local in self.iter_locals(binary, addr, buildid):
+ frame_offset = local[3]
+ size = local[4]
+ if frame_offset is None or size is None:
+ continue
+ obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
+ if obj_offset >= size:
+ continue
+ tag_offset = local[5]
+ if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
+ continue
+ self.print('')
+ self.print('Potentially referenced stack object:')
+ self.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
+ self.print(' at %s' % (local[1],))
+ return True
+ return False
def extract_version(s):
idx = s.rfind('-')
@@ -417,44 +350,114 @@ def extract_version(s):
x = float(s[idx + 1:])
return x
-if not symbolizer_path:
- for path in os.environ["PATH"].split(os.pathsep):
- candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
- if len(candidates) > 0:
- candidates.sort(key = extract_version, reverse = True)
- symbolizer_path = candidates[0]
- break
-
-if not os.path.exists(symbolizer_path):
- print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
- sys.exit(1)
-
-if args.v:
- print("Looking for symbols in:")
- for s in binary_prefixes:
- print(" %s" % (s,))
- print("Stripping source path prefixes:")
- for s in paths_to_cut:
- print(" %s" % (s,))
- print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
- print()
-
-symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
-symbolizer.enable_html(args.html)
-symbolizer.enable_logging(args.d)
-if args.index:
- symbolizer.build_index()
-
-if args.linkify:
- if not args.html:
- print('Need --html to --linkify', file=sys.stderr)
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-d', action='store_true')
+ parser.add_argument('-v', action='store_true')
+ parser.add_argument('--ignore-tags', action='store_true')
+ parser.add_argument('--symbols', action='append')
+ parser.add_argument('--source', action='append')
+ parser.add_argument('--index', action='store_true')
+ parser.add_argument('--symbolizer')
+ parser.add_argument('--linkify', type=str)
+ parser.add_argument('--html', action='store_true')
+ parser.add_argument('args', nargs=argparse.REMAINDER)
+ args = parser.parse_args()
+
+ # Unstripped binaries location.
+ binary_prefixes = args.symbols or []
+ if not binary_prefixes:
+ if 'ANDROID_PRODUCT_OUT' in os.environ:
+ product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
+ binary_prefixes.append(product_out)
+ binary_prefixes.append('/')
+
+ for p in binary_prefixes:
+ if not os.path.isdir(p):
+ print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
+ sys.exit(1)
+
+ # Source location.
+ paths_to_cut = args.source or []
+ if not paths_to_cut:
+ paths_to_cut.append(os.getcwd() + '/')
+ if 'ANDROID_BUILD_TOP' in os.environ:
+ paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
+
+ # llvm-symbolizer binary.
+ # 1. --symbolizer flag
+ # 2. environment variable
+ # 3. unsuffixed binary in the current directory
+ # 4. if inside Android platform, prebuilt binary at a known path
+ # 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
+ # highest available version in $PATH
+ symbolizer_path = args.symbolizer
+ if not symbolizer_path:
+ if 'LLVM_SYMBOLIZER_PATH' in os.environ:
+ symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
+ elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
+ symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
+
+ if not symbolizer_path:
+ s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
+ if os.path.exists(s):
+ symbolizer_path = s
+
+ if not symbolizer_path:
+ if 'ANDROID_BUILD_TOP' in os.environ:
+ s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
+ if os.path.exists(s):
+ symbolizer_path = s
+
+ if not symbolizer_path:
+ for path in os.environ["PATH"].split(os.pathsep):
+ p = os.path.join(path, 'llvm-symbolizer')
+ if os.path.exists(p):
+ symbolizer_path = p
+ break
+
+ if not symbolizer_path:
+ for path in os.environ["PATH"].split(os.pathsep):
+ candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
+ if len(candidates) > 0:
+ candidates.sort(key = extract_version, reverse = True)
+ symbolizer_path = candidates[0]
+ break
+
+ if not os.path.exists(symbolizer_path):
+ print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
sys.exit(1)
- symbolizer.read_linkify(args.linkify)
-
-for line in sys.stdin:
- if sys.version_info.major < 3:
- line = line.decode('utf-8')
- save_access_address(line)
- if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
- continue
- symbolize_line(line, symbolizer_path)
+
+ if args.v:
+ print("Looking for symbols in:")
+ for s in binary_prefixes:
+ print(" %s" % (s,))
+ print("Stripping source path prefixes:")
+ for s in paths_to_cut:
+ print(" %s" % (s,))
+ print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
+ print()
+
+ symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
+ symbolizer.enable_html(args.html)
+ symbolizer.enable_logging(args.d)
+ if args.index:
+ symbolizer.build_index()
+
+ if args.linkify:
+ if not args.html:
+ print('Need --html to --linkify', file=sys.stderr)
+ sys.exit(1)
+ symbolizer.read_linkify(args.linkify)
+
+ for line in sys.stdin:
+ if sys.version_info.major < 3:
+ line = line.decode('utf-8')
+ symbolizer.save_access_address(line)
+ if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
+ continue
+ symbolizer.symbolize_line(line)
+
+
+if __name__ == '__main__':
+ main()