diff options
author | mjk <mjk@disroot.org> | 2022-11-14 19:40:27 +0300 |
---|---|---|
committer | mjk <mjk@disroot.org> | 2022-12-01 00:05:07 +0300 |
commit | b84ef377fedac48fb660406b04f2b9908d86baac (patch) | |
tree | 6f6c94a7855f525117bbf5471190d61365656233 /scripts | |
parent | 2f8986b3d3b4f83ed942b8f108b62bf9ddf49e53 (diff) |
imprv: Styling: Introduce URI scheme whitelisting
Fixes #11266
Co-authored-by: Philipp Hörist <philipp@hoerist.com>
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/get_iana_data.py | 74 |
1 files changed, 74 insertions, 0 deletions
#!/usr/bin/env python3
"""Download the IANA URI scheme registry and write it out as Python source.

This is the script added as ``scripts/get_iana_data.py``.  It fetches the
CSV export behind ``SCHEMES_URL``, keeps the first column of every data row
and writes a ``URI_SCHEMES = {...}`` literal to the path given on the
command line.
"""

import argparse
import csv
import io
import logging
import re
import sys
from datetime import datetime
from datetime import timezone
from pathlib import Path
from urllib.request import urlopen


SCHEMES_URL = 'https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv'

# A scheme field is accepted only if, after lower-casing and dropping the
# " (obsolete)" marker, it consists solely of these characters.
_SCHEME_RE = re.compile(r'[a-z0-9+.-]+')

logging.basicConfig(level='INFO', format='%(levelname)s: %(message)s')


def download_file(url: str) -> str:
    """Fetch *url* and return the response body decoded as text.

    The body is decoded with ``bytes.decode()``'s default codec (UTF-8).
    Errors raised by ``urlopen`` or by decoding propagate to the caller.
    """
    logging.info('Download: %s', url)
    with urlopen(url) as f:
        content = f.read()
    return content.decode()


def parse_uri_schemes(content: str) -> list[str]:
    """Extract scheme names from the CSV text *content*.

    The first row is treated as a header and skipped.  For every other row
    the first field is lower-cased and a trailing `` (obsolete)`` marker is
    removed.  Fields that then contain anything outside ``[a-z0-9+.-]`` are
    logged as a warning and dropped.  Blank rows are ignored, and empty
    input yields an empty list.

    Returns the accepted scheme names in file order.
    """
    logging.info('Parse uri schemes')
    schemes: list[str] = []

    reader = csv.reader(io.StringIO(content), delimiter=',')
    # Skip header row; the default keeps empty input from raising
    # StopIteration.
    next(reader, None)
    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line, which has
            # no first field to inspect.
            continue

        scheme = line[0].lower().removesuffix(' (obsolete)')
        if not _SCHEME_RE.fullmatch(scheme):
            logging.warning('unexpected scheme field contents: %s', scheme)
            continue

        schemes.append(scheme)

    return schemes


def generate_output(schemes: list[str],
                    out_path: Path) -> None:
    """Write *schemes* to *out_path* as a ``URI_SCHEMES`` literal.

    The file starts with a ``# Generated by ...`` line carrying the current
    UTC time, followed by a blank line and one quoted scheme per line
    between braces.  An existing file at *out_path* is overwritten.

    NOTE: with an empty *schemes* list the emitted literal is ``{}``, which
    Python evaluates as an empty dict rather than an empty set.
    """
    logging.info('Generate output')
    # datetime.utcnow() is deprecated; take an aware UTC time and strip the
    # tzinfo so the rendered timestamp keeps the same offset-free format.
    current_date = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat())

    parts = [
        f'# Generated by get_iana_data.py @ {current_date}\n',
        '\n',
        'URI_SCHEMES = {\n',
    ]
    parts.extend(f" '{scheme}',\n" for scheme in schemes)
    parts.append('}\n')

    # Use the parameter, not a module-level global, so the function also
    # works when it is imported and called from other code.
    out_path.write_text(''.join(parts))
    logging.info('Wrote file to %s', out_path)


def main() -> None:
    """Parse the command line, download the registry and write the output."""
    parser = argparse.ArgumentParser(description='Generate IANA data')
    parser.add_argument('out', help='Path to output file')
    args = parser.parse_args()

    out_path = Path(args.out)
    if out_path.is_dir():
        sys.exit('Output path is a directory')

    scheme_content = download_file(SCHEMES_URL)
    schemes = parse_uri_schemes(scheme_content)
    generate_output(schemes, out_path)

    logging.info('Finished !')


if __name__ == '__main__':
    main()