#!/usr/bin/env python3
"""Download IANA URI-scheme and time-zone tables and emit a Python module.

The generated module contains a ``ZONE_DATA`` dict, a ``URI_SCHEMES`` set and
a small ``get_zone_data`` accessor.
"""

from typing import NamedTuple

import argparse
import csv
import io
import logging
import operator
import re
import sys
from datetime import datetime
from datetime import timezone
from pathlib import Path
from urllib.request import urlopen

SCHEMES_URL = 'https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv'
COUNTRY_URL = 'https://data.iana.org/time-zones/data/iso3166.tab'
ZONE_URL = 'https://data.iana.org/time-zones/data/zone.tab'

# Header of the generated module; ``{current_date}`` is filled in by
# generate_output().
BOILER_PLATE = '''# Generated by get_iana_data.py @ {current_date}

from typing import NamedTuple


class ZoneData(NamedTuple):
    key: str
    full_name: str
    short_name: str
    country_code: str
    country_name: str


def get_zone_data(key: str) -> ZoneData:
    return ZoneData(*ZONE_DATA[key])


'''

# Scheme names accepted by parse_uri_schemes(); compiled once, not per row.
_SCHEME_RE = re.compile(r'[a-z0-9+.-]+')

logging.basicConfig(level='INFO', format='%(levelname)s: %(message)s')


class ZoneData(NamedTuple):
    """One time zone joined with the name of the country it belongs to."""

    key: str
    full_name: str
    short_name: str
    country_code: str
    country_name: str


def download_file(url: str) -> str:
    """Fetch *url* and return its body decoded as UTF-8."""
    logging.info('Download: %s', url)
    with urlopen(url) as f:
        content = f.read()
    return content.decode('utf-8')


def parse_uri_schemes(content: str) -> list[str]:
    """Return the lower-cased scheme names from the URI-schemes CSV text.

    The first row is treated as a header and skipped. A trailing
    ``' (obsolete)'`` marker is removed from the scheme field. Rows whose
    first field does not consist solely of ``a-z0-9+.-`` are logged and
    dropped. Empty content yields an empty list.
    """
    logging.info('Parse uri schemes')
    schemes: list[str] = []
    reader = csv.reader(io.StringIO(content), delimiter=',')
    next(reader, None)  # Skip header row; tolerate empty input
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines
            continue
        scheme = row[0].lower().removesuffix(' (obsolete)')
        if not _SCHEME_RE.fullmatch(scheme):
            logging.warning('unexpected scheme field contents: %s', scheme)
            continue
        schemes.append(scheme)
    return schemes


def _read_tab_rows(content: str) -> list[list[str]]:
    """Split tab-separated text into rows, dropping blank and '#' lines."""
    # The .tab files are plain tab-separated text without CSV quoting, so a
    # leading double quote must be kept as ordinary data.
    reader = csv.reader(
        io.StringIO(content), delimiter='\t', quoting=csv.QUOTE_NONE)
    return [row for row in reader if row and not row[0].startswith('#')]


def parse_zone_names(content: str) -> list[tuple[str, str]]:
    """Return ``(country_code, zone_key)`` pairs sorted by zone key.

    The country code is read from the first column and the zone key from the
    third column of each data row.
    """
    logging.info('Parse tz data')
    zones = [(row[0], row[2]) for row in _read_tab_rows(content)]
    zones.sort(key=operator.itemgetter(1))
    return zones


def parse_country_names(content: str) -> dict[str, str]:
    """Return a mapping built from the first two columns of each data row."""
    logging.info('Parse country data')
    return {row[0]: row[1] for row in _read_tab_rows(content)}


def merge_zone_data(
    zones: list[tuple[str, str]],
    countries: dict[str, str]
) -> dict[str, ZoneData]:
    """Join zone entries with country names, keyed by zone key.

    ``full_name`` is the key with underscores replaced by spaces and
    ``short_name`` is the part of ``full_name`` after the last ``/`` (the
    whole name when there is no ``/``).

    Raises:
        KeyError: if a zone's country code is missing from *countries*.
    """
    data: dict[str, ZoneData] = {}
    for country_code, key in zones:
        country_name = countries[country_code]
        full_name = key.replace('_', ' ')
        # [-1] instead of [1]: a key without '/' must not raise IndexError.
        short_name = full_name.rsplit('/', maxsplit=1)[-1]
        data[key] = ZoneData(key=key,
                             full_name=full_name,
                             short_name=short_name,
                             country_code=country_code,
                             country_name=country_name)
    return data


def generate_output(
    schemes: list[str],
    zones: dict[str, ZoneData],
    outpath: Path
) -> None:
    """Render the generated module and write it to *outpath* as UTF-8."""
    logging.info('Generate output')
    current_date = datetime.now(tz=timezone.utc).isoformat()

    parts = [BOILER_PLATE.format(current_date=current_date), 'ZONE_DATA = {\n']
    for key, zone_data in zones.items():
        zone_data_tuple = ', '.join(repr(d) for d in zone_data)
        # repr() keeps the emitted literal valid whatever the key contains.
        parts.append(f'    {key!r}: ({zone_data_tuple}),\n')
    parts.append('}\n\n\n')

    parts.append('URI_SCHEMES = {\n')
    parts.extend(f'    {scheme!r},\n' for scheme in schemes)
    parts.append('}\n')

    # Python source files default to UTF-8 and country names are not ASCII,
    # so do not rely on the locale's default encoding here.
    outpath.write_text(''.join(parts), encoding='utf-8')
    logging.info('Wrote file to %s', outpath)


def main() -> None:
    """Parse the command line, download the IANA data and write the module."""
    parser = argparse.ArgumentParser(description='Generate IANA data')
    parser.add_argument('out', help='Path to output file')
    args = parser.parse_args()

    outpath = Path(args.out)
    if outpath.is_dir():
        sys.exit('Output path is a directory')

    scheme_content = download_file(SCHEMES_URL)
    schemes = parse_uri_schemes(scheme_content)

    country_file_data = download_file(COUNTRY_URL)
    zone_file_data = download_file(ZONE_URL)
    country_data = parse_country_names(country_file_data)
    zone_data = parse_zone_names(zone_file_data)
    zones = merge_zone_data(zone_data, country_data)

    generate_output(schemes, zones, outpath)

    logging.info('Finished !')


if __name__ == '__main__':
    main()