diff options
author | Philipp Hörist <philipp@hoerist.com> | 2023-08-30 23:03:23 +0300 |
---|---|---|
committer | Philipp Hörist <philipp@hoerist.com> | 2023-08-30 23:03:23 +0300 |
commit | 9bf767eab5efa4d838d3f7778c70fec807b513ee (patch) | |
tree | 94736a9621eac8b1c800f9b453b9ed4bf58cb9bf /scripts | |
parent | b684854fd6f06ef85898371c40fe2e35a50bc002 (diff) |
other: Scripts: Generate IANA timezone data
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/get_iana_data.py | 106 |
1 file changed, 101 insertions, 5 deletions
```diff
diff --git a/scripts/get_iana_data.py b/scripts/get_iana_data.py
index d7e74cc95..4cdfc3434 100755
--- a/scripts/get_iana_data.py
+++ b/scripts/get_iana_data.py
@@ -1,9 +1,12 @@
 #!/usr/bin/env python3
 
+from typing import NamedTuple
+
 import argparse
 import csv
 import io
 import logging
+import operator
 import re
 import sys
 from datetime import datetime
@@ -11,10 +14,39 @@ from pathlib import Path
 from urllib.request import urlopen
 
 SCHEMES_URL = 'https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv'
+COUNTRY_URL = 'https://data.iana.org/time-zones/data/iso3166.tab'
+ZONE_URL = 'https://data.iana.org/time-zones/data/zone.tab'
+
+BOILER_PLATE = '''# Generated by get_iana_data.py @ {current_date}
+
+from typing import NamedTuple
+
+
+class ZoneData(NamedTuple):
+    key: str
+    full_name: str
+    short_name: str
+    country_code: str
+    country_name: str
+
+
+def get_zone_data(key: str) -> ZoneData:
+    return ZoneData(*ZONE_DATA[key])
+
+
+'''
 
 logging.basicConfig(level='INFO', format='%(levelname)s: %(message)s')
 
 
+class ZoneData(NamedTuple):
+    key: str
+    full_name: str
+    short_name: str
+    country_code: str
+    country_name: str
+
+
 def download_file(url: str) -> str:
     logging.info('Download: %s', url)
     with urlopen(url) as f:
@@ -39,14 +71,69 @@ def parse_uri_schemes(content: str) -> list[str]:
     return schemes
 
 
-def generate_output(schemes: list[str],
-                    outpath: Path) -> None:
+def parse_zone_names(content: str) -> list[tuple[str, str]]:
+    logging.info('Parse tz data')
+
+    zones: list[tuple[str, str]] = []
+
+    data = csv.reader(io.StringIO(content), delimiter = '\t')
+    for row in data:
+        if not row or row[0].startswith('#'):
+            continue
+        zones.append((row[0], row[2]))
+
+    zones.sort(key=operator.itemgetter(1))
+    return zones
+
+
+def parse_country_names(content: str) -> dict[str, str]:
+    logging.info('Parse country data')
+
+    countrys: dict[str, str] = {}
+
+    data = csv.reader(io.StringIO(content), delimiter = '\t')
+    for row in data:
+        if not row or row[0].startswith('#'):
+            continue
+        countrys[row[0]] = row[1]
+
+    return countrys
+
+
+def merge_zone_data(
+    zones: list[tuple[str, str]],
+    countries: dict[str, str]
+) -> dict[str, ZoneData]:
+
+    data: dict[str, ZoneData] = {}
+
+    for country_code, key in zones:
+        country_name = countries[country_code]
+        full_name = key.replace('_', ' ')
+        short_name = full_name.rsplit('/', maxsplit=1)[1]
+        data[key] = ZoneData(key=key,
+                             full_name=full_name,
+                             short_name=short_name,
+                             country_code=country_code,
+                             country_name=country_name)
+
+    return data
+
+
+def generate_output(
+        schemes: list[str],
+        zones: dict[str, ZoneData],
+        outpath: Path) -> None:
     logging.info('Generate output')
 
     current_date = datetime.utcnow().isoformat()
-    content = f'# Generated by get_iana_data.py @ {current_date}\n'
+    content = BOILER_PLATE.format(current_date=current_date)
 
-    content += '\n'
+    content += 'ZONE_DATA = {\n'
+    for key, zone_data in zones.items():
+        zone_data_tuple = ', '.join([repr(d) for d in zone_data])
+        content += f" '{key}': ({zone_data_tuple}),\n"
+    content += '}\n\n\n'
 
     content += 'URI_SCHEMES = {\n'
     for scheme in schemes:
@@ -68,5 +155,14 @@ if __name__ == '__main__':
 
     scheme_content = download_file(SCHEMES_URL)
     schemes = parse_uri_schemes(scheme_content)
-    generate_output(schemes, outpath)
+
+    country_file_data = download_file(COUNTRY_URL)
+    zone_file_data = download_file(ZONE_URL)
+
+    country_data = parse_country_names(country_file_data)
+    zone_data = parse_zone_names(zone_file_data)
+
+    zones = merge_zone_data(zone_data, country_data)
+
+    generate_output(schemes, zones, outpath)
     logging.info('Finished !')
```