From 5a9e65af7c76e038daa42dff09b9cdb529b87939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20H=C3=B6rist?= Date: Sun, 17 Sep 2023 20:17:09 +0200 Subject: imprv: JID: Improve compliance when generating IRIs --- nbxmpp/xmppiri.py | 116 ++++++++++++++++++++++++++++++++---------- test/unit/test_jid_parsing.py | 1 + 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/nbxmpp/xmppiri.py b/nbxmpp/xmppiri.py index 18af188..bc0035e 100644 --- a/nbxmpp/xmppiri.py +++ b/nbxmpp/xmppiri.py @@ -1,57 +1,117 @@ +from __future__ import annotations +from typing import Callable + +import functools import re -from gi.repository import GLib # https://www.rfc-editor.org/rfc/rfc3987 - -ucschar = r'\xA0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'\ - r'\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD'\ - r'\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD'\ - r'\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD'\ - r'\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD'\ - r'\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD' -unreserved = r'A-Za-z0-9\-._~' -iunreserved = fr'{unreserved}{ucschar}' -subdelims = r"!$&'()*+,;=" +ucschar = ( + '\xA0-\uD7FF' + '\uF900-\uFDCF' + '\uFDF0-\uFFEF' + '\U00010000-\U0001FFFD' + '\U00020000-\U0002FFFD' + '\U00030000-\U0003FFFD' + '\U00040000-\U0004FFFD' + '\U00050000-\U0005FFFD' + '\U00060000-\U0006FFFD' + '\U00070000-\U0007FFFD' + '\U00080000-\U0008FFFD' + '\U00090000-\U0009FFFD' + '\U000A0000-\U000AFFFD' + '\U000B0000-\U000BFFFD' + '\U000C0000-\U000CFFFD' + '\U000D0000-\U000DFFFD' + '\U000E1000-\U000EFFFD' +) + +ALPHA = 'A-Za-z' +DIGIT = '0-9' +unreserved = fr'{ALPHA}{DIGIT}\-\._\~' +subdelims = "!$&'()*+,;=" +iunreserved = f'{unreserved}{ucschar}' +ipchar = f'{iunreserved}{re.escape(subdelims)}:@' +ifragment = fr'{ipchar}/\?' 
# https://www.rfc-editor.org/rfc/rfc5122.html#section-2.2 -nodeallow = r"!$()*+,;=" -resallow = r"!$&'()*+,:;=" +nodeallow = r"!$()*+,;=" +resallow = r"!$&'()*+,:;=" +inode = f'{iunreserved}{re.escape(nodeallow)}' +ires = f'{iunreserved}{re.escape(resallow)}' +ivalue = f'{iunreserved}' + +rx_iunreserved = re.compile(f'[{iunreserved}]*') +rx_inode = re.compile(f'[{inode}]') +rx_ires = re.compile(f'[{ires}]') +rx_ikey = rx_iunreserved +rx_iquerytype = rx_iunreserved +rx_ivalue = rx_iunreserved +rx_ifragment = re.compile(f'[{ifragment}]') + + +class _Quoter(dict[str, str]): + """A mapping from a string to its percent encoded form. + + Mapping is only done if string is not in safe range. -# ifragment without iunreserved and pct-encoded -reserved_chars_allowed_in_ifragment = subdelims + ":@" + "/?" + Keeps a cache internally, via __missing__, for efficiency (lookups + of cached keys don't call Python code at all). + """ + def __init__(self, safe: re.Pattern[str]) -> None: + self._safe = safe -rx_ikey = f'[{iunreserved}]*' -rx_iquerytype = f'[{iunreserved}]*' + def __repr__(self): + return f"<Quoter {dict(self)!r}>" + + def __missing__(self, b: str): + if len(b) != 1: + raise ValueError("String must be exactly one character long") + + if self._safe.fullmatch(b) is None: + res = "".join(['%{:02X}'.format(i) for i in b.encode()]) + else: + res = b + self[b] = res + return res + + +@functools.lru_cache +def _quoter_factory(safe: re.Pattern[str]) -> Callable[[str], str]: + return _Quoter(safe).__getitem__ def validate_ikey(ikey: str) -> str: - res = re.fullmatch(rx_ikey, ikey) + res = rx_ikey.fullmatch(ikey) if res is None: raise ValueError('Not allowed characters in key') return ikey def validate_querytype(querytype: str) -> str: - res = re.fullmatch(rx_iquerytype, querytype) + res = rx_iquerytype.fullmatch(querytype) if res is None: raise ValueError('Not allowed characters in querytype') return querytype -def escape_ifragment(ifragment: str) -> str: - return GLib.Uri.escape_string( - ifragment, 
reserved_chars_allowed_in_ifragment, True) +def _escape(string: str, pattern: re.Pattern[str]) -> str: + quoter = _quoter_factory(safe=pattern) + return ''.join([quoter(c) for c in string]) + + +def escape_ifragment(fragment: str) -> str: + return _escape(fragment, rx_ifragment) -def escape_ivalue(ivalue: str) -> str: - return GLib.Uri.escape_string(ivalue, None, True) +def escape_ivalue(value: str) -> str: + return _escape(value, rx_ivalue) -def escape_inode(inode: str) -> str: - return GLib.Uri.escape_string(inode, nodeallow, True) +def escape_inode(node: str) -> str: + return _escape(node, rx_inode) -def escape_ires(ires: str) -> str: - return GLib.Uri.escape_string(ires, resallow, True) +def escape_ires(res: str) -> str: + return _escape(res, rx_ires) diff --git a/test/unit/test_jid_parsing.py b/test/unit/test_jid_parsing.py index ae59ab4..7199595 100644 --- a/test/unit/test_jid_parsing.py +++ b/test/unit/test_jid_parsing.py @@ -194,6 +194,7 @@ class JIDParsing(unittest.TestCase): tests = [ ('nasty!#$%()*+,-.;=?[\\]^_`{|}~node@example.com', 'xmpp:nasty!%23$%25()*+,-.;=%3F%5B%5C%5D%5E_%60%7B%7C%7D~node@example.com'), ('node@example.com/repulsive !#"$%&\'()*+,-./:;<=>?@[\\]^_`{|}~resource', 'xmpp:node@example.com/repulsive%20!%23%22$%25&\'()*+,-.%2F:;%3C=%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~resource'), + ('jiři@čechy.example/v Praze', 'xmpp:jiři@čechy.example/v%20Praze') ] for jid, iri in tests: -- cgit v1.2.3