Welcome to mirror list, hosted at ThFree Co, Russian Federation.

dev.gajim.org/gajim/python-nbxmpp.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Hörist <philipp@hoerist.com> 2023-09-17 21:17:09 +0300
committer: Philipp Hörist <philipp@hoerist.com> 2023-09-17 21:17:09 +0300
commit: 5a9e65af7c76e038daa42dff09b9cdb529b87939 (patch)
tree: df166243dc43222606879d43b6e16ed595871002
parent: 58e4c2758c783b8ffe01c4525c9c569d4393364d (diff)
imprv: JID: Improve compliance when generating IRIs
-rw-r--r-- nbxmpp/xmppiri.py 116
-rw-r--r-- test/unit/test_jid_parsing.py 1
2 files changed, 89 insertions, 28 deletions
diff --git a/nbxmpp/xmppiri.py b/nbxmpp/xmppiri.py
index 18af188..bc0035e 100644
--- a/nbxmpp/xmppiri.py
+++ b/nbxmpp/xmppiri.py
@@ -1,57 +1,117 @@
+from __future__ import annotations
+from typing import Callable
+
+import functools
import re
-from gi.repository import GLib
# https://www.rfc-editor.org/rfc/rfc3987
-
-ucschar = r'\xA0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'\
- r'\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD'\
- r'\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD'\
- r'\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD'\
- r'\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD'\
- r'\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD'
-unreserved = r'A-Za-z0-9\-._~'
-iunreserved = fr'{unreserved}{ucschar}'
-subdelims = r"!$&'()*+,;="
+ucschar = (
+ '\xA0-\uD7FF'
+ '\uF900-\uFDCF'
+ '\uFDF0-\uFFEF'
+ '\U00010000-\U0001FFFD'
+ '\U00020000-\U0002FFFD'
+ '\U00030000-\U0003FFFD'
+ '\U00040000-\U0004FFFD'
+ '\U00050000-\U0005FFFD'
+ '\U00060000-\U0006FFFD'
+ '\U00070000-\U0007FFFD'
+ '\U00080000-\U0008FFFD'
+ '\U00090000-\U0009FFFD'
+ '\U000A0000-\U000AFFFD'
+ '\U000B0000-\U000BFFFD'
+ '\U000C0000-\U000CFFFD'
+ '\U000D0000-\U000DFFFD'
+ '\U000E1000-\U000EFFFD'
+)
+
+ALPHA = 'A-Za-z'
+DIGIT = '0-9'
+unreserved = fr'{ALPHA}{DIGIT}\-\._\~'
+subdelims = "!$&'()*+,;="
+iunreserved = f'{unreserved}{ucschar}'
+ipchar = f'{iunreserved}{re.escape(subdelims)}:@'
+ifragment = fr'{ipchar}/\?'
# https://www.rfc-editor.org/rfc/rfc5122.html#section-2.2
-nodeallow = r"!$()*+,;="
-resallow = r"!$&'()*+,:;="
+nodeallow = r"!$()*+,;="
+resallow = r"!$&'()*+,:;="
+inode = f'{iunreserved}{re.escape(nodeallow)}'
+ires = f'{iunreserved}{re.escape(resallow)}'
+ivalue = f'{iunreserved}'
+
+rx_iunreserved = re.compile(f'[{iunreserved}]*')
+rx_inode = re.compile(f'[{inode}]')
+rx_ires = re.compile(f'[{ires}]')
+rx_ikey = rx_iunreserved
+rx_iquerytype = rx_iunreserved
+rx_ivalue = rx_iunreserved
+rx_ifragment = re.compile(f'[{ifragment}]')
+
+
+class _Quoter(dict[str, str]):
+ """A mapping from a string to its percent encoded form.
+
+ Mapping is only done if string is not in safe range.
-# ifragment without iunreserved and pct-encoded
-reserved_chars_allowed_in_ifragment = subdelims + ":@" + "/?"
+ Keeps a cache internally, via __missing__, for efficiency (lookups
+ of cached keys don't call Python code at all).
+ """
+ def __init__(self, safe: re.Pattern[str]) -> None:
+ self._safe = safe
-rx_ikey = f'[{iunreserved}]*'
-rx_iquerytype = f'[{iunreserved}]*'
+ def __repr__(self):
+ return f"<Quoter {dict(self)!r}>"
+
+ def __missing__(self, b: str):
+ if len(b) != 1:
+ raise ValueError("String must be exactly one character long")
+
+ if self._safe.fullmatch(b) is None:
+ res = "".join(['%{:02X}'.format(i) for i in b.encode()])
+ else:
+ res = b
+ self[b] = res
+ return res
+
+
+@functools.lru_cache
+def _quoter_factory(safe: re.Pattern[str]) -> Callable[[str], str]:
+ return _Quoter(safe).__getitem__
def validate_ikey(ikey: str) -> str:
- res = re.fullmatch(rx_ikey, ikey)
+ res = rx_ikey.fullmatch(ikey)
if res is None:
raise ValueError('Not allowed characters in key')
return ikey
def validate_querytype(querytype: str) -> str:
- res = re.fullmatch(rx_iquerytype, querytype)
+ res = rx_iquerytype.fullmatch(querytype)
if res is None:
raise ValueError('Not allowed characters in querytype')
return querytype
-def escape_ifragment(ifragment: str) -> str:
- return GLib.Uri.escape_string(
- ifragment, reserved_chars_allowed_in_ifragment, True)
+def _escape(string: str, pattern: re.Pattern[str]) -> str:
+ quoter = _quoter_factory(safe=pattern)
+ return ''.join([quoter(c) for c in string])
+
+
+def escape_ifragment(fragment: str) -> str:
+ return _escape(fragment, rx_ifragment)
-def escape_ivalue(ivalue: str) -> str:
- return GLib.Uri.escape_string(ivalue, None, True)
+def escape_ivalue(value: str) -> str:
+ return _escape(value, rx_ivalue)
-def escape_inode(inode: str) -> str:
- return GLib.Uri.escape_string(inode, nodeallow, True)
+def escape_inode(node: str) -> str:
+ return _escape(node, rx_inode)
-def escape_ires(ires: str) -> str:
- return GLib.Uri.escape_string(ires, resallow, True)
+def escape_ires(res: str) -> str:
+ return _escape(res, rx_ires)
diff --git a/test/unit/test_jid_parsing.py b/test/unit/test_jid_parsing.py
index ae59ab4..7199595 100644
--- a/test/unit/test_jid_parsing.py
+++ b/test/unit/test_jid_parsing.py
@@ -194,6 +194,7 @@ class JIDParsing(unittest.TestCase):
tests = [
('nasty!#$%()*+,-.;=?[\\]^_`{|}~node@example.com', 'xmpp:nasty!%23$%25()*+,-.;=%3F%5B%5C%5D%5E_%60%7B%7C%7D~node@example.com'),
('node@example.com/repulsive !#"$%&\'()*+,-./:;<=>?@[\\]^_`{|}~resource', 'xmpp:node@example.com/repulsive%20!%23%22$%25&\'()*+,-.%2F:;%3C=%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~resource'),
+ ('jiři@čechy.example/v Praze', 'xmpp:jiři@čechy.example/v%20Praze')
]
for jid, iri in tests: