From 4ff3d8eccf8a934c74e44578b44c56a1c4b59542 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20H=C3=B6rist?=
Date: Sun, 17 Sep 2023 21:21:43 +0200
Subject: feat: JID: Add JID.from_iri()

---
Review notes (changes compared to the original revision of this patch):
- clean_iri(): strip query and fragment before the authority component,
  and raise ValueError instead of IndexError if no path follows the
  authority component (e.g. "xmpp://guest@example.com")
- JID.from_iri(): raise ValueError if percent-decoding fails instead of
  silently dropping the localpart / resourcepart
- split_jid_string(): use the "in" operator, add docstring
- tests: add an IRI with authority component and the error cases

The whitespace of the context lines was reconstructed; regenerate the
patch against the tree if it does not apply. The index lines still carry
the blob ids of the original revision.

 nbxmpp/protocol.py            | 85 ++++++++++++++++++++++++++++++++----------
 nbxmpp/xmppiri.py             | 29 +++++++++++++++
 test/unit/test_jid_parsing.py | 24 ++++++++++++
 3 files changed, 121 insertions(+), 17 deletions(-)

diff --git a/nbxmpp/protocol.py b/nbxmpp/protocol.py
index fe88984..0bcc0e9 100644
--- a/nbxmpp/protocol.py
+++ b/nbxmpp/protocol.py
@@ -39,6 +39,7 @@ from dataclasses import asdict
 from gi.repository import GLib
 import idna
 
+from nbxmpp.xmppiri import clean_iri
 from nbxmpp.xmppiri import escape_ifragment
 from nbxmpp.xmppiri import escape_inode
 from nbxmpp.xmppiri import escape_ires
@@ -491,6 +492,38 @@ def deprecation_warning(message):
     warnings.warn(message, DeprecationWarning)
 
 
+def split_jid_string(
+    jid_string: str
+) -> tuple[str | None, str, str | None]:
+    """
+    Split a JID string into (localpart, domainpart, resourcepart)
+    without validating or normalizing any of the parts.
+
+    localpart is None if the string contains no '@' before the first
+    '/', resourcepart is None if the string contains no '/'.
+    """
+
+    # https://tools.ietf.org/html/rfc7622#section-3.2
+
+    # Remove any portion from the first '/' character to the end of the
+    # string (if there is a '/' character present).
+
+    # Remove any portion from the beginning of the string to the first
+    # '@' character (if there is an '@' character present).
+
+    if '/' in jid_string:
+        rest, resourcepart = jid_string.split('/', 1)
+    else:
+        rest, resourcepart = jid_string, None
+
+    if '@' in rest:
+        localpart, domainpart = rest.split('@', 1)
+    else:
+        localpart, domainpart = None, rest
+
+    return localpart, domainpart, resourcepart
+
+
 @functools.lru_cache(maxsize=None)
 def validate_localpart(localpart: str) -> str:
     if not localpart or len(localpart.encode()) > 1023:
@@ -635,23 +668,7 @@ class JID:
     @classmethod
     @functools.lru_cache(maxsize=None)
     def from_string(cls, jid_string: str, force_bare: bool = False) -> JID:
-        # https://tools.ietf.org/html/rfc7622#section-3.2
-
-        # Remove any portion from the first '/' character to the end of the
-        # string (if there is a '/' character present).
-
-        # Remove any portion from the beginning of the string to the first
-        # '@' character (if there is an '@' character present).
-
-        if jid_string.find('/') != -1:
-            rest, resourcepart = jid_string.split('/', 1)
-        else:
-            rest, resourcepart = jid_string, None
-
-        if rest.find('@') != -1:
-            localpart, domainpart = rest.split('@', 1)
-        else:
-            localpart, domainpart = None, rest
+        localpart, domainpart, resourcepart = split_jid_string(jid_string)
 
         if force_bare:
             resourcepart = None
@@ -693,6 +710,40 @@ class JID:
                    domain=domainpart,
                    resource=None)
 
+    @classmethod
+    @functools.lru_cache(maxsize=None)
+    def from_iri(cls, iri_str: str, *, force_bare: bool = False) -> JID:
+        """
+        Create a JID from an XMPP IRI (RFC 5122).
+
+        Authority, query and fragment of the IRI are discarded and the
+        percent-encoding of localpart and resourcepart is decoded.
+
+        Raises ValueError if clean_iri() rejects the IRI or if a part
+        contains an invalid escape sequence.
+        """
+        iri_str = clean_iri(iri_str)
+        localpart, domainpart, resourcepart = split_jid_string(iri_str)
+
+        if localpart is not None:
+            # unescape_string() returns None on malformed input, which
+            # must not be mistaken for "no localpart"
+            localpart = GLib.Uri.unescape_string(localpart)
+            if localpart is None:
+                raise ValueError('Invalid escape sequence in localpart')
+
+        if force_bare:
+            resourcepart = None
+
+        if resourcepart is not None:
+            resourcepart = GLib.Uri.unescape_string(resourcepart)
+            if resourcepart is None:
+                raise ValueError('Invalid escape sequence in resourcepart')
+
+        return cls(localpart=localpart,
+                   domain=domainpart,
+                   resource=resourcepart)
+
     def __str__(self) -> str:
         if self.localpart:
             jid = f'{self.localpart}@{self.domain}'
diff --git a/nbxmpp/xmppiri.py b/nbxmpp/xmppiri.py
index bc0035e..fd3c491 100644
--- a/nbxmpp/xmppiri.py
+++ b/nbxmpp/xmppiri.py
@@ -115,3 +115,32 @@ def escape_inode(node: str) -> str:
 
 def escape_ires(res: str) -> str:
     return _escape(res, rx_ires)
+
+
+def clean_iri(iri_str: str) -> str:
+    """
+    Reduce an XMPP IRI (RFC 5122) to its path component.
+
+    The scheme, an optional authority component, the query and the
+    fragment are removed. Percent-encoding is left untouched.
+
+    Raises ValueError if the IRI does not start with 'xmpp:' or has
+    an authority component but no path.
+    """
+    if not iri_str.startswith('xmpp:'):
+        raise ValueError('IRI must start with xmpp scheme')
+
+    iri_str = iri_str.removeprefix('xmpp:')
+
+    # Remove query and fragment first, both may contain '/' which must
+    # not be mistaken for the end of the authority component
+    iri_str = iri_str.split('?', maxsplit=1)[0]
+    iri_str = iri_str.split('#', maxsplit=1)[0]
+
+    if iri_str.startswith('//'):
+        # Remove authority component
+        _, separator, iri_str = iri_str.removeprefix('//').partition('/')
+        if not separator:
+            raise ValueError('IRI has no path component')
+
+    return iri_str
diff --git a/test/unit/test_jid_parsing.py b/test/unit/test_jid_parsing.py
index 7199595..f118bf8 100644
--- a/test/unit/test_jid_parsing.py
+++ b/test/unit/test_jid_parsing.py
@@ -213,3 +213,27 @@ class JIDParsing(unittest.TestCase):
         jid = JID.from_user_input('call me "ishmael"@example.com')
         self.assertEqual(jid.to_iri(),
                          'xmpp:call%5C20me%5C20%5C22ishmael%5C22@example.com')
+
+    def test_iri_to_jid(self):
+        tests = [
+            ('nasty!#$%()*+,-.;=?[\\]^_`{|}~node@example.com', 'xmpp:nasty!%23$%25()*+,-.;=%3F%5B%5C%5D%5E_%60%7B%7C%7D~node@example.com'),
+            ('node@example.com/repulsive !#"$%&\'()*+,-./:;<=>?@[\\]^_`{|}~resource', 'xmpp:node@example.com/repulsive%20!%23%22$%25&\'()*+,-.%2F:;%3C=%3E%3F%40%5B%5C%5D%5E_%60%7B%7C%7D~resource'),
+            ('jiři@čechy.example/v Praze', 'xmpp:jiři@čechy.example/v%20Praze'),
+            ('support@example.com', 'xmpp://guest@example.com/support@example.com?message'),
+        ]
+
+        for jid_string, iri_string in tests:
+            parsed_jid = JID.from_iri(iri_string)
+            self.assertEqual(str(parsed_jid), jid_string)
+
+    def test_invalid_iri(self):
+        tests = [
+            'romeo@example.com',
+            'xmpp://guest@example.com',
+            'xmpp://guest@example.com?message;body=a/b',
+            'xmpp:ro%meo@example.com',
+        ]
+
+        for iri_string in tests:
+            with self.assertRaises(ValueError):
+                JID.from_iri(iri_string)
-- 
cgit v1.2.3