import unittest

from gajim import gui
gui.init('gtk')

from gajim.common import app  # Avoids circular imports from common.helpers
import gajim.common.styling as styling
from gajim.common.styling import PlainBlock
from gajim.common.styling import PreBlock
from gajim.common.styling import QuoteBlock
from gajim.common.styling import PreTextSpan
from gajim.common.styling import StrongSpan
from gajim.common.styling import EmphasisSpan
from gajim.common.styling import StrikeSpan
from gajim.common.styling import Hyperlink
from gajim.common.styling import process_uris
from gajim.common.text_helpers import jid_to_iri


# Expected parse results for styling.process(), keyed by a descriptive case
# name.  Each case provides:
#   'input':  the raw message text handed to styling.process()
#   'tokens': the block list that result.blocks must equal (see test_styling)
# In the expectations below, span offsets restart at 0 inside each block (see
# 'byte pos is different with multiple blocks'), and start_byte/end_byte differ
# from start/end only where the text contains non-ASCII characters.
# NOTE(review): presumably start/end are character offsets and
# start_byte/end_byte are UTF-8 byte offsets -- confirm in
# gajim.common.styling.
STYLING = {
    'pre cannot have children':  {
        'input': '_no pre `with *children*`_',
        'tokens': [
            PlainBlock(start=0, end=26, text='_no pre `with *children*`_', spans=[
                PreTextSpan(start=8, start_byte=8, end=25, end_byte=25, text='`with *children*`'),
                EmphasisSpan(start=0, start_byte=0, end=26, end_byte=26, text='_no pre `with *children*`_')
            ])
        ]
    },

    'nested spans':  {
        'input': '_*~children~*_',
        'tokens': [
            PlainBlock(start=0, end=14, text='_*~children~*_', spans=[
                StrikeSpan(start=2, start_byte=2, end=12, end_byte=12, text='~children~'),
                StrongSpan(start=1, start_byte=1, end=13, end_byte=13, text='*~children~*'),
                EmphasisSpan(start=0, start_byte=0, end=14, end_byte=14, text='_*~children~*_'),
            ])
        ]
    },

    'spans': {
        'input': '*strong* _emph_~strike~  `pre`',
        'tokens': [
            PlainBlock(start=0, end=30, text='*strong* _emph_~strike~  `pre`', spans=[
                StrongSpan(start=0, start_byte=0, end=8, end_byte=8, text='*strong*'),
                EmphasisSpan(start=9, start_byte=9, end=15, end_byte=15, text='_emph_'),
                StrikeSpan(start=15, start_byte=15, end=23, end_byte=23, text='~strike~'),
                PreTextSpan(start=25, start_byte=25, end=30, end_byte=30, text='`pre`')
            ])
        ]
    },

    'spans lazily match': {
        'input': '*strong*plain*',
        'tokens': [
            PlainBlock(start=0, end=14, text='*strong*plain*', spans=[
                StrongSpan(start=0, start_byte=0, end=8, end_byte=8, text='*strong*')
            ])
        ]
    },

    'start span only': {
        'input': '*not strong',
        'tokens': [
            PlainBlock(start=0, end=11, text='*not strong', spans=[])
        ]
    },

    # 'ö' is one character but two bytes in UTF-8, hence the diverging
    # end/end_byte values below.
    'byte pos is different': {
        'input': '*ö* *öö*',
        'tokens': [
            PlainBlock(start=0, end=8, text='*ö* *öö*', spans=[
                StrongSpan(start=0, start_byte=0, end=3, end_byte=4, text='*ö*'),
                StrongSpan(start=4, start_byte=5, end=8, end_byte=11, text='*öö*')
            ])
        ]
    },

    'byte pos is different with multiple blocks': {
        'input': '```\npre\n```\n*pláin*',
        'tokens': [
            PreBlock(start=0, end=12, text='```\npre\n```\n'),
            PlainBlock(start=12, end=19, text='*pláin*', spans=[
                StrongSpan(start=0, start_byte=0, end=7, end_byte=8, text='*pláin*')
            ])
        ]
    },

    'end span only': {
        'input': 'not strong*',
        'tokens': [
            PlainBlock(start=0, end=11, text='not strong*', spans=[])
        ]
    },

    'invalid end span': {
        'input': '*not *strong',
        'tokens': [
            PlainBlock(start=0, end=12, text='*not *strong', spans=[])
        ]
    },

    'empty span': {
        'input': '**',
        'tokens': [
            PlainBlock(start=0, end=2, text='**', spans=[])
        ]
    },

    '3 unmatched directives': {
        'input': '***',
        'tokens': [
            PlainBlock(start=0, end=3, text='***', spans=[])
        ]
    },

    '4 unmatched directives': {
        'input': '****',
        'tokens': [
            PlainBlock(start=0, end=4, text='****', spans=[])
        ]
    },

    'invalid diretives ignored': {
        'input': '* plain *strong*',
        'tokens': [
            PlainBlock(start=0, end=16, text='* plain *strong*', spans=[
                StrongSpan(start=8, start_byte=8, end=16, end_byte=16, text='*strong*')
            ])
        ]
    },

    'uneven start directives': {
        'input': '*this is *uneven*',
        'tokens': [
            PlainBlock(start=0, end=17, text='*this is *uneven*', spans=[
                StrongSpan(start=9, start_byte=9, end=17, end_byte=17, text='*uneven*')
            ])
        ]
    },

    'overlapping directives': {
        'input': '*this cannot _overlap*_',
        'tokens': [
            PlainBlock(start=0, end=23, text='*this cannot _overlap*_', spans=[
                StrongSpan(start=0, start_byte=0, end=22, end_byte=22, text='*this cannot _overlap*')
            ])
        ]
    },

    'plain blocks': {
        'input': 'one\nand two',
        'tokens': [
            PlainBlock(start=0, end=11, text='one\nand two', spans=[])
        ]
    },

    'pre block with closing': {
        'input': '```\npre *fmt* ```\n```\nplain',
        'tokens': [
            PreBlock(start=0, end=22, text='```\npre *fmt* ```\n```\n'),
            PlainBlock(start=22, end=27, text='plain', spans=[])
        ]
    },

    'pre block EOF': {
        'input': '````\na\n```',
        'tokens': [
            PreBlock(start=0, end=10, text='````\na\n```')
        ]
    },

    'pre block no terminator EOF': {
        'input': '```\na```',
        'tokens': [
            PlainBlock(start=0, end=8, text='```\na```', spans=[])
        ]
    },

    'pre block no body EOF': {
        'input': '```newtoken\n',
        'tokens': [
            PlainBlock(start=0, end=12, text='```newtoken\n', spans=[])
        ]
    },

    'single level block quote': {
        'input': '>  quoted\nnot quoted',
        'tokens': [
            QuoteBlock(start=0, end=10, text='>  quoted\n', blocks=[
                PlainBlock(start=0, end=8, text=' quoted\n', spans=[])
            ]),
            PlainBlock(start=10, end=20, text='not quoted', spans=[])
        ]
    },

    'multi level block quote': {
        'input': '>  quoted\n>>   quote > 2\n>quote 1\n\nnot quoted',
        'tokens': [
            QuoteBlock(start=0, end=34, text='>  quoted\n>>   quote > 2\n>quote 1\n', blocks=[
                PlainBlock(start=0, end=8, text=' quoted\n', spans=[]),
                QuoteBlock(start=8, end=22, text='>   quote > 2\n', blocks=[
                    PlainBlock(start=0, end=12, text='  quote > 2\n', spans=[])
                ]),
                PlainBlock(start=22, end=30, text='quote 1\n', spans=[])
            ]),
            PlainBlock(start=34, end=45, text='\nnot quoted', spans=[])
        ]
    },

    'quote start then EOF': {
        'input': '> ',
        'tokens': [
            QuoteBlock(start=0, end=2, text='> ', blocks=[])
        ]
    },

    'quote with children': {
        'input': '> ```\n> pre\n> ```\n> not pre',
        'tokens': [
            QuoteBlock(start=0, end=27, text='> ```\n> pre\n> ```\n> not pre', blocks=[
                PreBlock(start=0, end=12, text='```\npre\n```\n'),
                PlainBlock(start=12, end=19, text='not pre', spans=[])
            ])
        ]
    },

    'pre end of parent': {
        'input': '> ``` \n> pre\nplain',
        'tokens': [
            QuoteBlock(start=0, end=13, text='> ``` \n> pre\n', blocks=[
                PreBlock(start=0, end=9, text='``` \npre\n')
            ]),
            PlainBlock(start=13, end=18, text='plain', spans=[])
        ]
    },

    'span lines': {
        'input': '*not \n strong*',
        'tokens': [
            PlainBlock(start=0, end=14, text='*not \n strong*', spans=[])
        ]
    },

    'plain with uri': {
        'input': 'some kind of link http://foo.com/blah_blah',
        'tokens': [
            PlainBlock(start=0, end=42, text='some kind of link http://foo.com/blah_blah', spans=[], uris=[
                Hyperlink(start=18, start_byte=18, end=42, end_byte=42, text='http://foo.com/blah_blah', uri='http://foo.com/blah_blah')
            ])
        ]
    },

    'plain with uri don’t consider comma': {
        'input': 'some kind of link http://foo.com/blah_blah,',
        'tokens': [
            PlainBlock(start=0, end=43, text='some kind of link http://foo.com/blah_blah,', spans=[], uris=[
                Hyperlink(start=18, start_byte=18, end=42, end_byte=42, text='http://foo.com/blah_blah', uri='http://foo.com/blah_blah')
            ])
        ]
    },

    'plain with uri and styling': {
        'input': 'some *kind* of link http://foo.com/blah_blah',
        'tokens': [
            PlainBlock(start=0, end=44, text='some *kind* of link http://foo.com/blah_blah', spans=[
                StrongSpan(start=5, start_byte=5, end=11, end_byte=11, text='*kind*')
            ], uris=[
                Hyperlink(start=20, start_byte=20, end=44, end_byte=44, text='http://foo.com/blah_blah', uri='http://foo.com/blah_blah')
            ])
        ]
    },

    'plain with multiple uris': {
        'input': 'some http://foo.com/blah_blah and http://foo.com/blah_blah/123',
        'tokens': [
            PlainBlock(start=0, end=62, text='some http://foo.com/blah_blah and http://foo.com/blah_blah/123', spans=[], uris=[
                Hyperlink(start=5, start_byte=5, end=29, end_byte=29, text='http://foo.com/blah_blah', uri='http://foo.com/blah_blah'),
                Hyperlink(start=34, start_byte=34, end=62, end_byte=62, text='http://foo.com/blah_blah/123', uri='http://foo.com/blah_blah/123')
            ])
        ]
    },

}


# TODO: Most of the URI/JID test sets below belong in test_regex.py and should
# be imported from there instead of being defined here.

# Strings that, once embedded in text by Test.wrap(), must each be detected as
# exactly one hyperlink whose uri equals the string itself (see test_uris).
URIS = [
    'a:b',
    'a-:b',
    'a.:b',
    'xmpp:conference.gajim.org',
    'xmpp:asd@at',
    'xmpp:asd@asd.at',
    'xmpp:asd-asd@asd.asdasd.at.',
    'xmpp:me@%5B::1%5D',
    'xmpp:myself@127.13.42.69',
    'xmpp:myself@127.13.42.69/localhost',
    'xmpp:%23room%25irc.example@biboumi.xmpp.example',
    'xmpp:+15551234567@cheogram.com',
    'xmpp:romeo@montague.net?message;subject=Test%20Message;body=Here%27s%20a%20test%20message',
    'geo:1,2',
    'geo:1,2,3',
    'file:/foo/bar/baz',  # xffm
    'file:///foo/bar/baz',  # nautilus, rox
    'file:///x:/foo/bar/baz',  # windows
    'file://localhost/foo/bar/baz',
    'file://nonlocalhost/foo/bar/baz',

    # These seem to be from https://mathiasbynens.be/demo/url-regex
    'http://foo.com/blah_blah',
    'http://foo.com/blah_blah/',
    'http://foo.com/blah_blah_(wikipedia)',
    'http://foo.com/blah_blah_(wikipedia)_(again)',
    'http://www.example.com/wpstyle/?p=364',
    'https://www.example.com/foo/?bar=baz&inga=42&quux',
    'http://✪df.ws/123',
    'http://userid:password@example.com:8080',
    'http://userid:password@example.com:8080/',
    'http://userid@example.com',
    'http://userid@example.com/',
    'http://userid@example.com:8080',
    'http://userid@example.com:8080/',
    'http://userid:password@example.com',
    'http://userid:password@example.com/',
    'http://142.42.1.1/',
    'http://142.42.1.1:8080/',
    'http://➡.ws/䨹',
    'http://⌘.ws',
    'http://⌘.ws/',
    'http://foo.com/blah_(wikipedia)#cite-1',
    'http://foo.com/blah_(wikipedia)_blah#cite-1',
    'http://foo.com/unicode_(✪)_in_parens',
    'http://foo.com/(something)?after=parens',
    'http://☺.damowmow.com/',
    'http://code.google.com/events/#&product=browser',
    'http://j.mp',
    'ftp://foo.bar/baz',
    'http://foo.bar/?q=Test%20URL-encoded%20stuff',
    'http://مثال.إختبار',
    'http://例子.测试',
    'http://उदाहरण.परीक्षा',
    'http://-.~_!$&\'()*+,;=:%40:80%2f::::::@example.com',
    'http://1337.net',
    'http://a.b-c.de',
    'http://223.255.255.254',

    'https://foo_bar.example.com/',

    # These are from https://rfc-editor.org/rfc/rfc3513#section-2.2
    'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]',
    'http://[1080:0:0:0:8:800:200C:417A]',
    'http://[1080:0:0:0:8:800:200C:417A]',  # NOTE(review): duplicate of the previous entry
    'http://[FF01:0:0:0:0:0:0:101]',
    'http://[0:0:0:0:0:0:0:1]',
    'http://[0:0:0:0:0:0:0:0]',
    'http://[1080::8:800:200C:417A]',
    'http://[FF01::101]',
    'http://[::1]',
    'http://[::]',
    'http://[0:0:0:0:0:0:13.1.68.3]',
    'http://[0:0:0:0:0:FFFF:129.144.52.38]',
    'http://[::13.1.68.3]',
    'http://[::FFFF:129.144.52.38]',

    # These are from https://rfc-editor.org/rfc/rfc3986#section-1.1.2
    'ftp://ftp.is.co.za/rfc/rfc1808.txt',
    'http://www.ietf.org/rfc/rfc2396.txt',
    'ldap://[2001:db8::7]/c=GB?objectClass?one',
    'mailto:John.Doe@example.com',
    'news:comp.infosystems.www.servers.unix',
    'tel:+1-816-555-1212',
    'telnet://192.0.2.16:80/',
    'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
]


# Strings that must not be linked as a whole (see test_invalid_uris: at most
# one link may be found in the wrapped text, and its text must differ from the
# string).  This set holds:
# * strings that are not URIs at all
# * non-absolute URIs
NONURIS = [
    '',
    ' ',
    '.',
    ':',

    # These are from https://mathiasbynens.be/demo/url-regex
    '//',
    '//a',
    '///a',
    '///',
    'foo.com',
    'http://foo.bar?q=Spaces should be encoded',
    'http:// shouldfail.com',
    ':// should fail',
    'http://foo.bar/foo(bar)baz quux',
]


# Strings that must not be linked as a whole either; they are checked together
# with NONURIS in test_invalid_uris.  This set holds:
# * valid scheme-only URIs
# * valid generic URIs that fail the requirements of their specific scheme
UNACCEPTABLE_URIS = [
    'scheme:',

    # These are from https://mathiasbynens.be/demo/url-regex
    'http://',
    'http://?',
    'http://??',
    'http://??/',
    'http://#',
    'http://##',
    'http://##/',
    'http:///a',

    'geo:1,',
    'geo:,2',
    #'geo:1,2,',  FIXME: wrongly parsed as valid
    'geo:1,,3',
    'geo:,2,3',
    'geo:1,,',
    'geo:,2,',
    'geo:,,3',
    'geo:,,',

    'file:',
    'file:a',
    'file:a/',
    'file:a/b',
]


# Addresses that, once embedded in text by Test.wrap(), must each be detected
# as exactly one hyperlink whose text is the address and whose uri equals
# jid_to_iri(address) (see test_jids).
JIDS = [
    'asd@at',
    'asd@asd.at',
    'asd@asd.asd.at',
    'asd@asd.asd-asd.at',
    'asd.asd@asd.asd-asd.at',
    'asd-asd@asd.asdasd.at',
    'asd-asd@asd.asdasd.at.',
    'me@[::1]',
    'myself@127.13.42.69',
    '#room%irc.example@biboumi.xmpp.example',
    '+15551234567@cheogram.com',

    # These are from https://rfc-editor.org/rfc/rfc7622#section-3.5
    'fußball@example.com',
    'π@example.com',

    # These are from https://xmpp.org/extensions/xep-0106.html#examples
    r'space\20cadet@example.com',
    r'call\20me\20\22ishmael\22@example.com',
    r'at\26t\20guy@example.com',
    r'd\27artagnan@example.com',
    r'\2f.fanboy@example.com',
    r'\3a\3afoo\3a\3a@example.com',
    r'\3cfoo\3e@example.com',
    r'user\40host@example.com',
    r'c\3a\net@example.com',
    r'c\3a\\net@example.com',
    r'c\3a\cool\20stuff@example.com',
    r'c\3a\5c5commas@example.com',
    r'here\27s_a_wild_\26_\2fcr%zy\2f_address@example.com',
    r'here\27s_a_wild_\26_\2fcr%zy\2f_address_for\3a\3cwv\3e(\22IMPS\22)@example.com',
    # Some more from the same document
    r'tréville\40musketeers.lit@smtp.gascon.fr',
    r'\5c3and\2is\5c5cool@example.com',
    r'CN=D\27Artagnan\20Saint-Andr\E9,O=Example\20\26\20Company,\20Inc.,DC=example,DC=com@st.example.com',
    r'somenick!user\22\26\27\2f\3a\3c\3e\5c3address@example.com',

    # https://en.wikipedia.org/wiki/E-mail_address#Internationalization_examples
    # Note that these are *e-mail* addresses and might not all be valid JIDs.
    'Pelé@example.com',
    'δοκιμή@παράδειγμα.δοκιμή',
    '我買@屋企.香港',
    '二ノ宮@黒川.日本',
    'медведь@с-балалайкой.рф',
    #'संपर्क@डाटामेल.भारत',  fails because of the 2 combining chars in localpart
]

# Strings that must not be linked as a whole (see test_nonjids: at most one
# link may be found in the wrapped text, and its text must differ from the
# string).
NONJIDS = [
    '',
    '@',

    # These are from https://rfc-editor.org/rfc/rfc7622#section-3.5
    '"juliet"@example.com',
    'foo bar@example.com',  # search is expected to find 'bar@example.com'
    '@example.com',
    'henryⅣ@example.com',  # localpart has a compatibility-decomposable cp
    '♚@example.com',        # localpart has a symbol cp
    'juliet@',
]


# (text, expected) pairs: `expected` lists, in order, the text of every link
# that process_uris() must find in `text` (see test_uris_with_text).  Unlike
# the other sets, these texts are passed as-is, without Test.wrap().
URIS_WITH_TEXT = [
    ('write to my email mailto:foo@bar.com.uk (but not to mailto:bar@foo.com)',
     ['mailto:foo@bar.com.uk', 'mailto:bar@foo.com']),
    ('write to my email mailtomailto:foo@bar.com.uk (but not to mailto:bar@foo.com)',
     ['mailtomailto:foo@bar.com.uk', 'mailto:bar@foo.com']),
    ('see this http://userid@example.com/ link', ['http://userid@example.com/']),
    ('see this http://userid@example.com/, and ..', ['http://userid@example.com/']),
    ('<http://userid@example.com/>', ['http://userid@example.com/']),
    ('"http://userid@example.com/"', ['http://userid@example.com/']),
    ('regexes are useless (see https://en.wikipedia.org/wiki/Recursion_(computer_science)), but comfy', ['https://en.wikipedia.org/wiki/Recursion_(computer_science)']),
]


class Test(unittest.TestCase):
    """Data-driven checks of styling.process() and process_uris().

    Every entry of a data set runs inside its own subTest, so a failing entry
    is reported together with its identity and does not prevent the entries
    after it from being checked.
    """

    @staticmethod
    def wrap(link: str) -> str:
        """Return *link* embedded in a sentence.

        The link is preceded by a space and directly followed by ')', so it
        has to be located inside surrounding text.
        """
        return f'Prologue (link: {link}), and epilogue!'

    def _assert_not_linked(self, candidates: list) -> None:
        """Check that no candidate, once wrapped, gets linked as a whole.

        Finding no link at all is fine.  Otherwise exactly one link is
        allowed, and its text must differ from the candidate.
        """
        for candidate in candidates:
            with self.subTest(candidate=candidate):
                text = self.wrap(candidate)
                hlinks = process_uris(text)
                if not hlinks:
                    continue
                self.assertEqual(len(hlinks), 1, text)
                self.assertNotEqual(hlinks[0].text, candidate, text)

    def test_styling(self):
        """Each STYLING input must parse into exactly the expected blocks."""
        for name, params in STYLING.items():
            with self.subTest(case=name):
                result = styling.process(params['input'])
                self.assertEqual(result.blocks, params['tokens'])

    def test_uris(self):
        """Each wrapped URI must yield one link whose uri is that URI."""
        for uri in URIS:
            with self.subTest(uri=uri):
                text = self.wrap(uri)
                hlinks = process_uris(text)
                self.assertEqual(len(hlinks), 1, text)
                self.assertEqual(hlinks[0].uri, uri, text)

    def test_invalid_uris(self):
        """Non-URIs and unacceptable URIs must not be linked as a whole."""
        self._assert_not_linked(NONURIS + UNACCEPTABLE_URIS)

    def test_jids(self):
        """Each wrapped JID must yield one link pointing at its IRI form."""
        for jid in JIDS:
            with self.subTest(jid=jid):
                text = self.wrap(jid)
                hlinks = process_uris(text)
                self.assertEqual(len(hlinks), 1, text)
                self.assertEqual(hlinks[0].text, jid, text)
                self.assertEqual(hlinks[0].uri, jid_to_iri(jid), text)

    def test_nonjids(self):
        """Strings that are not JIDs must not be linked as a whole."""
        self._assert_not_linked(NONJIDS)

    def test_uris_with_text(self):
        """Links inside free text must be found in order, with exact text."""
        for text, expected in URIS_WITH_TEXT:
            with self.subTest(text=text):
                hlinks = process_uris(text)
                # Equal lengths are asserted first, so zip() drops nothing.
                self.assertEqual(len(hlinks), len(expected), text)
                for hlink, expected_text in zip(hlinks, expected):
                    self.assertEqual(hlink.text, expected_text, text)


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()