Welcome to mirror list, hosted at ThFree Co, Russian Federation.

dev.gajim.org/gajim/gajim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: André Apitzsch <git@apitzsch.eu>, 2021-02-13 14:44:06 +0300
committer: lovetox <philipp@hoerist.com>, 2021-02-27 14:53:26 +0300
commit: 93fabbc18078732443a6d60e5f4b86ce80e092f1 (patch)
tree: de1876c4b2e32a20c04cdce6d96639db8e805495
parent: 84e9b08121d7f39c17d8fa9f3c34fab1e0092c93 (diff)
Move regular expressions to separate file
-rw-r--r-- gajim/common/helpers.py | 6
-rw-r--r-- gajim/common/regex.py | 88
-rw-r--r-- gajim/conversation_textview.py | 21
-rw-r--r-- gajim/gtk/message_input.py | 3
-rw-r--r-- gajim/gui_interface.py | 119
5 files changed, 111 insertions, 126 deletions
diff --git a/gajim/common/helpers.py b/gajim/common/helpers.py
index 679e41cab..9f96b119f 100644
--- a/gajim/common/helpers.py
+++ b/gajim/common/helpers.py
@@ -81,6 +81,8 @@ from gajim.common.const import URIType
from gajim.common.const import URIAction
from gajim.common.const import GIO_TLS_ERRORS
from gajim.common.const import SHOW_LIST
+from gajim.common.regex import INVALID_XML_CHARS_REGEX
+from gajim.common.regex import STH_AT_STH_DOT_STH_REGEX
from gajim.common.structs import URI
@@ -633,7 +635,7 @@ def get_auth_sha(sid, initiator, target):
def remove_invalid_xml_chars(string_):
if string_:
- string_ = re.sub(app.interface.invalid_XML_chars_re, '', string_)
+ string_ = re.sub(INVALID_XML_CHARS_REGEX, '', string_)
return string_
def get_random_string(count=16):
@@ -1068,7 +1070,7 @@ def parse_uri(uri):
uri = uri[4:]
return URI(type=URIType.TEL, data=uri)
- if app.interface.sth_at_sth_dot_sth_re.match(uri):
+ if STH_AT_STH_DOT_STH_REGEX.match(uri):
return URI(type=URIType.AT, data=uri)
if uri.startswith('geo:'):
diff --git a/gajim/common/regex.py b/gajim/common/regex.py
new file mode 100644
index 000000000..402596522
--- /dev/null
+++ b/gajim/common/regex.py
@@ -0,0 +1,88 @@
+import re
+
+def _get_link_pattern():
+ # regexp meta characters are: . ^ $ * + ? { } [ ] \ | ( )
+ # one escapes the metachars with \
+ # \S matches anything but ' ' '\t' '\n' '\r' '\f' and '\v'
+ # \s matches any whitespace character
+ # \w any alphanumeric character
+ # \W any non-alphanumeric character
+ # \b means word boundary. This is a zero-width assertion that
+ # matches only at the beginning or end of a word.
+ # ^ matches at the beginning of lines
+ #
+ # * means 0 or more times
+ # + means 1 or more times
+ # ? means 0 or 1 time
+ # | means or
+ # [^*] anything but '*' (inside [] you don't have to escape metachars)
+ # [^\s*] anything but whitespaces and '*'
+ # (?<!\S) is a one char lookbehind assertion and asks for any leading
+ # whitespace
+ # and matches beginning of lines so we have correct formatting detection
+ # even if the text is just '*foo*'
+ # (?!\S) is the same thing but it's a lookahead assertion
+ # \S*[^\s\W] --> in the matching string don't match ? or ) etc.. if at
+ # the end
+ # so http://be) will match http://be and http://be)be) will match
+ # http://be)be
+
+ legacy_prefixes = r"((?<=\()(www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$"\
+ r"&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+(?=\)))"\
+ r"|((www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]"\
+ r"|%[A-Fa-f0-9]{2})+"\
+ r"\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+)"
+ # NOTE: it's ok to catch www.gr such stuff exist!
+
+ # FIXME: recognize xmpp: and treat it specially
+ links = r"((?<=\()[A-Za-z][A-Za-z0-9\+\.\-]*:"\
+ r"([\w\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+"\
+ r"(?=\)))|(\w[\w\+\.\-]*:([^<>\s]|%[A-Fa-f0-9]{2})+)"
+
+ # 2nd one: at_least_one_char@at_least_one_char.at_least_one_char
+ mail = r'\bmailto:\S*[^\s\W]|' r'\b\S+@\S+\.\S*[^\s\W]'
+
+ link_pattern = links + '|' + mail + '|' + legacy_prefixes
+ return link_pattern
+
+def _get_basic_pattern():
+ basic_pattern = _get_link_pattern()
+ # detects eg. *b* *bold* *bold bold* test *bold* *bold*! (*bold*)
+ # doesn't detect (it's a feature :P) * bold* *bold * * bold * test*bold*
+ formatting = r'|(?<!\w)' r'\*[^\s*]' r'([^*]*[^\s*])?' r'\*(?!\w)|'\
+ r'(?<!\S)' r'~[^\s~]' r'([^~]*[^\s~])?' r'~(?!\S)|'\
+ r'(?<!\w)' r'_[^\s_]' r'([^_]*[^\s_])?' r'_(?!\w)'
+ return basic_pattern + formatting
+
+def _get_emot_and_basic_pattern(use_ascii_formatting=True):
+ from gajim.gui.emoji_data import emoji_data
+ # because emoticons match later (in the string) they need to be after
+ # basic matches that may occur earlier
+ emoticons = emoji_data.get_regex()
+
+ if use_ascii_formatting:
+ pattern = _get_basic_pattern()
+ else:
+ pattern = _get_link_pattern()
+
+ return '%s|%s' % (pattern, emoticons)
+
+LINK_REGEX = re.compile(_get_link_pattern(), re.I | re.U)
+
+# link pattern + ASCII formatting
+BASIC_REGEX = re.compile(_get_basic_pattern(), re.IGNORECASE)
+
+# emoticons + link pattern
+EMOT_AND_LINK_REGEX = re.compile(_get_emot_and_basic_pattern(False),
+ re.IGNORECASE)
+
+# emoticons + link pattern + ASCII formatting
+EMOT_AND_BASIC_REGEX = re.compile(_get_emot_and_basic_pattern(True),
+ re.IGNORECASE)
+
+INVALID_XML_CHARS_REGEX = re.compile(
+ '[\x00-\x08]|[\x0b-\x0c]|[\x0e-\x1f]|[\ud800-\udfff]|[\ufffe-\uffff]')
+
+# at least one character in 3 parts (before @, after @, after .)
+STH_AT_STH_DOT_STH_REGEX = re.compile(
+ r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')
diff --git a/gajim/conversation_textview.py b/gajim/conversation_textview.py
index a14c72c08..55a950c08 100644
--- a/gajim/conversation_textview.py
+++ b/gajim/conversation_textview.py
@@ -44,6 +44,11 @@ from gajim.common.const import StyleAttr
from gajim.common.const import Trust
from gajim.common.const import URI_SCHEMES
from gajim.common.helpers import to_user_string
+from gajim.common.regex import STH_AT_STH_DOT_STH_REGEX
+from gajim.common.regex import BASIC_REGEX
+from gajim.common.regex import LINK_REGEX
+from gajim.common.regex import EMOT_AND_BASIC_REGEX
+from gajim.common.regex import EMOT_AND_LINK_REGEX
from gajim.gui import util
from gajim.gui.util import get_cursor
@@ -584,9 +589,17 @@ class ConversationTextview(GObject.GObject):
# basic: links + mail + formatting is always checked (we like that)
if app.settings.get('emoticons_theme') and graphics:
# search for emoticons & urls
- iterator = app.interface.emot_and_basic_re.finditer(otext)
- else: # search for just urls + mail + formatting
- iterator = app.interface.basic_pattern_re.finditer(otext)
+ if app.settings.get('ascii_formatting'):
+ regex = EMOT_AND_BASIC_REGEX
+ else:
+ regex = EMOT_AND_LINK_REGEX
+ else:
+ if app.settings.get('ascii_formatting'):
+ # search for just urls + mail + formatting
+ regex = BASIC_REGEX
+ else: # search for just urls + mail
+ regex = LINK_REGEX
+ iterator = regex.finditer(otext)
if iter_:
end_iter = iter_
else:
@@ -693,7 +706,7 @@ class ConversationTextview(GObject.GObject):
tags.append('mail')
elif special_text.startswith('xmpp:') and not is_xhtml_link:
tags.append('xmpp')
- elif app.interface.sth_at_sth_dot_sth_re.match(special_text) and\
+ elif STH_AT_STH_DOT_STH_REGEX.match(special_text) and \
not is_xhtml_link:
# it's a JID or mail
tags.append('sth_at_sth')
diff --git a/gajim/gtk/message_input.py b/gajim/gtk/message_input.py
index bf7a99305..1141eee3f 100644
--- a/gajim/gtk/message_input.py
+++ b/gajim/gtk/message_input.py
@@ -28,6 +28,7 @@ from nbxmpp.modules.misc import build_xhtml_body
from gajim.common import app
from gajim.common.i18n import _
from gajim.common.const import StyleAttr
+from gajim.common.regex import LINK_REGEX
from .util import scroll_to_end
@@ -207,7 +208,7 @@ class MessageInputTextView(Gtk.TextView):
index = 0
new_text = ''
- iterator = app.interface.link_pattern_re.finditer(text)
+ iterator = LINK_REGEX.finditer(text)
for match in iterator:
start, end = match.span()
url = text[start:end]
diff --git a/gajim/gui_interface.py b/gajim/gui_interface.py
index da0bbaefa..1fefdf613 100644
--- a/gajim/gui_interface.py
+++ b/gajim/gui_interface.py
@@ -33,7 +33,6 @@
import os
import sys
-import re
import time
import json
import logging
@@ -101,7 +100,6 @@ from gajim.gui.dialogs import ConfirmationCheckDialog
from gajim.gui.dialogs import InputDialog
from gajim.gui.dialogs import PassphraseDialog
from gajim.gui.filechoosers import FileChooserDialog
-from gajim.gui.emoji_data import emoji_data
from gajim.gui.filetransfer import FileTransfersWindow
from gajim.gui.filetransfer_progress import FileTransferProgress
from gajim.gui.roster_item_exchange import RosterItemExchangeWindow
@@ -1297,112 +1295,6 @@ class Interface:
ctrl.scroll_to_end()
################################################################################
-### Methods dealing with emoticons
-################################################################################
-
- @property
- def basic_pattern_re(self):
- if not self._basic_pattern_re:
- self._basic_pattern_re = re.compile(self.basic_pattern,
- re.IGNORECASE)
- return self._basic_pattern_re
-
- @property
- def emot_and_basic_re(self):
- if not self._emot_and_basic_re:
- self._emot_and_basic_re = re.compile(
- self.emot_and_basic, re.IGNORECASE)
- return self._emot_and_basic_re
-
- @property
- def sth_at_sth_dot_sth_re(self):
- if not self._sth_at_sth_dot_sth_re:
- self._sth_at_sth_dot_sth_re = re.compile(self.sth_at_sth_dot_sth)
- return self._sth_at_sth_dot_sth_re
-
- @property
- def invalid_XML_chars_re(self):
- if not self._invalid_XML_chars_re:
- self._invalid_XML_chars_re = re.compile(self.invalid_XML_chars)
- return self._invalid_XML_chars_re
-
- def make_regexps(self):
- # regexp meta characters are: . ^ $ * + ? { } [ ] \ | ( )
- # one escapes the metachars with \
- # \S matches anything but ' ' '\t' '\n' '\r' '\f' and '\v'
- # \s matches any whitespace character
- # \w any alphanumeric character
- # \W any non-alphanumeric character
- # \b means word boundary. This is a zero-width assertion that
- # matches only at the beginning or end of a word.
- # ^ matches at the beginning of lines
- #
- # * means 0 or more times
- # + means 1 or more times
- # ? means 0 or 1 time
- # | means or
- # [^*] anything but '*' (inside [] you don't have to escape metachars)
- # [^\s*] anything but whitespaces and '*'
- # (?<!\S) is a one char lookbehind assertion and asks for any leading
- # whitespace
- # and matches beginning of lines so we have correct formatting detection
- # even if the text is just '*foo*'
- # (?!\S) is the same thing but it's a lookahead assertion
- # \S*[^\s\W] --> in the matching string don't match ? or ) etc.. if at
- # the end
- # so http://be) will match http://be and http://be)be) will match
- # http://be)be
-
- self._basic_pattern_re = None
- self._emot_and_basic_re = None
- self._sth_at_sth_dot_sth_re = None
- self._invalid_XML_chars_re = None
-
- legacy_prefixes = r"((?<=\()(www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$"\
- r"&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+(?=\)))"\
- r"|((www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]"\
- r"|%[A-Fa-f0-9]{2})+"\
- r"\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+)"
- # NOTE: it's ok to catch www.gr such stuff exist!
-
- # FIXME: recognize xmpp: and treat it specially
- links = r"((?<=\()[A-Za-z][A-Za-z0-9\+\.\-]*:"\
- r"([\w\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+"\
- r"(?=\)))|(\w[\w\+\.\-]*:([^<>\s]|%[A-Fa-f0-9]{2})+)"
-
- # 2nd one: at_least_one_char@at_least_one_char.at_least_one_char
- mail = r'\bmailto:\S*[^\s\W]|' r'\b\S+@\S+\.\S*[^\s\W]'
-
- # detects eg. *b* *bold* *bold bold* test *bold* *bold*! (*bold*)
- # doesn't detect (it's a feature :P) * bold* *bold * * bold * test*bold*
- formatting = r'|(?<!\w)' r'\*[^\s*]' r'([^*]*[^\s*])?' r'\*(?!\w)|'\
- r'(?<!\S)' r'~[^\s~]' r'([^~]*[^\s~])?' r'~(?!\S)|'\
- r'(?<!\w)' r'_[^\s_]' r'([^_]*[^\s_])?' r'_(?!\w)'
-
- basic_pattern = links + '|' + mail + '|' + legacy_prefixes
-
- link_pattern = basic_pattern
- self.link_pattern_re = re.compile(link_pattern, re.I | re.U)
-
- if app.settings.get('ascii_formatting'):
- basic_pattern += formatting
- self.basic_pattern = basic_pattern
-
- # because emoticons match later (in the string) they need to be after
- # basic matches that may occur earlier
- emoticons = emoji_data.get_regex()
-
- self.emot_and_basic = '%s|%s' % (basic_pattern, emoticons)
-
- # at least one character in 3 parts (before @, after @, after .)
- self.sth_at_sth_dot_sth = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$'
-
- # Invalid XML chars
- self.invalid_XML_chars = '[\x00-\x08]|[\x0b-\x0c]|[\x0e-\x1f]|'\
- '[\ud800-\udfff]|[\ufffe-\uffff]'
-
-
-################################################################################
### Methods for opening new messages controls
################################################################################
@@ -2114,15 +2006,6 @@ class Interface:
self.handlers = {}
self.roster = None
- self._invalid_XML_chars_re = None
- self._basic_pattern_re = None
- self._emot_and_basic_re = None
- self._sth_at_sth_dot_sth_re = None
- self.link_pattern_re = None
- self.invalid_XML_chars = None
- self.basic_pattern = None
- self.emot_and_basic = None
- self.sth_at_sth_dot_sth = None
self.avatar_storage = AvatarStorage()
@@ -2199,8 +2082,6 @@ class Interface:
from gajim.gui.emoji_chooser import emoji_chooser
emoji_chooser.load()
- self.make_regexps()
-
self.last_ftwindow_update = 0
self._network_monitor = Gio.NetworkMonitor.get_default()