Welcome to mirror list, hosted at ThFree Co, Russian Federation.

dev.gajim.org/gajim/gajim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author wurstsalat <mailtrash@posteo.de> 2022-08-26 09:52:14 +0300
committer wurstsalat <mailtrash@posteo.de> 2022-08-26 23:55:45 +0300
commit 2303790445a5d73c0a6867e4af936f5451a50a37 (patch)
tree ff5d15e7f5c2065f7abcc6edbffc8250d223622d
parent f9a3eaad543d8236c8c1a473b16d6afd0c9e4985 (diff)
fix: Preview: Sanitize filename from disallowed chars [sanitize-filenames]
Fixes #11105, #10752
-rw-r--r-- gajim/common/helpers.py 58
-rw-r--r-- gajim/common/preview_helpers.py 8
-rw-r--r-- test/no_gui/test_sanitize_filename.py 61
3 files changed, 98 insertions, 29 deletions
diff --git a/gajim/common/helpers.py b/gajim/common/helpers.py
index b3243575e..2f103d203 100644
--- a/gajim/common/helpers.py
+++ b/gajim/common/helpers.py
@@ -37,7 +37,6 @@ import sys
import re
import os
import subprocess
-import base64
import hashlib
import shlex
import socket
@@ -58,10 +57,10 @@ from datetime import datetime
from datetime import timedelta
from urllib.parse import unquote
from urllib.parse import urlparse
-from encodings.punycode import punycode_encode
from functools import wraps
from pathlib import Path
from packaging.version import Version as V
+import unicodedata
from nbxmpp.namespaces import Namespace
from nbxmpp.const import Role
@@ -358,28 +357,41 @@ def get_file_path_from_dnd_dropped_uri(uri: str) -> str:
def sanitize_filename(filename: str) -> str:
'''
- Make sure the filename we will write does contain only acceptable and latin
- characters, and is not too long (in that case hash it)
+ Sanitize filename of elements not allowed on Windows
+ https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
+ Limit filename length to 50 chars on all systems
'''
- # 48 is the limit
- if len(filename) > 48:
- hash_ = hashlib.md5(filename.encode('utf-8'))
- filename = base64.b64encode(hash_.digest()).decode('utf-8')
-
- # make it latin chars only
- filename = punycode_encode(filename).decode('utf-8')
- filename = filename.replace('/', '_')
- if os.name == 'nt':
- filename = filename.replace('?', '_')\
- .replace(':', '_')\
- .replace('\\', '_')\
- .replace('"', "'")\
- .replace('|', '_')\
- .replace('*', '_')\
- .replace('<', '_')\
- .replace('>', '_')
-
- return filename
+ if sys.platform == 'win32':
+ blacklist = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '\0']
+ reserved_filenames = [
+ 'CON', 'PRN', 'AUX', 'NUL', 'COM1', 'COM2', 'COM3', 'COM4', 'COM5',
+ 'COM6', 'COM7', 'COM8', 'COM9', 'LPT1', 'LPT2', 'LPT3', 'LPT4',
+ 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
+ ]
+ filename = ''.join(char for char in filename if char not in blacklist)
+
+ filename = ''.join(char for char in filename if 31 < ord(char))
+
+ filename = unicodedata.normalize('NFKD', filename)
+ filename = filename.rstrip('. ')
+ filename = filename.strip()
+
+ if all(char == '.' for char in filename):
+ filename = f'__{filename}'
+ if filename in reserved_filenames:
+ filename = f'__{filename}'
+ if len(filename) == 0:
+ filename = '__'
+
+ extension = Path(filename).suffix[:10]
+ filename = Path(filename).stem
+ final_length = 50 - len(extension)
+
+ if len(filename) > final_length:
+ # Many Filesystems have a limit on filename length: keep it short
+ filename = filename[:final_length]
+
+ return f'{filename}{extension}'
def get_contact_dict_for_account(account: str) -> dict[str, types.BareContact]:
diff --git a/gajim/common/preview_helpers.py b/gajim/common/preview_helpers.py
index 2de47e589..30f166200 100644
--- a/gajim/common/preview_helpers.py
+++ b/gajim/common/preview_helpers.py
@@ -45,6 +45,7 @@ from cryptography.hazmat.primitives.ciphers import Cipher
from cryptography.hazmat.primitives.ciphers import algorithms
from cryptography.hazmat.primitives.ciphers.modes import GCM
+from gajim.common.helpers import sanitize_filename
from gajim.common.i18n import _
log = logging.getLogger('gajim.c.preview_helpers')
@@ -308,12 +309,7 @@ def get_image_paths(uri: str,
web_stem = path.stem
extension = path.suffix
- if len(web_stem) > 90:
- # Many Filesystems have a limit on filename length
- # Most have 255, some encrypted ones only 143
- # We add around 50 chars for the hash,
- # so the filename should not exceed 90
- web_stem = web_stem[:90]
+ web_stem = sanitize_filename(web_stem)
name_hash = hashlib.sha1(str(uri).encode()).hexdigest()
diff --git a/test/no_gui/test_sanitize_filename.py b/test/no_gui/test_sanitize_filename.py
new file mode 100644
index 000000000..46e156037
--- /dev/null
+++ b/test/no_gui/test_sanitize_filename.py
@@ -0,0 +1,61 @@
+
+import sys
+import unittest
+from unittest.mock import patch
+from gajim.common.helpers import sanitize_filename
+
+
+class SanitizeTest(unittest.TestCase):
+ '''Tests for the sanitize_filename function.'''
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_invalid_chars(self):
+ '''Make sure invalid characters are removed in filenames'''
+ self.assertEqual(sanitize_filename('A/B/C'), 'ABC')
+ self.assertEqual(sanitize_filename('A*C.d'), 'AC.d')
+ self.assertEqual(sanitize_filename('A?C.d'), 'AC.d')
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_invalid_suffix(self):
+ '''Dots are not allowed at the end'''
+ self.assertEqual(sanitize_filename('def.'), 'def')
+ self.assertEqual(sanitize_filename('def.ghi'), 'def.ghi')
+ self.assertTrue(sanitize_filename('X' * 1000 + '.').endswith('X'))
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_reserved_words(self):
+ '''Make sure reserved Windows words are prefixed'''
+ self.assertEqual(sanitize_filename('NUL'), '__NUL')
+ self.assertEqual(sanitize_filename('..'), '__')
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_long_names(self):
+ '''Make sure long names are truncated'''
+ self.assertEqual(len(sanitize_filename('X' * 300)), 50)
+ self.assertEqual(len(sanitize_filename(
+ '.'.join(['X' * 100, 'X' * 100, 'X' * 100]))), 50)
+ self.assertEqual(len(sanitize_filename(
+ '.'.join(['X' * 300, 'X' * 300, 'X' * 300]))), 50)
+ self.assertEqual(len(sanitize_filename('.' * 300 + '.txt')), 50)
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_unicode_normalization(self):
+ '''Names should be NFKD normalized'''
+ self.assertEqual(sanitize_filename('ў'), chr(1091) + chr(774))
+
+ @patch.object(sys, 'platform', 'win32')
+ def test_extensions(self):
+ '''Filename extensions should be preserved when possible.'''
+ really_long_name = 'X' * 1000 + '.pdf'
+ self.assertTrue(sanitize_filename(really_long_name).endswith('.pdf'))
+ self.assertTrue(sanitize_filename('X' * 1000).endswith('X'))
+ self.assertTrue(sanitize_filename(
+ 'X' * 100 + '.' + 'X' * 100 + '.pdf').endswith('.pdf'))
+ self.assertTrue(sanitize_filename(
+ 'X' * 100 + '.' + 'X' * 400).endswith('X'))
+ self.assertTrue(sanitize_filename(
+ 'X' * 100 + '.' + 'X' * 400 + '.pdf').endswith('.pdf'))
+
+
+if __name__ == '__main__':
+ unittest.main()