diff options
author | wurstsalat <mailtrash@posteo.de> | 2022-08-26 09:52:14 +0300 |
---|---|---|
committer | wurstsalat <mailtrash@posteo.de> | 2022-08-26 23:55:45 +0300 |
commit | 2303790445a5d73c0a6867e4af936f5451a50a37 (patch) | |
tree | ff5d15e7f5c2065f7abcc6edbffc8250d223622d | |
parent | f9a3eaad543d8236c8c1a473b16d6afd0c9e4985 (diff) |
fix: Preview: Sanitize filename from disallowed chars [sanitize-filenames]
Fixes #11105, #10752
-rw-r--r-- | gajim/common/helpers.py | 58 | ||||
-rw-r--r-- | gajim/common/preview_helpers.py | 8 | ||||
-rw-r--r-- | test/no_gui/test_sanitize_filename.py | 61 |
3 files changed, 98 insertions, 29 deletions
diff --git a/gajim/common/helpers.py b/gajim/common/helpers.py index b3243575e..2f103d203 100644 --- a/gajim/common/helpers.py +++ b/gajim/common/helpers.py @@ -37,7 +37,6 @@ import sys import re import os import subprocess -import base64 import hashlib import shlex import socket @@ -58,10 +57,10 @@ from datetime import datetime from datetime import timedelta from urllib.parse import unquote from urllib.parse import urlparse -from encodings.punycode import punycode_encode from functools import wraps from pathlib import Path from packaging.version import Version as V +import unicodedata from nbxmpp.namespaces import Namespace from nbxmpp.const import Role @@ -358,28 +357,41 @@ def get_file_path_from_dnd_dropped_uri(uri: str) -> str: def sanitize_filename(filename: str) -> str: ''' - Make sure the filename we will write does contain only acceptable and latin - characters, and is not too long (in that case hash it) + Sanitize filename of elements not allowed on Windows + https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + Limit filename length to 50 chars on all systems ''' - # 48 is the limit - if len(filename) > 48: - hash_ = hashlib.md5(filename.encode('utf-8')) - filename = base64.b64encode(hash_.digest()).decode('utf-8') - - # make it latin chars only - filename = punycode_encode(filename).decode('utf-8') - filename = filename.replace('/', '_') - if os.name == 'nt': - filename = filename.replace('?', '_')\ - .replace(':', '_')\ - .replace('\\', '_')\ - .replace('"', "'")\ - .replace('|', '_')\ - .replace('*', '_')\ - .replace('<', '_')\ - .replace('>', '_') - - return filename + if sys.platform == 'win32': + blacklist = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '\0'] + reserved_filenames = [ + 'CON', 'PRN', 'AUX', 'NUL', 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', + 'COM6', 'COM7', 'COM8', 'COM9', 'LPT1', 'LPT2', 'LPT3', 'LPT4', + 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9', + ] + filename = ''.join(char for char in filename if char not in 
blacklist) + + filename = ''.join(char for char in filename if 31 < ord(char)) + + filename = unicodedata.normalize('NFKD', filename) + filename = filename.rstrip('. ') + filename = filename.strip() + + if all(char == '.' for char in filename): + filename = f'__{filename}' + if filename in reserved_filenames: + filename = f'__{filename}' + if len(filename) == 0: + filename = '__' + + extension = Path(filename).suffix[:10] + filename = Path(filename).stem + final_length = 50 - len(extension) + + if len(filename) > final_length: + # Many Filesystems have a limit on filename length: keep it short + filename = filename[:final_length] + + return f'{filename}{extension}' def get_contact_dict_for_account(account: str) -> dict[str, types.BareContact]: diff --git a/gajim/common/preview_helpers.py b/gajim/common/preview_helpers.py index 2de47e589..30f166200 100644 --- a/gajim/common/preview_helpers.py +++ b/gajim/common/preview_helpers.py @@ -45,6 +45,7 @@ from cryptography.hazmat.primitives.ciphers import Cipher from cryptography.hazmat.primitives.ciphers import algorithms from cryptography.hazmat.primitives.ciphers.modes import GCM +from gajim.common.helpers import sanitize_filename from gajim.common.i18n import _ log = logging.getLogger('gajim.c.preview_helpers') @@ -308,12 +309,7 @@ def get_image_paths(uri: str, web_stem = path.stem extension = path.suffix - if len(web_stem) > 90: - # Many Filesystems have a limit on filename length - # Most have 255, some encrypted ones only 143 - # We add around 50 chars for the hash, - # so the filename should not exceed 90 - web_stem = web_stem[:90] + web_stem = sanitize_filename(web_stem) name_hash = hashlib.sha1(str(uri).encode()).hexdigest() diff --git a/test/no_gui/test_sanitize_filename.py b/test/no_gui/test_sanitize_filename.py new file mode 100644 index 000000000..46e156037 --- /dev/null +++ b/test/no_gui/test_sanitize_filename.py @@ -0,0 +1,61 @@ + +import sys +import unittest +from unittest.mock import patch +from 
gajim.common.helpers import sanitize_filename + + +class SanitizeTest(unittest.TestCase): + '''Tests for the sanitize_filename function.''' + + @patch.object(sys, 'platform', 'win32') + def test_invalid_chars(self): + '''Make sure invalid characters are removed in filenames''' + self.assertEqual(sanitize_filename('A/B/C'), 'ABC') + self.assertEqual(sanitize_filename('A*C.d'), 'AC.d') + self.assertEqual(sanitize_filename('A?C.d'), 'AC.d') + + @patch.object(sys, 'platform', 'win32') + def test_invalid_suffix(self): + '''Dots are not allowed at the end''' + self.assertEqual(sanitize_filename('def.'), 'def') + self.assertEqual(sanitize_filename('def.ghi'), 'def.ghi') + self.assertTrue(sanitize_filename('X' * 1000 + '.').endswith('X')) + + @patch.object(sys, 'platform', 'win32') + def test_reserved_words(self): + '''Make sure reserved Windows words are prefixed''' + self.assertEqual(sanitize_filename('NUL'), '__NUL') + self.assertEqual(sanitize_filename('..'), '__') + + @patch.object(sys, 'platform', 'win32') + def test_long_names(self): + '''Make sure long names are truncated''' + self.assertEqual(len(sanitize_filename('X' * 300)), 50) + self.assertEqual(len(sanitize_filename( + '.'.join(['X' * 100, 'X' * 100, 'X' * 100]))), 50) + self.assertEqual(len(sanitize_filename( + '.'.join(['X' * 300, 'X' * 300, 'X' * 300]))), 50) + self.assertEqual(len(sanitize_filename('.' * 300 + '.txt')), 50) + + @patch.object(sys, 'platform', 'win32') + def test_unicode_normalization(self): + '''Names should be NFKD normalized''' + self.assertEqual(sanitize_filename('ў'), chr(1091) + chr(774)) + + @patch.object(sys, 'platform', 'win32') + def test_extensions(self): + '''Filename extensions should be preserved when possible.''' + really_long_name = 'X' * 1000 + '.pdf' + self.assertTrue(sanitize_filename(really_long_name).endswith('.pdf')) + self.assertTrue(sanitize_filename('X' * 1000).endswith('X')) + self.assertTrue(sanitize_filename( + 'X' * 100 + '.' 
+ 'X' * 100 + '.pdf').endswith('.pdf')) + self.assertTrue(sanitize_filename( + 'X' * 100 + '.' + 'X' * 400).endswith('X')) + self.assertTrue(sanitize_filename( + 'X' * 100 + '.' + 'X' * 400 + '.pdf').endswith('.pdf')) + + +if __name__ == '__main__': + unittest.main() |