diff options
author | Patrick Sean Klein <patrick@libklein.com> | 2022-05-21 17:21:33 +0300 |
---|---|---|
committer | Jonathan White <support@dmapps.us> | 2022-06-06 04:58:52 +0300 |
commit | e16c007d43b0ca6ed8e4d1b492e595ea8be64393 (patch) | |
tree | 5fc5fde6f6134708a38dca4b990597dcdc7e17b8 | |
parent | 924eb6dbc48ed02c5fff9223b0a8e499d5cbea51 (diff) |
Correct regex escape logic
* Fixes #7776
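Implement a port of QRegularExpression::escape (Tools::escapeRegex) and use it within Tools::convertToRegex to allow usage on older Qt versions.
Also wrap EXACT_MATCH patterns in a non-capturing group so that the anchors apply to the whole pattern (e.g. `a|b` becomes `^(?:a|b)$` instead of `^a|b$`).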
-rw-r--r-- | src/core/Tools.cpp | 65 | ||||
-rw-r--r-- | src/core/Tools.h | 27 | ||||
-rw-r--r-- | tests/TestEntrySearcher.cpp | 4 | ||||
-rw-r--r-- | tests/TestFdoSecrets.cpp | 5 | ||||
-rw-r--r-- | tests/TestTools.cpp | 51 | ||||
-rw-r--r-- | tests/TestTools.h | 2 |
6 files changed, 136 insertions, 18 deletions
diff --git a/src/core/Tools.cpp b/src/core/Tools.cpp index 867d8c174..657797116 100644 --- a/src/core/Tools.cpp +++ b/src/core/Tools.cpp @@ -1,6 +1,10 @@ /* * Copyright (C) 2012 Felix Geyer <debfx@fobos.de> * Copyright (C) 2017 Lennart Glauer <mail@lennart-glauer.de> + * Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>. + * Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, + * author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> + * Copyright (C) 2021 The Qt Company Ltd. * Copyright (C) 2021 KeePassXC Team <team@keepassxc.org> * * This program is free software: you can redistribute it and/or modify @@ -296,8 +300,59 @@ namespace Tools return true; } - // Escape regex symbols - auto regexEscape = QRegularExpression(R"re(([-[\]{}()+.,\\\/^$#|*?]))re"); + /**************************************************************************** + * + * Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>. + * Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, + * author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> + * Copyright (C) 2021 The Qt Company Ltd. Contact: https://www.qt.io/licensing/ + * + * This function is part of the QtCore module of the Qt Toolkit. And subject to the + * following licenses. + * + * GNU General Public License Usage + * Alternatively, this function may be used under the terms of the GNU + * General Public License version 2.0 or (at your option) the GNU General + * Public license version 3 or any later version approved by the KDE Free + * Qt Foundation. The licenses are as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 + * included in the packaging of this file. Please review the following + * information to ensure the GNU General Public License requirements will + * be met: https://www.gnu.org/licenses/gpl-2.0.html and + * https://www.gnu.org/licenses/gpl-3.0.html. 
+ */ + QString escapeRegex(const QString& str) + { + QString result; + const auto count = str.size(); + result.reserve(count * 2); + + // everything but [a-zA-Z0-9_] gets escaped, + // cf. perldoc -f quotemeta + for (int i = 0; i < count; ++i) { + const QChar current = str.at(i); + + if (current == QChar::Null) { + // unlike Perl, a literal NUL must be escaped with + // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL), + // because pcre16_compile uses a NUL-terminated string + result.append(u'\\'); + result.append(u'0'); + } else if ((current < u'a' || current > u'z') && (current < u'A' || current > u'Z') + && (current < u'0' || current > u'9') && current != u'_') { + result.append(u'\\'); + result.append(current); + if (current.isHighSurrogate() && i < (count - 1)) { + result.append(str.at(++i)); + } + } else { + result.append(current); + } + } + + result.squeeze(); + return result; + } QRegularExpression convertToRegex(const QString& string, int opts) { @@ -305,7 +360,7 @@ namespace Tools // Wildcard support (*, ?, |) if (opts & RegexConvertOpts::WILDCARD_ALL || opts & RegexConvertOpts::ESCAPE_REGEX) { - pattern.replace(regexEscape, "\\\\1"); + pattern = escapeRegex(pattern); if (opts & RegexConvertOpts::WILDCARD_UNLIMITED_MATCH) { pattern.replace("\\*", ".*"); @@ -318,9 +373,9 @@ namespace Tools } } - // Exact modifier if (opts & RegexConvertOpts::EXACT_MATCH) { - pattern = "^" + pattern + "$"; + // Exact modifier + pattern = "^(?:" + pattern + ")$"; } auto regex = QRegularExpression(pattern); diff --git a/src/core/Tools.h b/src/core/Tools.h index d8a1371f1..a8094d0a3 100644 --- a/src/core/Tools.h +++ b/src/core/Tools.h @@ -45,18 +45,33 @@ namespace Tools QString envSubstitute(const QString& filepath, QProcessEnvironment environment = QProcessEnvironment::systemEnvironment()); + /** + * Escapes all characters in regex such that they do not receive any special treatment when used + * in a regular expression. 
Essentially, this function escapes any characters not in [a-zA-Z0-9_]. + * @param regex The unescaped regular expression string. + * @return An escaped string safe to use in a regular expression. + */ + QString escapeRegex(const QString& regex); + enum RegexConvertOpts { DEFAULT = 0, - WILDCARD_UNLIMITED_MATCH = 0x1, - WILDCARD_SINGLE_MATCH = 0x2, - WILDCARD_LOGICAL_OR = 0x4, + WILDCARD_UNLIMITED_MATCH = 1, + WILDCARD_SINGLE_MATCH = 1 << 2, + WILDCARD_LOGICAL_OR = 1 << 3, WILDCARD_ALL = WILDCARD_UNLIMITED_MATCH | WILDCARD_SINGLE_MATCH | WILDCARD_LOGICAL_OR, - EXACT_MATCH = 0x8, - CASE_SENSITIVE = 0x16, - ESCAPE_REGEX = 0x32, + EXACT_MATCH = 1 << 4, + CASE_SENSITIVE = 1 << 5, + ESCAPE_REGEX = 1 << 6, }; + /** + * Converts input string to a regular expression according to the options specified in opts. + * Unless ESCAPE_REGEX or a WILDCARD_* option is set, the input is assumed to be a proper regular expression. + * @param string The input string. Assumed to be a proper regular expression unless escaping applies (see above). + * @param opts Tools::RegexConvertOpts options the regex will be converted with. + * @return The regular expression built from string and opts. 
+ */ QRegularExpression convertToRegex(const QString& string, int opts = RegexConvertOpts::DEFAULT); template <typename RandomAccessIterator, typename T> diff --git a/tests/TestEntrySearcher.cpp b/tests/TestEntrySearcher.cpp index fc091f1a5..cc19a0c25 100644 --- a/tests/TestEntrySearcher.cpp +++ b/tests/TestEntrySearcher.cpp @@ -224,7 +224,7 @@ void TestEntrySearcher::testSearchTermParser() QCOMPARE(terms.length(), 2); QCOMPARE(terms[0].field, EntrySearcher::Field::Url); - QCOMPARE(terms[0].regex.pattern(), QString("^.*\\.google\\.com$")); + QCOMPARE(terms[0].regex.pattern(), QString("^(?:.*\\.google\\.com)$")); QCOMPARE(terms[1].field, EntrySearcher::Field::Username); QCOMPARE(terms[1].regex.pattern(), QString("\\d+\\w{2}")); @@ -237,7 +237,7 @@ void TestEntrySearcher::testSearchTermParser() QCOMPARE(terms[0].field, EntrySearcher::Field::AttributeValue); QCOMPARE(terms[0].word, QString("abc")); - QCOMPARE(terms[0].regex.pattern(), QString("^efg$")); + QCOMPARE(terms[0].regex.pattern(), QString("^(?:efg)$")); QCOMPARE(terms[1].field, EntrySearcher::Field::AttributeValue); QCOMPARE(terms[1].word, QString("def")); diff --git a/tests/TestFdoSecrets.cpp b/tests/TestFdoSecrets.cpp index e4d8cca22..0f4c374a4 100644 --- a/tests/TestFdoSecrets.cpp +++ b/tests/TestFdoSecrets.cpp @@ -82,6 +82,11 @@ void TestFdoSecrets::testSpecialCharsInAttributeValue() QCOMPARE(res.count(), 1); QCOMPARE(res[0]->title(), QStringLiteral("titleB")); } + { + const auto term = Collection::attributeToTerm("testAttribute", "v|"); + const auto res = EntrySearcher().search({term}, root.data()); + QCOMPARE(res.count(), 0); + } } void TestFdoSecrets::testDBusPathParse() diff --git a/tests/TestTools.cpp b/tests/TestTools.cpp index 438606c7e..f1cba482b 100644 --- a/tests/TestTools.cpp +++ b/tests/TestTools.cpp @@ -165,6 +165,34 @@ void TestTools::testBackupFilePatternSubstitution() QCOMPARE(Tools::substituteBackupFilePath(pattern, dbFilePath), expectedSubstitution); } +void 
TestTools::testEscapeRegex_data() +{ + QTest::addColumn<QString>("input"); + QTest::addColumn<QString>("expected"); + + QString all_regular_characters = "0123456789_"; + for (char c = 'a'; c <= 'z'; ++c) { + all_regular_characters += QChar::fromLatin1(c); + } + for (char c = 'A'; c <= 'Z'; ++c) { + all_regular_characters += QChar::fromLatin1(c); + } + + QTest::newRow("Regular characters should not be escaped") << all_regular_characters << all_regular_characters; + QTest::newRow("Special characters should be escaped") << R"(.^$*+-?()[]{}|\)" + << R"(\.\^\$\*\+\-\?\(\)\[\]\{\}\|\\)"; + QTest::newRow("Null character") << QString::fromLatin1("ab\0c", 4) << "ab\\0c"; +} + +void TestTools::testEscapeRegex() +{ + QFETCH(QString, input); + QFETCH(QString, expected); + + auto actual = Tools::escapeRegex(input); + QCOMPARE(actual, expected); +} + void TestTools::testConvertToRegex() { QFETCH(QString, input); @@ -185,16 +213,29 @@ void TestTools::testConvertToRegex_data() QTest::newRow("No Options") << input << static_cast<int>(Tools::RegexConvertOpts::DEFAULT) << QString(R"(te|st*t?[5]^(test);',.)"); + // Escape regex + QTest::newRow("Escape Regex") << input << static_cast<int>(Tools::RegexConvertOpts::ESCAPE_REGEX) + << Tools::escapeRegex(input); + QTest::newRow("Escape Regex and exact match") + << input << static_cast<int>(Tools::RegexConvertOpts::ESCAPE_REGEX | Tools::RegexConvertOpts::EXACT_MATCH) + << "^(?:" + Tools::escapeRegex(input) + ")$"; + + // Exact match does not escape the pattern QTest::newRow("Exact Match") << input << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH) - << QString(R"(^te|st*t?[5]^(test);',.$)"); + << QString(R"(^(?:te|st*t?[5]^(test);',.)$)"); + + // Exact match with improper regex + QTest::newRow("Exact Match with improper regex") << ")av(" << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH) + << QString(R"(^(?:)av()$)"); + QTest::newRow("Exact Match & Wildcard") << input << static_cast<int>(Tools::RegexConvertOpts::EXACT_MATCH | 
Tools::RegexConvertOpts::WILDCARD_ALL) - << QString(R"(^te|st.*t.\[5\]\^\(test\);'\,\.$)"); + << QString(R"(^(?:te|st.*t.\[5\]\^\(test\)\;\'\,\.)$)"); QTest::newRow("Wildcard Single Match") << input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_SINGLE_MATCH) - << QString(R"(te\|st\*t.\[5\]\^\(test\);'\,\.)"); + << QString(R"(te\|st\*t.\[5\]\^\(test\)\;\'\,\.)"); QTest::newRow("Wildcard OR") << input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_LOGICAL_OR) - << QString(R"(te|st\*t\?\[5\]\^\(test\);'\,\.)"); + << QString(R"(te|st\*t\?\[5\]\^\(test\)\;\'\,\.)"); QTest::newRow("Wildcard Unlimited Match") << input << static_cast<int>(Tools::RegexConvertOpts::WILDCARD_UNLIMITED_MATCH) - << QString(R"(te\|st.*t\?\[5\]\^\(test\);'\,\.)"); + << QString(R"(te\|st.*t\?\[5\]\^\(test\)\;\'\,\.)"); } diff --git a/tests/TestTools.h b/tests/TestTools.h index f590a53a1..2e8cbb8bb 100644 --- a/tests/TestTools.h +++ b/tests/TestTools.h @@ -31,6 +31,8 @@ private slots: void testValidUuid(); void testBackupFilePatternSubstitution_data(); void testBackupFilePatternSubstitution(); + void testEscapeRegex(); + void testEscapeRegex_data(); void testConvertToRegex(); void testConvertToRegex_data(); }; |