From 14b9a044798ebb3858a1f1a1377309a3d6054ac8 Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Thu, 23 Mar 2023 18:25:39 +0100
Subject: grep: work around UTF-8 related JIT bug in PCRE2 <= 10.34

Stephane is reporting[1] a regression introduced in git v2.40.0 that leads
to 'git grep' segfaulting in his CI pipeline. It turns out, he's using an
older version of libpcre2 that triggers a wild pointer dereference in the
generated JIT code that was fixed in PCRE2 10.35.

Instead of completely disabling the JIT compiler for the buggy version,
just mask out the Unicode property handling as we used to do prior to
commit acabd2048ee0 ("grep: correctly identify utf-8 characters with
\{b,w} in -P").

[1] https://lore.kernel.org/git/7E83DAA1-F9A9-4151-8D07-D80EA6D59EEA@clumio.com/

Reported-by: Stephane Odul
Signed-off-by: Mathias Krause
Signed-off-by: Junio C Hamano
---
 grep.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'grep.c')

diff --git a/grep.c b/grep.c
index 1687f65b64..00b3cf0e18 100644
--- a/grep.c
+++ b/grep.c
@@ -295,6 +295,15 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	if (!opt->ignore_locale && is_utf8_locale() && !literal)
 		options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
 
+#ifndef GIT_PCRE2_VERSION_10_35_OR_HIGHER
+	/*
+	 * Work around a JIT bug related to invalid Unicode character handling
+	 * fixed in 10.35:
+	 * https://github.com/PCRE2Project/pcre2/commit/c21bd977547d
+	 */
+	options &= ~PCRE2_UCP;
+#endif
+
 #ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
 	/* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
 	if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
-- 
cgit v1.2.3