From 0bdc764b421b56ac2961ce54f538d4a71f38b724 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Thu, 16 Mar 2023 12:44:32 +0100 Subject: Cygwin: regex: wgetnext: Re-add kludge to be more glibc compatible Add comment to explain. Signed-off-by: Corinna Vinschen --- winsup/cygwin/regex/regcomp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'winsup/cygwin/regex') diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c index 3c7359310..59da896a9 100644 --- a/winsup/cygwin/regex/regcomp.c +++ b/winsup/cygwin/regex/regcomp.c @@ -1528,6 +1528,18 @@ wgetnext(struct parse *p) wint_t wc; size_t n; +#ifdef __CYGWIN__ + /* Kludge for more glibc compatibility. On Cygwin as well as on + Linux, mbrtowc returns -1 if the current locale's codeset is ASCII + and the character is >= 0x80. Nevertheless, glibc's regcomp allows + any char value, even stuff like [\xc0-\xff], if the locale's codeset + is ASCII, so in regcomp it ignores the fact that chars >= 0x80 are + invalid ASCII chars. To be more Linux-compatible, we align the + behaviour to glibc here. Allow any character value if the current + locale's codeset is ASCII. */ + if (*__current_locale_charset () == 'A') /* SCII */ + return (wint_t) (unsigned char) *p->next++; +#endif memset(&mbs, 0, sizeof(mbs)); n = mbrtowi(&wc, p->next, p->end - p->next, &mbs); if (n == (size_t)-1 || n == (size_t)-2) { -- cgit v1.2.3