Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2023-02-16 00:00:39 +0300
committerCorinna Vinschen <corinna@vinschen.de>2023-02-16 00:00:39 +0300
commitb5f9b0241a36b8e0197405fc5ab23cbf0ba43e03 (patch)
tree3bf9b31b20e3cfa3f1be8b9087e253aa1a60e9f9 /winsup/cygwin/nlsfuncs.cc
parentbe67844f5da8cb6fb4c39cfdb3460e6955660b8e (diff)
Cygwin: is_unicode_equiv: implement Unicode equivalence class check
is_unicode_equiv compares two UTF-32 values and returns 1 if both are members of the same Unicode equivalence class, 0 otherwise. Note that this function only works with precomposed characters per Unicode normalization form C. It doesn't handle decomposed characters, just like its counterpart in glibc. I.e., equivalence class comparison using decomposed chars won't work. Example:
fnmatch("[=n=]", "ñ") == 0
fnmatch("[=ñ=]", "n") == 0
but
fnmatch("[=n=]", "n\x0303") == 1
fnmatch("[=n\x0303=]", "n") == 1
fnmatch("[=n\x0303=]", "n\x0303") == 1
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin/nlsfuncs.cc')
-rw-r--r--winsup/cygwin/nlsfuncs.cc48
1 files changed, 48 insertions, 0 deletions
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index 0d204929d..f3701312b 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -11,6 +11,7 @@ details. */
#include <stdlib.h>
#include <locale.h>
#include <wchar.h>
+#include <wctype.h>
#include "path.h"
#include "fhandler.h"
#include "dtable.h"
@@ -1110,6 +1111,7 @@ __collate_load_locale (struct __locale_t *locale, const char *name,
/* We use the Windows functions for locale-specific string comparison and
transformation. The advantage is that we don't need any files with
collation information. */
+
extern "C" int
wcscoll_l (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
struct __locale_t *locale)
@@ -1193,6 +1195,52 @@ __collate_range_cmp (int c1, int c2)
return wcscoll (s1, s2);
}
+/* Check if the UTF-32 input character `test' is in the same Unicode
+   equivalence class as the UTF-32 pattern character `eqv'.
+   Returns 1 if both are considered equivalent, 0 otherwise.
+   Note that we only recognize input in Unicode normalization form C, that
+   is, we expect all letters to be composed.  A single character is all we
+   look at.
+   To check equivalence, lowercase both characters, decompose pattern
+   letter and input letter and check the base character for equality.
+   Also, convert all digits to the ASCII digits 0 - 9 and compare. */
+extern "C" int
+is_unicode_equiv (wint_t test, wint_t eqv)
+{
+  /* Zero-initialized so the UTF-16 strings are always NUL-terminated. */
+  wchar_t decomp_testc[5] = { 0 };
+  wchar_t decomp_eqvc[5] = { 0 };
+  wchar_t testc[3] = { 0 };
+  wchar_t eqvc[3] = { 0 };
+
+  /* For equivalence classes, case doesn't matter.  However, be careful.
+     Only convert chars which have a "upper" to "lower". */
+  if (iswupper (eqv))
+    eqv = towlower (eqv);
+  if (iswupper (test))
+    test = towlower (test);
+  /* Convert to UTF-16 string.  U+10000 is the first code point outside
+     the BMP, so it already requires a surrogate pair; storing it in a
+     single 16 bit wchar_t would truncate it. */
+  if (eqv >= 0x10000)
+    {
+      eqvc[0] = ((eqv - 0x10000) >> 10) + 0xd800;
+      eqvc[1] = ((eqv - 0x10000) & 0x3ff) + 0xdc00;
+    }
+  else
+    eqvc[0] = eqv;
+  if (test >= 0x10000)
+    {
+      testc[0] = ((test - 0x10000) >> 10) + 0xd800;
+      testc[1] = ((test - 0x10000) & 0x3ff) + 0xdc00;
+    }
+  else
+    testc[0] = test;
+  /* Convert to denormalized form.  FoldStringW returns 0 on failure, in
+     which case the output buffers can't be trusted.  Don't report two
+     unrelated characters as equivalent just because both conversions
+     failed; fall back to comparing the case-folded code points. */
+  if (!FoldStringW (MAP_COMPOSITE | MAP_FOLDDIGITS, eqvc, -1, decomp_eqvc, 5)
+      || !FoldStringW (MAP_COMPOSITE | MAP_FOLDDIGITS, testc, -1,
+		       decomp_testc, 5))
+    return test == eqv ? 1 : 0;
+  /* If they are equivalent, the base char must be the same. */
+  if (decomp_eqvc[0] != decomp_testc[0])
+    return 0;
+  /* If it's a surrogate pair, check the second char, too */
+  if (decomp_eqvc[0] >= 0xd800 && decomp_eqvc[0] <= 0xdbff &&
+      decomp_eqvc[1] != decomp_testc[1])
+    return 0;
+  return 1;
+}
+
extern "C" size_t
wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
struct __locale_t *locale)