[libcxx] Fix the ctype `is` (pointer version) function for Windows

Previously, this test snippet would report incorrect information: F::mask m; std::wstring in(L"\u00DA"); // LATIN CAPITAL LETTER U WITH ACUTE f.is(in.data(), in.data() + 1, &m); // m & F::lower would be set The single-character version of the `is` function wasn't affected by this issue though. Define `_LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA` for Windows, as the `alpha` / `_ALPHA` constant is a mask consisting of multiple bits set, which avoids setting `alpha` whenever any of the bits is set, in the `do_is` implementation. On Windows, with the "C" locale, wchars are classified according to their Unicode interpretation, just as in the en_US.UTF-8 locale on all platforms. Due to the differing classification of some characters, the `scan_is` and `scan_not` tests are quite annoying to fix, thus just ifdef out some of the tests for the "C" locale there - the code gets tested with the more standard en_US.UTF-8 locale anyway. Differential Revision: https://reviews.llvm.org/D120796
author: Martin Storsjö <martin@martin.st> 2022-01-22 02:21:31 +0300
committer: Martin Storsjö <martin@martin.st> 2022-03-05 01:47:19 +0300
commit: 45415ef91be5311939dfb0bf11a87b1722f68d02 (patch)
tree: a569cf98f6eea9f3b59d60d67daf25b6aae1473a /libcxx
parent: 3347e7d40fd83ae762dcdb7c6161550e4190d6cf (diff)
5 files changed, 39 insertions, 13 deletions
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 67fc9d1f58e6..e1781986fd35 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -454,6 +454,7 @@ public:
     static const mask blank  = _BLANK;
     static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
+# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
 #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
 # ifdef __APPLE__
     typedef __uint32_t mask;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
index 04ab9101a544..392e37373f81 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
@@ -13,7 +13,6 @@
 // bool is(mask m, charT c) const;
 
 // REQUIRES: locale.en_US.UTF-8
-// XFAIL: LIBCXX-WINDOWS-FIXME
 // XFAIL: libcpp-has-no-wide-characters
 
 #include <locale>
@@ -107,8 +106,15 @@ int main(int, char**)
             assert(f.is(F::graph, L'.'));
             assert(!f.is(F::graph,  L'\x07'));
 
+#if defined(_WIN32)
+            // On Windows, these wchars are classified according to their
+            // Unicode interpretation even in the "C" locale.
+            assert(f.is(F::alpha, L'\x00DA'));
+            assert(f.is(F::upper, L'\x00DA'));
+#else
             assert(!f.is(F::alpha, L'\x00DA'));
             assert(!f.is(F::upper, L'\x00DA'));
+#endif
         }
     }
 
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
index 4f163d7f2462..1c7c1f81fc39 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
@@ -13,7 +13,6 @@
 // const charT* do_is(const charT* low, const charT* high, mask* vec) const;
 
 // REQUIRES: locale.en_US.UTF-8
-// XFAIL: LIBCXX-WINDOWS-FIXME
 // XFAIL: libcpp-has-no-wide-characters
 
 #include <locale>
@@ -149,17 +148,27 @@ int main(int, char**)
 
             // L'\x00DA'
             assert(!(m[0] & F::space));
-            assert(!(m[0] & F::print));
             assert(!(m[0] & F::cntrl));
-            assert(!(m[0] & F::upper));
             assert(!(m[0] & F::lower));
-            assert(!(m[0] & F::alpha));
             assert(!(m[0] & F::digit));
             assert(!(m[0] & F::punct));
             assert(!(m[0] & F::xdigit));
             assert(!(m[0] & F::blank));
+#if defined(_WIN32)
+            // On Windows, these wchars are classified according to their
+            // Unicode interpretation even in the "C" locale.
+            assert( (m[0] & F::alpha));
+            assert( (m[0] & F::upper));
+            assert( (m[0] & F::print));
+            assert( (m[0] & F::alnum));
+            assert( (m[0] & F::graph));
+#else
+            assert(!(m[0] & F::alpha));
+            assert(!(m[0] & F::upper));
+            assert(!(m[0] & F::print));
             assert(!(m[0] & F::alnum));
             assert(!(m[0] & F::graph));
+#endif
 
             // L' '
             assert( (m[1] & F::space));
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
index 24d001000af7..163bd7a501d2 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
@@ -13,7 +13,6 @@
 // const charT* scan_is(mask m, const charT* low, const charT* high) const;
 
 // REQUIRES: locale.en_US.UTF-8
-// XFAIL: LIBCXX-WINDOWS-FIXME
 // XFAIL: libcpp-has-no-wide-characters
 
 #include <locale>
@@ -57,17 +56,23 @@ int main(int, char**)
             const std::wstring in(L"\x00DA A\x07.a1");
             std::vector<F::mask> m(in.size());
             assert(f.scan_is(F::space, in.data(), in.data() + in.size()) - in.data() == 1);
-            assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
             assert(f.scan_is(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 3);
-            assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
             assert(f.scan_is(F::lower, in.data(), in.data() + in.size()) - in.data() == 5);
-            assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
             assert(f.scan_is(F::digit, in.data(), in.data() + in.size()) - in.data() == 6);
             assert(f.scan_is(F::punct, in.data(), in.data() + in.size()) - in.data() == 4);
             assert(f.scan_is(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 2);
             assert(f.scan_is(F::blank, in.data(), in.data() + in.size()) - in.data() == 1);
+#if !defined(_WIN32)
+            // On Windows, these wchars are classified according to their
+            // Unicode interpretation even in the "C" locale, where
+            // the scan_is function returns the same as above for the
+            // en_US.UTF-8 locale.
+            assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
+            assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
+            assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
             assert(f.scan_is(F::alnum, in.data(), in.data() + in.size()) - in.data() == 2);
             assert(f.scan_is(F::graph, in.data(), in.data() + in.size()) - in.data() == 2);
+#endif
         }
     }
 
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
index 7e3c8183b151..e2c34f2527fd 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
@@ -13,7 +13,6 @@
 // const charT* scan_not(mask m, const charT* low, const charT* high) const;
 
 // REQUIRES: locale.en_US.UTF-8
-// XFAIL: LIBCXX-WINDOWS-FIXME
 // XFAIL: libcpp-has-no-wide-characters
 
 #include <locale>
@@ -57,17 +56,23 @@ int main(int, char**)
             const std::wstring in(L"\x00DA A\x07.a1");
             std::vector<F::mask> m(in.size());
             assert(f.scan_not(F::space, in.data(), in.data() + in.size()) - in.data() == 0);
-            assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 0);
-            assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::lower, in.data(), in.data() + in.size()) - in.data() == 0);
-            assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::digit, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::punct, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::blank, in.data(), in.data() + in.size()) - in.data() == 0);
+#if !defined(_WIN32)
+            // On Windows, these wchars are classified according to their
+            // Unicode interpretation even in the "C" locale, where
+            // the scan_not function returns the same as above for the
+            // en_US.UTF-8 locale.
+            assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
+            assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
+            assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::alnum, in.data(), in.data() + in.size()) - in.data() == 0);
             assert(f.scan_not(F::graph, in.data(), in.data() + in.size()) - in.data() == 0);
+#endif
         }
     }
author	Martin Storsjö <martin@martin.st>	2022-01-22 02:21:31 +0300
committer	Martin Storsjö <martin@martin.st>	2022-03-05 01:47:19 +0300
commit	45415ef91be5311939dfb0bf11a87b1722f68d02 (patch)
tree	a569cf98f6eea9f3b59d60d67daf25b6aae1473a /libcxx
parent	3347e7d40fd83ae762dcdb7c6161550e4190d6cf (diff)