diff options
author | Atsushi Eno <atsushieno@gmail.com> | 2008-11-04 03:29:35 +0300 |
---|---|---|
committer | Atsushi Eno <atsushieno@gmail.com> | 2008-11-04 03:29:35 +0300 |
commit | c1af5e059673681ddaa9e73436fe49699551c379 (patch) | |
tree | 64ca39951bd02965492c90470e0ce43f0a40ed0e /eglib | |
parent | 18a714538ba1c6631cc8b370f4da0931bdcdc6d8 (diff) |
2008-11-04 Atsushi Enomoto <atsushi@ximian.com>
* src/gutf8.c, src/gunicode.c, src/glib.h:
implemented g_unichar_type(), g_unichar_toupper(),
g_unichar_tolower(), g_unichar_totitle(), g_utf8_strup()
and g_utf8_strdown(). Fixed some surrogate pair bugs.
* TODO : removed implemented things.
* test/unicode.c, test/tests.h, test/utf8.c, test/Makefile.am:
added new tests.
svn path=/trunk/mono/; revision=117831
Diffstat (limited to 'eglib')
-rw-r--r-- | eglib/ChangeLog | 10 | ||||
-rw-r--r-- | eglib/TODO | 3 | ||||
-rw-r--r-- | eglib/src/glib.h | 33 | ||||
-rw-r--r-- | eglib/src/gunicode.c | 86 | ||||
-rw-r--r-- | eglib/src/gutf8.c | 44 | ||||
-rw-r--r-- | eglib/test/Makefile.am | 1 | ||||
-rw-r--r-- | eglib/test/tests.h | 2 | ||||
-rw-r--r-- | eglib/test/unicode.c | 99 | ||||
-rw-r--r-- | eglib/test/utf8.c | 141 |
9 files changed, 406 insertions, 13 deletions
diff --git a/eglib/ChangeLog b/eglib/ChangeLog index 15587dcda0a..4f742d12aef 100644 --- a/eglib/ChangeLog +++ b/eglib/ChangeLog @@ -1,5 +1,15 @@ 2008-11-04 Atsushi Enomoto <atsushi@ximian.com> + * src/gutf8.c, src/gunicode.c, src/glib.h: + implemented g_unichar_type(), g_unichar_toupper(), + g_unichar_tolower(), g_unichar_totitle(), g_utf8_strup() + and g_utf8_strdown(). Fixed some surrogate pair bugs. + * TODO : removed implemented things. + * test/unicode.c, test/tests.h, test/utf8.c, test/Makefile.am: + added new tests. + +2008-11-04 Atsushi Enomoto <atsushi@ximian.com> + * src/unicode-data.h : new header for some new unicode manipulation functions. diff --git a/eglib/TODO b/eglib/TODO index 76ac0da7d53..bbbb49bbba4 100644 --- a/eglib/TODO +++ b/eglib/TODO @@ -14,8 +14,7 @@ Macros: * Unimplemented, not supported currently: - g_unichar_tolower Used for deprecated unmanaged string collation - g_unichar_type Used for deprecated unmanaged string collation + (none as yet.) * Dead Code diff --git a/eglib/src/glib.h b/eglib/src/glib.h index 2071113cecb..b4ae36d7d5d 100644 --- a/eglib/src/glib.h +++ b/eglib/src/glib.h @@ -531,10 +531,41 @@ gpointer g_convert_error_quark(void); typedef guint32 gunichar; typedef enum { + G_UNICODE_CONTROL, + G_UNICODE_FORMAT, + G_UNICODE_UNASSIGNED, + G_UNICODE_PRIVATE_USE, + G_UNICODE_SURROGATE, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, + G_UNICODE_TITLECASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_COMBINING_MARK, + G_UNICODE_ENCLOSING_MARK, + G_UNICODE_NON_SPACING_MARK, + G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_LETTER_NUMBER, + G_UNICODE_OTHER_NUMBER, + G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, + G_UNICODE_LINE_SEPARATOR, + G_UNICODE_PARAGRAPH_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR } GUnicodeType; +gunichar g_unichar_toupper (gunichar c); gunichar g_unichar_tolower (gunichar c); +gunichar g_unichar_totitle (gunichar c); GUnicodeType g_unichar_type (gunichar c); gboolean g_unichar_isxdigit (gunichar c); gint g_unichar_xdigit_value (gunichar c); @@ -570,6 +601,8 @@ typedef enum { G_CONVERT_ERROR_NOT_ABSOLUTE_PATH } GConvertError; +gchar* g_utf8_strup (const gchar *str, gssize len); +gchar* g_utf8_strdown (const gchar *str, gssize len); gunichar2 *g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error); gchar *g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error); gunichar2 *g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error); diff --git a/eglib/src/gunicode.c b/eglib/src/gunicode.c index c426cf0588a..0f7196221f2 100644 --- a/eglib/src/gunicode.c +++ b/eglib/src/gunicode.c @@ -35,6 +35,7 @@ */ #include <stdio.h> #include <glib.h> +#include <unicode-data.h> #include <errno.h> #ifdef _MSC_VER /* FIXME */ @@ -82,15 +83,94 @@ static const gulong offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E208 GUnicodeType g_unichar_type (gunichar c) { - g_error ("%s", "g_unichar_type is not implemented"); +int i; + + guint16 cp = (guint16) c; + for (i = 0; i < unicode_category_ranges_count; i++) { + if (cp < unicode_category_ranges [i].start) + continue; + if (unicode_category_ranges [i].end <= cp) + continue; + return unicode_category [i] [cp - unicode_category_ranges [i].start]; + } + + /* + // 3400-4DB5: OtherLetter + // 4E00-9FC3: OtherLetter + // AC00-D7A3: OtherLetter + // D800-DFFF: OtherSurrogate + // E000-F8FF: OtherPrivateUse + // 20000-2A6D6 OtherLetter + // F0000-FFFFD OtherPrivateUse + // 100000-10FFFD OtherPrivateUse + */ + if (0x3400 <= cp && cp < 0x4DB5) + return G_UNICODE_OTHER_LETTER; + if (0x4E00 <= cp && cp < 0x9FC3) + return G_UNICODE_OTHER_LETTER; + if (0xAC00<= cp && cp < 0xD7A3) + return G_UNICODE_OTHER_LETTER; + if (0xD800 <= cp && cp < 0xDFFF) + return G_UNICODE_SURROGATE; + if (0xE000 <= cp && cp < 0xF8FF) + return G_UNICODE_PRIVATE_USE; + /* since the argument is UTF-16, we cannot check beyond FFFF */ + + /* It should match any of above */ return 0; } gunichar +g_unichar_case (gunichar c, gboolean upper) +{ + gint8 i, i2; + guint32 cp = (guint32) c, v; + + for (i = 0; i < simple_case_map_ranges_count; i++) { + if (cp < simple_case_map_ranges [i].start) + return c; + if (simple_case_map_ranges [i].end <= cp) + continue; + if (c < 0x10000) { + guint16 *tab = upper ? simple_upper_case_mapping_lowarea [i] : simple_lower_case_mapping_lowarea [i]; + v = tab [cp - simple_case_map_ranges [i].start]; + } else { + i2 = i - (upper ? simple_upper_case_mapping_lowarea_table_count : simple_lower_case_mapping_lowarea_table_count); + guint32 *tab = upper ? simple_upper_case_mapping_higharea [i2] : simple_lower_case_mapping_higharea [i2]; + v = tab [cp - simple_case_map_ranges [i].start]; + } + return v != 0 ? (gunichar) v : c; + } + return c; +} + +gunichar +g_unichar_toupper (gunichar c) +{ + return g_unichar_case (c, TRUE); +} + +gunichar g_unichar_tolower (gunichar c) { - g_error ("%s", "g_unichar_type is not implemented"); - return 0; + return g_unichar_case (c, FALSE); +} + +gunichar +g_unichar_totitle (gunichar c) +{ + guint8 i; + guint32 cp; + + cp = (guint32) c; + for (i = 0; i < simple_titlecase_mapping_count; i++) { + if (simple_titlecase_mapping [i].codepoint == cp) + return simple_titlecase_mapping [i].title; + if (simple_titlecase_mapping [i].codepoint > cp) + /* it is ordered, hence no more match */ + break; + } + return g_unichar_toupper (c); } gboolean diff --git a/eglib/src/gutf8.c b/eglib/src/gutf8.c index 77de844693d..9d5786951b1 100644 --- a/eglib/src/gutf8.c +++ b/eglib/src/gutf8.c @@ -21,6 +21,40 @@ g_convert_error_quark () return error_quark; } +gunichar* +utf8_case_conv (const gchar *str, gssize len, gboolean upper) +{ + glong i, u16len, u32len; + gunichar2 *u16str; + gunichar *u32str; + gchar *u8str; + GError **err = NULL; + + u16str = g_utf8_to_utf16 (str, len, NULL, &u16len, err); + u32str = g_utf16_to_ucs4 (u16str, u16len, NULL, &u32len, err); + for (i = 0; i < u32len; i++) { + u32str [i] = upper ? g_unichar_toupper (u32str [i]) : g_unichar_tolower (u32str [i]); + } + g_free (u16str); + u16str = g_ucs4_to_utf16 (u32str, u32len, NULL, &u16len, err); + u8str = g_utf16_to_utf8 (u16str, u16len, NULL, NULL, err); + g_free (u32str); + g_free (u16str); + return u8str; +} + +gchar* +g_utf8_strup (const gchar *str, gssize len) +{ + return utf8_case_conv (str, len, TRUE); +} + +gchar* +g_utf8_strdown (const gchar *str, gssize len) +{ + return utf8_case_conv (str, len, FALSE); +} + gunichar2* g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error) { @@ -268,12 +302,14 @@ g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *item while (len < 0 ? str [in_pos] : in_pos < len) { ch = str [in_pos]; if (surrogate) { - surrogate = 0; - if (ch >= 0xDC00 && ch <= 0xDFFF) + if (ch >= 0xDC00 && ch <= 0xDFFF) { codepoint = 0x10000 + (ch - 0xDC00) + ((surrogate - 0xD800) << 10); - else + surrogate = 0; + } else { + surrogate = 0; /* invalid surrogate pair */ continue; + } } else { /* fast path optimization */ if (ch < 0x80) { @@ -296,6 +332,8 @@ g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *item } in_pos++; + if (surrogate != 0) + continue; if (codepoint < 0x80) ret [out_pos++] = (gchar) codepoint; else if (codepoint < 0x0800) { diff --git a/eglib/test/Makefile.am b/eglib/test/Makefile.am index f57c033abff..4f825b3cc61 100644 --- a/eglib/test/Makefile.am +++ b/eglib/test/Makefile.am @@ -21,6 +21,7 @@ SOURCES = \ pattern.c \ dir.c \ markup.c \ + unicode.c \ utf8.c \ endian.c \ module.c \ diff --git a/eglib/test/tests.h b/eglib/test/tests.h index 6c038e93b01..61ba80fe7a9 100644 --- a/eglib/test/tests.h +++ b/eglib/test/tests.h @@ -18,6 +18,7 @@ DEFINE_TEST_GROUP_INIT_H(file_tests_init); DEFINE_TEST_GROUP_INIT_H(pattern_tests_init); DEFINE_TEST_GROUP_INIT_H(dir_tests_init); DEFINE_TEST_GROUP_INIT_H(markup_tests_init); +DEFINE_TEST_GROUP_INIT_H(unicode_tests_init); DEFINE_TEST_GROUP_INIT_H(utf8_tests_init); DEFINE_TEST_GROUP_INIT_H(endian_tests_init); DEFINE_TEST_GROUP_INIT_H(module_tests_init); @@ -42,6 +43,7 @@ static Group test_groups [] = { {"file", file_tests_init}, {"pattern", pattern_tests_init}, {"dir", dir_tests_init}, + {"unicode", unicode_tests_init}, {"utf8", utf8_tests_init}, {"endian", endian_tests_init}, {"module", module_tests_init}, diff --git a/eglib/test/unicode.c b/eglib/test/unicode.c new file mode 100644 index 00000000000..c1c3402ba8e --- /dev/null +++ b/eglib/test/unicode.c @@ -0,0 +1,99 @@ +#include "test.h" + +/* + * g_unichar_type + */ +RESULT +test_g_unichar_type () +{ + if (g_unichar_type ('A') != G_UNICODE_UPPERCASE_LETTER) + return FAILED ("#1"); + if (g_unichar_type ('a') != G_UNICODE_LOWERCASE_LETTER) + return FAILED ("#2"); + if (g_unichar_type ('1') != G_UNICODE_DECIMAL_NUMBER) + return FAILED ("#3"); + if (g_unichar_type (0xA3) != G_UNICODE_CURRENCY_SYMBOL) + return FAILED ("#4"); + return NULL; +} + +/* + * g_unichar_toupper + */ +RESULT +test_g_unichar_toupper () +{ + if (g_unichar_toupper (0) != 0) + return FAILED ("#0"); + if (g_unichar_toupper ('a') != 'A') + return FAILED ("#1"); + if (g_unichar_toupper ('1') != '1') + return FAILED ("#2"); + if (g_unichar_toupper (0x1C4) != 0x1C4) + return FAILED ("#3"); + if (g_unichar_toupper (0x1F2) != 0x1F1) + return FAILED ("#4"); + if (g_unichar_toupper (0x1F3) != 0x1F1) + return FAILED ("#5"); + if (g_unichar_toupper (0xFFFF) != 0xFFFF) + return FAILED ("#6"); + if (g_unichar_toupper (0x10428) != 0x10400) + return FAILED ("#7"); + return NULL; +} + +/* + * g_unichar_tolower + */ +RESULT +test_g_unichar_tolower () +{ + if (g_unichar_tolower (0) != 0) + return FAILED ("#0"); + if (g_unichar_tolower ('A') != 'a') + return FAILED ("#1"); + if (g_unichar_tolower ('1') != '1') + return FAILED ("#2"); + if (g_unichar_tolower (0x1C5) != 0x1C6) + return FAILED ("#3"); + if (g_unichar_tolower (0x1F1) != 0x1F3) + return FAILED ("#4"); + if (g_unichar_tolower (0x1F2) != 0x1F3) + return FAILED ("#5"); + if (g_unichar_tolower (0xFFFF) != 0xFFFF) + return FAILED ("#6"); + return NULL; +} + +/* + * g_unichar_totitle + */ +RESULT +test_g_unichar_totitle () +{ + if (g_unichar_toupper (0) != 0) + return FAILED ("#0"); + if (g_unichar_totitle ('a') != 'A') + return FAILED ("#1"); + if (g_unichar_totitle ('1') != '1') + return FAILED ("#2"); + if (g_unichar_totitle (0x1C4) != 0x1C5) + return FAILED ("#3"); + if (g_unichar_totitle (0x1F2) != 0x1F2) + return FAILED ("#4"); + if (g_unichar_totitle (0x1F3) != 0x1F2) + return FAILED ("#5"); + if (g_unichar_toupper (0xFFFF) != 0xFFFF) + return FAILED ("#6"); + return NULL; +} + +static Test unicode_tests [] = { + {"g_unichar_type", test_g_unichar_type}, + {"g_unichar_toupper", test_g_unichar_toupper}, + {"g_unichar_tolower", test_g_unichar_tolower}, + {"g_unichar_totitle", test_g_unichar_totitle}, + {NULL, NULL} +}; + +DEFINE_TEST_GROUP_INIT(unicode_tests_init, unicode_tests) diff --git a/eglib/test/utf8.c b/eglib/test/utf8.c index a0e66f806a0..e7f04059d5b 100644 --- a/eglib/test/utf8.c +++ b/eglib/test/utf8.c @@ -82,8 +82,8 @@ compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_ RESULT test_utf16_to_utf8 () { - const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27"; - gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}; + const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81", *src5 = "\xF0\x90\x90\x80"; + gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}, str5 [] = {0xD801, 0xDC00, 0}; RESULT result; gchar_to_gunichar2 (str1, src1); @@ -99,6 +99,15 @@ test_utf16_to_utf8 () result = compare_utf16_to_utf8 (src2, str2, 2, 4); if (result != OK) return result; + result = compare_utf16_to_utf8 (src3, str3, 1, 3); + if (result != OK) + return result; + result = compare_utf16_to_utf8 (src4, str4, 1, 3); + if (result != OK) + return result; + result = compare_utf16_to_utf8 (src5, str5, 2, 4); + if (result != OK) + return result; return OK; } @@ -194,6 +203,7 @@ test_utf8_seq () if (out_read != 2) { return FAILED ("out_read is expected to be 2 but was %d\n", out_read); } + g_free (dst); return OK; } @@ -201,8 +211,8 @@ test_utf8_seq () RESULT test_utf8_to_utf16 () { - const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27"; - gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}; + const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27", *src3 = "\xEF\xBC\xA1", *src4 = "\xEF\xBD\x81"; + gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0}, str3 [] = {0xFF21, 0}, str4 [] = {0xFF41, 0}; RESULT result; gchar_to_gunichar2 (str1, src1); @@ -218,6 +228,12 @@ test_utf8_to_utf16 () result = compare_utf8_to_utf16 (str2, src2, 4, 2); if (result != OK) return result; + result = compare_utf8_to_utf16 (str3, src3, 3, 1); + if (result != OK) + return result; + result = compare_utf8_to_utf16 (str4, src4, 3, 1); + if (result != OK) + return result; return OK; } @@ -310,6 +326,8 @@ test_ucs4_to_utf16 () static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'}; static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'}; static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'}; + static gunichar str6[2] = {0x10400, '\0'}; + static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'}; static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2}; gunichar2* res; glong items_read, items_written, current_write_index; @@ -337,8 +355,8 @@ test_ucs4_to_utf16 () items_read = items_written = 0; res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE); - if (check_result) return check_result; g_free (res); + if (check_result) return check_result; items_read = items_written = 0; err = 0; @@ -367,6 +385,13 @@ test_ucs4_to_utf16 () current_write_index += items_written; } + items_read = items_written = 0; + err = 0; + res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err); + check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE); + if (check_result) return check_result; + g_free (res); + return OK; } @@ -411,6 +436,8 @@ test_utf16_to_ucs4 () static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF, 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'}; static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'}; + static gunichar2 str5[3] = {0xD801, 0xDC00, 0}; + static gunichar exp5[2] = {0x10400, 0}; static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0}; gunichar* res; glong items_read, items_written, current_read_index,current_write_index; @@ -481,6 +508,13 @@ test_utf16_to_ucs4 () current_write_index += items_written; } + items_read = items_written = 0; + err = 0; + res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err); + check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE); + if (check_result) return check_result; + g_free (res); + return OK; } RESULT @@ -636,6 +670,101 @@ test_utf8_validate() return OK; } +glong +utf8_byteslen (const gchar *src) +{ + int i = 0; + do { + if (src [i] == '\0') + return i; + i++; + } while (TRUE); +} + +RESULT +test_utf8_strcase_each (const gchar *src, const gchar *expected, gboolean strup) +{ + gchar *tmp; + glong len, len2; + RESULT r; + + len = utf8_byteslen (src); + tmp = strup ? g_utf8_strup (src, len) : g_utf8_strdown (src, len); + len2 = utf8_byteslen (tmp); + r = compare_strings_utf8_RESULT (expected, tmp, len < len2 ? len2 : len); + g_free (tmp); + return r; +} + +RESULT +test_utf8_strup_each (const gchar *src, const gchar *expected) +{ + return test_utf8_strcase_each (src, expected, TRUE); +} + +RESULT +test_utf8_strdown_each (const gchar *src, const gchar *expected) +{ + return test_utf8_strcase_each (src, expected, FALSE); +} + +/* + * g_utf8_strup + */ +RESULT +test_utf8_strup () +{ + RESULT r; + + if ((r = test_utf8_strup_each ("aBc", "ABC")) != OK) + return r; + if ((r = test_utf8_strup_each ("x86-64", "X86-64")) != OK) + return r; + // U+3B1 U+392 -> U+391 U+392 + if ((r = test_utf8_strup_each ("\xCE\xB1\xCE\x92", "\xCE\x91\xCE\x92")) != OK) + return r; + // U+FF21 -> U+FF21 + if ((r = test_utf8_strup_each ("\xEF\xBC\xA1", "\xEF\xBC\xA1")) != OK) + return r; + // U+FF41 -> U+FF21 + if ((r = test_utf8_strup_each ("\xEF\xBD\x81", "\xEF\xBC\xA1")) != OK) + return r; + // U+10428 -> U+10400 + if ((r = test_utf8_strup_each ("\xF0\x90\x90\xA8", "\xF0\x90\x90\x80")) != OK) + return r; + + return OK; +} + +/* + * g_utf8_strdown + */ +RESULT +test_utf8_strdown () +{ + RESULT r; + + if ((r = test_utf8_strdown_each ("aBc", "abc")) != OK) + return r; + if ((r = test_utf8_strdown_each ("X86-64", "x86-64")) != OK) + return r; + // U+391 U+3B2 -> U+3B1 U+3B2 + if ((r = test_utf8_strdown_each ("\xCE\x91\xCE\xB2", "\xCE\xB1\xCE\xB2")) != OK) + return r; +/* + // U+FF41 -> U+FF41 + if ((r = test_utf8_strdown_each ("\xEF\xBC\x81", "\xEF\xBC\x81")) != OK) + return r; + // U+FF21 -> U+FF41 + if ((r = test_utf8_strdown_each ("\xEF\xBC\xA1", "\xEF\xBD\x81")) != OK) + return r; + // U+10400 -> U+10428 + if ((r = test_utf8_strdown_each ("\xF0\x90\x90\x80", "\xF0\x90\x90\xA8")) != OK) + return r; +*/ + return OK; +} + /* * test initialization */ @@ -652,6 +781,8 @@ static Test utf8_tests [] = { {"g_utf8_get_char", test_utf8_get_char }, {"g_utf8_next_char", test_utf8_next_char }, {"g_utf8_validate", test_utf8_validate }, + {"g_utf8_strup", test_utf8_strup}, + {"g_utf8_strdown", test_utf8_strdown}, {NULL, NULL} }; |