Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/mono.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/eglib
diff options
context:
space:
mode:
author: Atsushi Eno <atsushieno@gmail.com> 2006-10-31 07:01:04 +0300
committer: Atsushi Eno <atsushieno@gmail.com> 2006-10-31 07:01:04 +0300
commit: 0afb42d28e8817715b3eadeec95c41591a5dcd74 (patch)
tree: 91d1affe8919cc1134ebecd9f357a45dfae6bb83 /eglib
parent: 9388e8a295f8f86d35e31f824d95acb0fe17fd60 (diff)
2006-10-30 Atsushi Enomoto <atsushi@ximian.com>
* src/gutf8.c : several fixes:
  - fixed incorrect mb_size clear and mb_remain computation.
  - initialize items_written to 0 for error case.
  - in utf8_to_utf16_len differentiate error messages completely.
  - in g_utf8_to_utf16 use guchar instead of gchar.
* test/utf8.c : added test case string in test_utf8_seq() to both test_utf8_to_utf16() and test_utf16_to_utf8().

svn path=/trunk/mono/; revision=67173
Diffstat (limited to 'eglib')
-rw-r--r-- eglib/ChangeLog | 10
-rw-r--r-- eglib/src/gutf8.c | 46
-rw-r--r-- eglib/test/utf8.c | 30
3 files changed, 59 insertions, 27 deletions
diff --git a/eglib/ChangeLog b/eglib/ChangeLog
index 71bb3890f1c..5e246afbad7 100644
--- a/eglib/ChangeLog
+++ b/eglib/ChangeLog
@@ -1,3 +1,13 @@
+2006-10-30 Atsushi Enomoto <atsushi@ximian.com>
+
+ * src/gutf8.c : several fixes:
+ - fixed incorrect mb_size clear and mb_remain computation.
+ - initialize items_written to 0 for error case.
+ - in utf8_to_utf16_len differentiate error messages completely.
+ - in g_utf8_to_utf16 use guchar instead of gchar.
+ * test/utf8.c : added test case string in test_utf8_seq() to
+ both test_utf8_to_utf16() and test_utf16_to_utf8().
+
2006-10-21 Miguel de Icaza <miguel@novell.com>
* src/gunicode.c (g_filename_from_utf8): Use g_strlcpy here.
diff --git a/eglib/src/gutf8.c b/eglib/src/gutf8.c
index 6239178b04b..77f96bedbb6 100644
--- a/eglib/src/gutf8.c
+++ b/eglib/src/gutf8.c
@@ -29,7 +29,7 @@ g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_wr
the conversion core below simply resets erroreous bits */
glong utf16_len;
gunichar2 *ret;
- gchar ch, mb_size, mb_remain;
+ guchar ch, mb_size, mb_remain;
guint32 codepoint;
glong in_pos, out_pos;
@@ -42,6 +42,8 @@ g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_wr
if (error)
*error = NULL;
+ if (items_written)
+ *items_written = 0;
utf16_len = utf8_to_utf16_len (str, len, items_read, error);
if (error)
if (*error)
@@ -54,35 +56,37 @@ g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_wr
for (in_pos = 0; len < 0 ? str [in_pos] : in_pos < len; in_pos++) {
ch = (guchar) str [in_pos];
if (mb_size == 0) {
- if (0 < ch)
+ if (ch < 0x80)
ret [out_pos++] = ch;
else if ((ch & 0xE0) == 0xC0) {
codepoint = ch & 0x1F;
- mb_remain = mb_size = 2;
+ mb_size = 2;
} else if ((ch & 0xF0) == 0xE0) {
codepoint = ch & 0x0F;
- mb_remain = mb_size = 3;
+ mb_size = 3;
} else if ((ch & 0xF8) == 0xF0) {
codepoint = ch & 7;
- mb_remain = mb_size = 4;
+ mb_size = 4;
} else if ((ch & 0xFC) == 0xF8) {
codepoint = ch & 3;
- mb_remain = mb_size = 5;
+ mb_size = 5;
} else if ((ch & 0xFE) == 0xFC) {
codepoint = ch & 3;
- mb_remain = mb_size = 6;
+ mb_size = 6;
} else {
/* invalid utf-8 sequence */
codepoint = 0;
mb_remain = mb_size = 0;
}
+ if (mb_size > 1)
+ mb_remain = mb_size - 1;
} else {
if ((ch & 0xC0) == 0x80) {
codepoint = (codepoint << 6) | (ch & 0x3F);
if (--mb_remain == 0) {
/* multi byte character is fully consumed now. */
if (codepoint < 0x10000) {
- ret [out_pos++] = codepoint;
+ ret [out_pos++] = codepoint % 0x10000;
} else if (codepoint < 0x110000) {
/* surrogate pair */
codepoint -= 0x10000;
@@ -91,8 +95,9 @@ g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_wr
} else {
/* invalid utf-8 sequence (excess) */
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
}
+ mb_size = 0;
}
} else {
/* invalid utf-8 sequence */
@@ -130,23 +135,23 @@ utf8_to_utf16_len (const gchar *str, glong len, glong *items_read, GError **erro
ret++;
else if ((ch & 0xE0) == 0xC0) {
codepoint = ch & 0x1F;
- mb_remain = mb_size = 2;
+ mb_size = 2;
} else if ((ch & 0xF0) == 0xE0) {
codepoint = ch & 0x0F;
- mb_remain = mb_size = 3;
+ mb_size = 3;
} else if ((ch & 0xF8) == 0xF0) {
codepoint = ch & 7;
- mb_remain = mb_size = 4;
+ mb_size = 4;
} else if ((ch & 0xFC) == 0xF8) {
codepoint = ch & 3;
- mb_remain = mb_size = 5;
+ mb_size = 5;
} else if ((ch & 0xFE) == 0xFC) {
codepoint = ch & 3;
- mb_remain = mb_size = 6;
+ mb_size = 6;
} else {
/* invalid utf-8 sequence */
if (error) {
- g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal first byte)", in_pos);
if (items_read)
*items_read = in_pos;
return -1;
@@ -155,6 +160,8 @@ utf8_to_utf16_len (const gchar *str, glong len, glong *items_read, GError **erro
mb_remain = mb_size = 0;
}
}
+ if (mb_size > 1)
+ mb_remain = mb_size - 1;
} else {
if ((ch & 0xC0) == 0x80) {
codepoint = (codepoint << 6) | (ch & 0x3F);
@@ -187,7 +194,7 @@ utf8_to_utf16_len (const gchar *str, glong len, glong *items_read, GError **erro
return -1;
} else {
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
overlong = FALSE;
}
}
@@ -205,14 +212,15 @@ utf8_to_utf16_len (const gchar *str, glong len, glong *items_read, GError **erro
return -1;
} else {
codepoint = 0;
- mb_remain = mb_size = 0;
+ mb_remain = 0;
}
}
+ mb_size = 0;
}
} else {
/* invalid utf-8 sequence */
if (error) {
- g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d", in_pos);
+ g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, "invalid utf-8 sequence at %d (illegal following bytes)", in_pos);
if (items_read)
*items_read = in_pos;
return -1;
@@ -246,6 +254,8 @@ g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *item
out_pos = 0;
surrogate = FALSE;
+ if (items_written)
+ *items_written = 0;
utf8_len = utf16_to_utf8_len (str, len, items_read, error);
if (error)
if (*error)
diff --git a/eglib/test/utf8.c b/eglib/test/utf8.c
index eff6ef3d46d..5b4c2608634 100644
--- a/eglib/test/utf8.c
+++ b/eglib/test/utf8.c
@@ -82,11 +82,10 @@ compare_utf16_to_utf8 (const gchar *expected, const gunichar2 *utf16, glong len_
RESULT
test_utf16_to_utf8 ()
{
- const gchar *src0 = "", *src1 = "ABCDE";
- gunichar2 str0 [1], str1 [6];
+ const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+ gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
RESULT result;
- str0 [0] = 0;
gchar_to_gunichar2 (str1, src1);
/* empty string */
@@ -97,6 +96,9 @@ test_utf16_to_utf8 ()
result = compare_utf16_to_utf8 (src1, str1, 5, 5);
if (result != OK)
return result;
+ result = compare_utf16_to_utf8 (src2, str2, 2, 4);
+ if (result != OK)
+ return result;
return OK;
}
@@ -174,28 +176,35 @@ compare_utf8_to_utf16 (const gunichar2 *expected, const gchar *utf8, glong len_i
RESULT
test_utf8_seq ()
{
- const gchar *src = "\345\271\264\47";
+ const gchar *src = "\xE5\xB9\xB4\x27";
glong in_read, out_read;
//gunichar2 expected [6];
GError *error = NULL;
+ gunichar2 *dst;
printf ("got: %s\n", src);
- g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
+ dst = g_utf8_to_utf16 (src, strlen (src), &in_read, &out_read, &error);
if (error != NULL){
return error->message;
}
-
+
+ if (in_read != 4) {
+ return FAILED ("in_read is expected to be 4 but was %d\n", in_read);
+ }
+ if (out_read != 2) {
+ return FAILED ("out_read is expected to be 2 but was %d\n", out_read);
+ }
+
return OK;
}
RESULT
test_utf8_to_utf16 ()
{
- const gchar *src0 = "", *src1 = "ABCDE";
- gunichar2 str0 [1], str1 [6];
+ const gchar *src0 = "", *src1 = "ABCDE", *src2 = "\xE5\xB9\xB4\x27";
+ gunichar2 str0 [] = {0}, str1 [6], str2 [] = {0x5E74, 39, 0};
RESULT result;
- str0 [0] = 0;
gchar_to_gunichar2 (str1, src1);
/* empty string */
@@ -206,6 +215,9 @@ test_utf8_to_utf16 ()
result = compare_utf8_to_utf16 (str1, src1, 5, 5);
if (result != OK)
return result;
+ result = compare_utf8_to_utf16 (str2, src2, 4, 2);
+ if (result != OK)
+ return result;
return OK;
}