Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/stdlib/mbtowc_r.c')
-rw-r--r--newlib/libc/stdlib/mbtowc_r.c411
1 files changed, 74 insertions, 337 deletions
diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c
index 7f0dd11aa..4bf302359 100644
--- a/newlib/libc/stdlib/mbtowc_r.c
+++ b/newlib/libc/stdlib/mbtowc_r.c
@@ -1,16 +1,13 @@
-#include <newlib.h>
#include <stdlib.h>
#include <locale.h>
#include "mbctype.h"
-#include <wchar.h>
-#include <string.h>
-#ifdef _MB_CAPABLE
+#ifdef MB_CAPABLE
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
-typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
- INV, JIS_S_NUM } JIS_STATE;
-typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;
+typedef enum { ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+ J2_ESC, J2_ESC_BR, DONE, INV, JIS_S_NUM } JIS_STATE;
+typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS_ACTION;
/**************************************************************************************
* state/action tables for processing JIS encoding
@@ -21,31 +18,32 @@ typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
-/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
+/* ASCII */ { A_ESC, DONE, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
+/* A_ESC */ { DONE, A_ESC_DL, DONE, DONE, DONE, DONE, DONE, DONE, DONE },
+/* A_ESC_DL */{ DONE, DONE, DONE, JIS, JIS, DONE, DONE, DONE, DONE },
/* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV },
-/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII },
-/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII },
-/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV },
+/* JIS_1 */ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2, INV },
+/* JIS_2 */ { J2_ESC, DONE, DONE, DONE, DONE, DONE, INV, DONE, DONE },
/* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
/* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
+/* J2_ESC */ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
+/* J2_ESC_BR*/{ INV, INV, INV, INV, DONE, DONE, INV, INV, INV },
};
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
/* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A},
-/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR },
/* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A},
-/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A},
-/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR },
+/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, MAKE_J, MAKE_J, COPY_A, COPY_A, COPY_A, COPY_A},
+/* JIS */ { NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/* JIS_1 */ { ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/* JIS_2 */ { NOOP, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, COPY_J2},
/* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
-/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR },
+/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR },
+/* J2_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
+/* J2_ESC_BR*/{ ERROR, ERROR, ERROR, ERROR, COPY_J, COPY_J, ERROR, ERROR, ERROR },
};
-#endif /* _MB_CAPABLE */
-
-/* we override the mbstate_t __count field for more complex encodings and use it store a state value */
-#define __state __count
-
-extern char __lc_ctype[12];
+#endif /* MB_CAPABLE */
int
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
@@ -53,7 +51,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
wchar_t *pwc _AND
const char *s _AND
size_t n _AND
- mbstate_t *state)
+ int *state)
{
wchar_t dummy;
unsigned char *t = (unsigned char *)s;
@@ -62,330 +60,67 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
pwc = &dummy;
if (s != NULL && n == 0)
- return -2;
+ return -1;
-#ifdef _MB_CAPABLE
- if (__lc_ctype == NULL ||
- (strlen (__lc_ctype) <= 1))
+#ifdef MB_CAPABLE
+ if (r->_current_locale == NULL ||
+ (strlen (r->_current_locale) <= 1))
{ /* fall-through */ }
- else if (!strcmp (__lc_ctype, "C-UTF-8"))
- {
- int ch;
- int i = 0;
-
- if (s == NULL)
- return 0; /* UTF-8 character encodings are not state-dependent */
-
- if (state->__count == 0)
- ch = t[i++];
- else
- {
- ++n;
- ch = state->__value.__wchb[0];
- }
-
- if (ch == '\0')
- {
- *pwc = 0;
- state->__count = 0;
- return 0; /* s points to the null character */
- }
-
- if (ch >= 0x0 && ch <= 0x7f)
- {
- /* single-byte sequence */
- state->__count = 0;
- *pwc = ch;
- return 1;
- }
- else if (ch >= 0xc0 && ch <= 0xdf)
- {
- /* two-byte sequence */
- state->__value.__wchb[0] = ch;
- state->__count = 1;
- if (n < 2)
- return -2;
- ch = t[i++];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- if (state->__value.__wchb[0] < 0xc2)
- /* overlong UTF-8 sequence */
- return -1;
- state->__count = 0;
- *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
- | (wchar_t)(ch & 0x3f);
- return i;
- }
- else if (ch >= 0xe0 && ch <= 0xef)
- {
- /* three-byte sequence */
- wchar_t tmp;
- state->__value.__wchb[0] = ch;
- if (state->__count == 0)
- state->__count = 1;
- else
- ++n;
- if (n < 2)
- return -2;
- ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
- if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
- /* overlong UTF-8 sequence */
- return -1;
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[1] = ch;
- state->__count = 2;
- if (n < 3)
- return -2;
- ch = t[i++];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__count = 0;
- tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
- | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
- | (wchar_t)(ch & 0x3f);
-
- if (tmp >= 0xd800 && tmp <= 0xdfff)
- return -1;
- *pwc = tmp;
- return i;
- }
- else if (ch >= 0xf0 && ch <= 0xf7)
- {
- /* four-byte sequence */
- if (sizeof(wchar_t) < 4)
- return -1; /* we can't store such a value */
- state->__value.__wchb[0] = ch;
- if (state->__count == 0)
- state->__count = 1;
- else
- ++n;
- if (n < 2)
- return -2;
- ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
- if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
- /* overlong UTF-8 sequence */
- return -1;
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[1] = ch;
- if (state->__count == 1)
- state->__count = 2;
- else
- ++n;
- if (n < 3)
- return -2;
- ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[2] = ch;
- state->__count = 3;
- if (n < 4)
- return -2;
- ch = t[i++];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- *pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18)
- | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12)
- | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6)
- | (wchar_t)(ch & 0x3f);
-
- state->__count = 0;
- return i;
- }
- else if (ch >= 0xf8 && ch <= 0xfb)
- {
- /* five-byte sequence */
- if (sizeof(wchar_t) < 4)
- return -1; /* we can't store such a value */
- state->__value.__wchb[0] = ch;
- if (state->__count == 0)
- state->__count = 1;
- else
- ++n;
- if (n < 2)
- return -2;
- ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
- if (state->__value.__wchb[0] == 0xf8 && ch < 0x88)
- /* overlong UTF-8 sequence */
- return -1;
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[1] = ch;
- if (state->__count == 1)
- state->__count = 2;
- else
- ++n;
- if (n < 3)
- return -2;
- ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[2] = ch;
- if (state->__count == 2)
- state->__count = 3;
- else
- ++n;
- if (n < 4)
- return -2;
- ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[3] = ch;
- state->__count = 4;
- if (n < 5)
- return -2;
- ch = t[i++];
- *pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24)
- | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18)
- | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12)
- | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6)
- | (wchar_t)(ch & 0x3f);
-
- state->__count = 0;
- return i;
- }
- else if (ch >= 0xfc && ch <= 0xfd)
- {
- /* six-byte sequence */
- int ch2;
- if (sizeof(wchar_t) < 4)
- return -1; /* we can't store such a value */
- state->__value.__wchb[0] = ch;
- if (state->__count == 0)
- state->__count = 1;
- else
- ++n;
- if (n < 2)
- return -2;
- ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
- if (state->__value.__wchb[0] == 0xfc && ch < 0x84)
- /* overlong UTF-8 sequence */
- return -1;
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[1] = ch;
- if (state->__count == 1)
- state->__count = 2;
- else
- ++n;
- if (n < 3)
- return -2;
- ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[2] = ch;
- if (state->__count == 2)
- state->__count = 3;
- else
- ++n;
- if (n < 4)
- return -2;
- ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- state->__value.__wchb[3] = ch;
- if (state->__count == 3)
- state->__count = 4;
- else
- ++n;
- if (n < 5)
- return -2;
- if (n == 5)
- return -1; /* at this point we can't save enough to restart */
- ch = t[i++];
- if (ch < 0x80 || ch > 0xbf)
- return -1;
- ch2 = t[i++];
- *pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30)
- | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24)
- | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18)
- | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12)
- | (wchar_t)((ch & 0x3f) << 6)
- | (wchar_t)(ch2 & 0x3f);
-
- state->__count = 0;
- return i;
- }
- else
- return -1;
- }
- else if (!strcmp (__lc_ctype, "C-SJIS"))
+ else if (!strcmp (r->_current_locale, "C-SJIS"))
{
- int ch;
- int i = 0;
+ int char1;
if (s == NULL)
return 0; /* not state-dependent */
- ch = t[i++];
- if (state->__count == 0)
- {
- if (_issjis1 (ch))
- {
- state->__value.__wchb[0] = ch;
- state->__count = 1;
- if (n <= 1)
- return -2;
- ch = t[i++];
- }
- }
- if (state->__count == 1)
- {
- if (_issjis2 (ch))
- {
- *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
- state->__count = 0;
- return i;
- }
- else
- return -1;
- }
+ char1 = *t;
+ if (_issjis1 (char1))
+ {
+ int char2 = t[1];
+ if (n <= 1)
+ return -1;
+ if (_issjis2 (char2))
+ {
+ *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ return 2;
+ }
+ else
+ return -1;
+ }
}
- else if (!strcmp (__lc_ctype, "C-EUCJP"))
+ else if (!strcmp (r->_current_locale, "C-EUCJP"))
{
- int ch;
- int i = 0;
+ int char1;
if (s == NULL)
return 0; /* not state-dependent */
- ch = t[i++];
- if (state->__count == 0)
- {
- if (_iseucjp (ch))
- {
- state->__value.__wchb[0] = ch;
- state->__count = 1;
- if (n <= 1)
- return -2;
- ch = t[i++];
- }
- }
- if (state->__count == 1)
- {
- if (_iseucjp (ch))
- {
- *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
- state->__count = 0;
- return i;
- }
- else
- return -1;
- }
+ char1 = *t;
+ if (_iseucjp (char1))
+ {
+ int char2 = t[1];
+ if (n <= 1)
+ return -1;
+ if (_iseucjp (char2))
+ {
+ *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
+ return 2;
+ }
+ else
+ return -1;
+ }
}
- else if (!strcmp (__lc_ctype, "C-JIS"))
+ else if (!strcmp (r->_current_locale, "C-JIS"))
{
JIS_STATE curr_state;
JIS_ACTION action;
JIS_CHAR_TYPE ch;
unsigned char *ptr;
- unsigned int i;
- int curr_ch;
+ int i, curr_ch;
if (s == NULL)
{
- state->__state = ASCII;
+ *state = 0;
return 1; /* state-dependent */
}
- curr_state = state->__state;
+ curr_state = (*state == 0 ? ASCII : JIS);
ptr = t;
for (i = 0; i < n; ++i)
@@ -429,21 +164,23 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
case NOOP:
break;
case EMPTY:
- state->__state = ASCII;
+ *state = 0;
*pwc = (wchar_t)0;
- return 0;
+ return i;
case COPY_A:
- state->__state = ASCII;
+ *state = 0;
*pwc = (wchar_t)*ptr;
return (i + 1);
- case COPY_J1:
- state->__value.__wchb[0] = t[i];
- break;
- case COPY_J2:
- state->__state = JIS;
- *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
+ case COPY_J:
+ *state = 0;
+ *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
return (i + 1);
+ case COPY_J2:
+ *state = 1;
+ *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
+ return (ptr - t) + 2;
case MAKE_A:
+ case MAKE_J:
ptr = (char *)(t + i + 1);
break;
case ERROR:
@@ -453,10 +190,9 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
}
- state->__state = curr_state;
- return -2; /* n < bytes needed */
+ return -1; /* n < bytes needed */
}
-#endif /* _MB_CAPABLE */
+#endif /* MB_CAPABLE */
/* otherwise this must be the "C" locale or unknown locale */
if (s == NULL)
@@ -469,3 +205,4 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
return 1;
}
+