From 9c64d2a7ba6feb196099ee8b65bba163191008c0 Mon Sep 17 00:00:00 2001 From: Jeff Johnston Date: Mon, 9 Sep 2002 21:42:14 +0000 Subject: 2002-09-09 Jeff Johnston * libc/include/sys/_types.h (_mbstate_t): Changed to use unsigned char internally. * libc/sys/linux/sys/_types.h: Ditto. * libc/include/sys/reent.h * libc/stdlib/mblen.c (mblen): Use function-specific state value from default reentrancy structure. * libc/stdlib/mblen_r.c (_mblen_r): If return code from _mbtowc_r is less than 0, reset state __count value and return -1. * libc/stdlib/mbrlen.c (mbrlen): If the input state pointer is NULL, use the function-specific pointer provided in the default reentrancy structure. * libc/stdlib/mbrtowc.c: Add reentrant form of function. If input state pointer is NULL, use function-specific area provided in reentrancy structure. * libc/stdlib/mbsrtowcs.c: Ditto. * libc/stdlib/wcrtomb.c: Ditto. * libc/stdlib/wcsrtombs.c: Ditto. * libc/stdlib/mbstowcs.c: Reformat. * libc/stdlib/wcstombs.c: Ditto. * libc/stdlib/mbstowcs_r.c (_mbstowcs_r): If an error occurs, reset the state's __count value and return -1. * libc/stdlib/mbtowc.c: Ditto. * libc/stdlib/mbtowc_r.c (_mbtowc_r): Add restartable functionality. If number of bytes is used up before completing a valid multibyte character, return -2 and save the state. * libc/stdlib/wctomb_r.c (_wctomb_r): Define __state as __count and change some __count references to __state for clarity. --- newlib/libc/stdlib/mblen.c | 33 ++- newlib/libc/stdlib/mblen_r.c | 20 +- newlib/libc/stdlib/mbrlen.c | 10 +- newlib/libc/stdlib/mbrtowc.c | 38 ++- newlib/libc/stdlib/mbsrtowcs.c | 64 ++++- newlib/libc/stdlib/mbstowcs.c | 32 +-- newlib/libc/stdlib/mbstowcs_r.c | 5 +- newlib/libc/stdlib/mbtowc.c | 38 +-- newlib/libc/stdlib/mbtowc_r.c | 548 +++++++++++++++++++++++----------------- newlib/libc/stdlib/wcrtomb.c | 34 ++- newlib/libc/stdlib/wcsrtombs.c | 74 +++++- newlib/libc/stdlib/wcstombs.c | 30 +-- newlib/libc/stdlib/wctomb_r.c | 11 +- 13 files changed, 592 insertions(+), 345 deletions(-) (limited to 'newlib/libc/stdlib') diff --git a/newlib/libc/stdlib/mblen.c b/newlib/libc/stdlib/mblen.c index a0ed29ac1..b1310956f 100644 --- a/newlib/libc/stdlib/mblen.c +++ b/newlib/libc/stdlib/mblen.c @@ -52,21 +52,26 @@ _DEFUN (mblen, (s, n), size_t n) { #ifdef MB_CAPABLE - int retval = 0; - _REENT_CHECK_MISC(_REENT); - - retval = _mbtowc_r (_REENT, NULL, s, n, &(_REENT_MBLEN_STATE(_REENT))); - if (retval < 0) - return -1; - else - return retval; - + int retval = 0; + mbstate_t *state; + + _REENT_CHECK_MISC(_REENT); + state = &(_REENT_MBLEN_STATE(_REENT)); + retval = _mbtowc_r (_REENT, NULL, s, n, state); + if (retval < 0) + { + state->__count = 0; + return -1; + } + else + return retval; + #else /* not MB_CAPABLE */ - if (s == NULL || *s == '\0') - return 0; - if (n == 0) - return -1; - return 1; + if (s == NULL || *s == '\0') + return 0; + if (n == 0) + return -1; + return 1; #endif /* not MB_CAPABLE */ } diff --git a/newlib/libc/stdlib/mblen_r.c b/newlib/libc/stdlib/mblen_r.c index cff739524..1d6659743 100644 --- a/newlib/libc/stdlib/mblen_r.c +++ b/newlib/libc/stdlib/mblen_r.c @@ -54,14 +54,22 @@ _DEFUN (_mblen_r, (r, s, n, state), mbstate_t *state) { #ifdef MB_CAPABLE + int retval; + retval = _mbtowc_r (r, NULL, s, n, state); - return _mbtowc_r (r, NULL, s, n, state); + if (retval < 0) + { + state->__count = 0; + return -1; + } + + return retval; #else /* not MB_CAPABLE */ - if (s == NULL || *s == '\0') - return 0; - if (n == 0) - return -1; - return 1; + if (s == NULL || *s == '\0') + return 0; + if (n == 0) + return -1; + return 1; #endif /* not MB_CAPABLE */ } diff --git a/newlib/libc/stdlib/mbrlen.c b/newlib/libc/stdlib/mbrlen.c index 8f0c648b9..504348d38 100644 --- a/newlib/libc/stdlib/mbrlen.c +++ b/newlib/libc/stdlib/mbrlen.c @@ -7,7 +7,13 @@ size_t mbrlen(const char *s, size_t n, mbstate_t *ps) { - mbstate_t internal; +#ifdef MB_CAPABLE + if (ps == NULL) + { + _REENT_CHECK_MISC(_REENT); + ps = &(_REENT_MBRLEN_STATE(_REENT)); + } +#endif - return mbrtowc(NULL, s, n, ps != NULL ? ps : &internal); + return mbrtowc(NULL, s, n, ps); } diff --git a/newlib/libc/stdlib/mbrtowc.c b/newlib/libc/stdlib/mbrtowc.c index 7934ca117..0db1208d0 100644 --- a/newlib/libc/stdlib/mbrtowc.c +++ b/newlib/libc/stdlib/mbrtowc.c @@ -6,24 +6,46 @@ #include size_t -mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +_DEFUN (_mbrtowc_r, (ptr, pwc, s, n, ps), + struct _reent *ptr _AND + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + mbstate_t *ps) { int retval = 0; - _REENT_CHECK_MISC(_REENT); + +#ifdef MB_CAPABLE + if (ps == NULL) + { + _REENT_CHECK_MISC(ptr); + ps = &(_REENT_MBRTOWC_STATE(ptr)); + } +#endif if (s == NULL) - retval = _mbtowc_r (_REENT, pwc, "", 1, ps); + retval = _mbtowc_r (ptr, pwc, "", 1, ps); else - retval = _mbtowc_r (_REENT, pwc, s, n, ps); - - if (*pwc == NULL) - memset (ps, '\0', sizeof (mbstate_t)); + retval = _mbtowc_r (ptr, pwc, s, n, ps); if (retval == -1) { - _REENT->_errno = EILSEQ; + ps->__count = 0; + ptr->_errno = EILSEQ; return (size_t)(-1); } else return (size_t)retval; } + +#ifndef _REENT_ONLY +size_t +_DEFUN (mbrtowc, (pwc, s, n, ps), + wchar_t *pwc _AND + const char *s _AND + size_t n _AND + mbstate_t *ps) +{ + return _mbrtowc_r (_REENT, pwc, s, n, ps); +} +#endif /* !_REENT_ONLY */ diff --git a/newlib/libc/stdlib/mbsrtowcs.c b/newlib/libc/stdlib/mbsrtowcs.c index 256ab0d7b..08d9d0c18 100644 --- a/newlib/libc/stdlib/mbsrtowcs.c +++ b/newlib/libc/stdlib/mbsrtowcs.c @@ -5,20 +5,62 @@ #include size_t -mbsrtowcs(wchar_t *dst, const char **src, size_t len, mbstate_t *ps) +_DEFUN (_mbsrtowcs_r, (r, dst, src, n, ps), + struct _reent *r _AND + wchar_t *dst _AND + const char **src _AND + size_t n _AND + mbstate_t *ps) { - int retval = 0; - mbstate_t internal; + wchar_t *ptr = dst; + size_t max = n; + int bytes; - _REENT_CHECK_MISC(_REENT); - - retval = _mbstowcs_r (_REENT, dst, *src, len, ps != NULL ? ps : &internal); +#ifdef MB_CAPABLE + if (ps == NULL) + { + _REENT_CHECK_MISC(r); + ps = &(_REENT_MBSRTOWCS_STATE(r)); + } +#endif - if (retval == -1) + while (n > 0) { - _REENT->_errno = EILSEQ; - return (size_t)(-1); + bytes = _mbtowc_r (r, ptr, *src, MB_CUR_MAX, ps); + if (bytes > 0) + { + *src += bytes; + ++ptr; + --n; + } + else if (bytes == -2) + { + *src += MB_CUR_MAX; + } + else if (bytes == 0) + { + *src = NULL; + return (size_t)(ptr - dst); + } + else + { + ps->__count = 0; + r->_errno = EILSEQ; + return (size_t)-1; + } } - else - return (size_t)retval; + + return (size_t)max; +} + +#ifndef _REENT_ONLY +size_t +_DEFUN (mbsrtowcs, (dst, src, len, ps), + wchar_t *dst _AND + const char **src _AND + size_t len _AND + mbstate_t *ps) +{ + return _mbsrtowcs_r (_REENT, dst, src, len, ps); } +#endif /* !_REENT_ONLY */ diff --git a/newlib/libc/stdlib/mbstowcs.c b/newlib/libc/stdlib/mbstowcs.c index eb0ccd75b..cb09f31d0 100644 --- a/newlib/libc/stdlib/mbstowcs.c +++ b/newlib/libc/stdlib/mbstowcs.c @@ -59,23 +59,23 @@ _DEFUN (mbstowcs, (pwcs, s, n), size_t n) { #ifdef MB_CAPABLE - mbstate_t state; - state.__count = 0; - - return _mbstowcs_r (_REENT, pwcs, s, n, &state); + mbstate_t state; + state.__count = 0; + + return _mbstowcs_r (_REENT, pwcs, s, n, &state); #else /* not MB_CAPABLE */ - - int count = 0; - - if (n != 0) { - do { - if ((*pwcs++ = (wchar_t) *s++) == 0) - break; - count++; - } while (--n != 0); - } - - return count; + + int count = 0; + + if (n != 0) { + do { + if ((*pwcs++ = (wchar_t) *s++) == 0) + break; + count++; + } while (--n != 0); + } + + return count; #endif /* not MB_CAPABLE */ } diff --git a/newlib/libc/stdlib/mbstowcs_r.c b/newlib/libc/stdlib/mbstowcs_r.c index 89cda0371..c6130b2bd 100644 --- a/newlib/libc/stdlib/mbstowcs_r.c +++ b/newlib/libc/stdlib/mbstowcs_r.c @@ -18,7 +18,10 @@ _DEFUN (_mbstowcs_r, (reent, pwcs, s, n, state), { bytes = _mbtowc_r (r, ptr, t, MB_CUR_MAX, state); if (bytes < 0) - return -1; + { + state->__count = 0; + return -1; + } else if (bytes == 0) return ptr - pwcs; t += bytes; diff --git a/newlib/libc/stdlib/mbtowc.c b/newlib/libc/stdlib/mbtowc.c index 6da735aeb..e1e725dbf 100644 --- a/newlib/libc/stdlib/mbtowc.c +++ b/newlib/libc/stdlib/mbtowc.c @@ -52,6 +52,7 @@ effects vary with the locale. #ifndef _REENT_ONLY #include +#include int _DEFUN (mbtowc, (pwc, s, n), @@ -60,23 +61,28 @@ _DEFUN (mbtowc, (pwc, s, n), size_t n) { #ifdef MB_CAPABLE - int retval = 0; - _REENT_CHECK_MISC(_REENT); - - retval = _mbtowc_r (_REENT, pwc, s, n, &(_REENT_MBTOWC_STATE(_REENT))); - - if (retval < 0) - return -1; - else - return retval; + int retval = 0; + mbstate_t *ps; + + _REENT_CHECK_MISC(_REENT); + ps = &(_REENT_MBTOWC_STATE(_REENT)); + + retval = _mbtowc_r (_REENT, pwc, s, n, ps); + + if (retval < 0) + { + ps->__count = 0; + return -1; + } + return retval; #else /* not MB_CAPABLE */ - if (s == NULL) - return 0; - if (n == 0) - return -1; - if (pwc) - *pwc = (wchar_t) *s; - return (*s != '\0'); + if (s == NULL) + return 0; + if (n == 0) + return -1; + if (pwc) + *pwc = (wchar_t) *s; + return (*s != '\0'); #endif /* not MB_CAPABLE */ } diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index b7909d318..26141b4c2 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -7,9 +7,9 @@ #ifdef MB_CAPABLE typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE; -typedef enum { ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR, - J2_ESC, J2_ESC_BR, DONE, INV, JIS_S_NUM } JIS_STATE; -typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS_ACTION; +typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR, + INV, JIS_S_NUM } JIS_STATE; +typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION; /************************************************************************************** * state/action tables for processing JIS encoding @@ -20,33 +20,30 @@ typedef enum { COPY_A, COPY_J, COPY_J2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR } JIS static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = { /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */ -/* ASCII */ { A_ESC, DONE, DONE, DONE, DONE, DONE, DONE, DONE, DONE }, -/* A_ESC */ { DONE, A_ESC_DL, DONE, DONE, DONE, DONE, DONE, DONE, DONE }, -/* A_ESC_DL */{ DONE, DONE, DONE, JIS, JIS, DONE, DONE, DONE, DONE }, +/* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII }, /* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV }, -/* JIS_1 */ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2, INV }, -/* JIS_2 */ { J2_ESC, DONE, DONE, DONE, DONE, DONE, INV, DONE, DONE }, +/* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII }, +/* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII }, +/* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV }, /* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV }, /* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV }, -/* J2_ESC */ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV }, -/* J2_ESC_BR*/{ INV, INV, INV, INV, DONE, DONE, INV, INV, INV }, }; static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = { /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */ /* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A}, +/* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR }, /* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A}, -/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, MAKE_J, MAKE_J, COPY_A, COPY_A, COPY_A, COPY_A}, -/* JIS */ { NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR }, -/* JIS_1 */ { ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR }, -/* JIS_2 */ { NOOP, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, COPY_J2}, +/* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A}, +/* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR }, /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR }, -/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR }, -/* J2_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR }, -/* J2_ESC_BR*/{ ERROR, ERROR, ERROR, ERROR, COPY_J, COPY_J, ERROR, ERROR, ERROR }, +/* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR }, }; #endif /* MB_CAPABLE */ +/* we override the mbstate_t __count field for more complex encodings and use it store a state value */ +#define __state __count + int _DEFUN (_mbtowc_r, (r, pwc, s, n, state), struct _reent *r _AND @@ -70,230 +67,305 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), { /* fall-through */ } else if (!strcmp (r->_current_locale, "C-UTF-8")) { - wchar_t char1 = 0; + int ch; + int i = 0; if (s == NULL) return 0; /* UTF-8 character encodings are not state-dependent */ - /* we know n >= 1 if we get here */ - *pwc = 0; - char1 = (wchar_t)*t; - - if (char1 == '\0') - return 0; /* s points to the null character */ - - if (char1 >= 0x0 && char1 <= 0x7f) - { - /* single-byte sequence */ - *pwc = char1; - return 1; - } - else if (char1 >= 0xc0 && char1 <= 0xdf) - { - /* two-byte sequence */ - if (n >= 2) - { - wchar_t char2 = (wchar_t)*(t+1); - - if (char2 < 0x80 || char2 > 0xbf) - return -1; - - if (char1 < 0xc2) - /* overlong UTF-8 sequence */ - return -1; - - *pwc = ((char1 & 0x1f) << 6) - | (char2 & 0x3f); - return 2; - } - else - return -1; - } - else if (char1 >= 0xe0 && char1 <= 0xef) - { - /* three-byte sequence */ - if (n >= 3) - { - wchar_t char2 = (wchar_t)*(t+1); - wchar_t char3 = (wchar_t)*(t+2); - - if (char2 < 0x80 || char2 > 0xbf) - return -1; - if (char3 < 0x80 || char3 > 0xbf) - return -1; - - if (char1 == 0xe0) - { - if (char2 < 0xa0) - /* overlong UTF-8 sequence */ - return -1; - } - - *pwc = ((char1 & 0x0f) << 12) - | ((char2 & 0x3f) << 6) - | (char3 & 0x3f); - - if (*pwc >= 0xd800 && *pwc <= 0xdfff) - { - return -1; - } - else - return 3; - } - else - return -2; - } - else if (char1 >= 0xf0 && char1 <= 0xf7) - { - /* four-byte sequence */ - if (n >= 4) - { - wchar_t char2 = (wchar_t)*(t+1); - wchar_t char3 = (wchar_t)*(t+2); - wchar_t char4 = (wchar_t)*(t+3); - - if (char2 < 0x80 || char2 > 0xbf) - return -1; - if (char3 < 0x80 || char3 > 0xbf) - return -1; - if (char4 < 0x80 || char4 > 0xbf) - return -1; - - if (char1 == 0xf0) - { - if (char2 < 0x90) - /* overlong UTF-8 sequence */ - return -1; - } - - *pwc = ((char1 & 0x07) << 18) - | ((char2 & 0x3f) << 12) - | ((char3 & 0x3f) << 6) - | (char4 & 0x3f); - - return 4; - } - else - return -2; - } - else if (char1 >= 0xf8 && char1 <= 0xfb) - { - /* five-byte sequence */ - if (n >= 5) - { - wchar_t char2 = (wchar_t)*(t+1); - wchar_t char3 = (wchar_t)*(t+2); - wchar_t char4 = (wchar_t)*(t+3); - wchar_t char5 = (wchar_t)*(t+4); - - if (char2 < 0x80 || char2 > 0xbf) - return -1; - if (char3 < 0x80 || char3 > 0xbf) - return -1; - if (char4 < 0x80 || char4 > 0xbf) - return -1; - if (char5 < 0x80 || char5 > 0xbf) - return -1; - - if (char1 == 0xf8) - { - if (char2 < 0x88) - /* overlong UTF-8 sequence */ - return -1; - } - - *pwc = ((char1 & 0x03) << 24) - | ((char2 & 0x3f) << 18) - | ((char3 & 0x3f) << 12) - | ((char4 & 0x3f) << 6) - | (char5 & 0x3f); - return 5; - } - else - return -2; - } - else if (char1 >= 0xfc && char1 <= 0xfd) + if (state->__count == 0) + ch = t[i++]; + else + { + ++n; + ch = state->__value.__wchb[0]; + } + + if (ch == '\0') + { + *pwc = 0; + state->__count = 0; + return 0; /* s points to the null character */ + } + + if (ch >= 0x0 && ch <= 0x7f) + { + /* single-byte sequence */ + state->__count = 0; + *pwc = ch; + return 1; + } + else if (ch >= 0xc0 && ch <= 0xdf) + { + /* two-byte sequence */ + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n < 2) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + return -1; + if (state->__value.__wchb[0] < 0xc2) + /* overlong UTF-8 sequence */ + return -1; + state->__count = 0; + *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6) + | (wchar_t)(ch & 0x3f); + return i; + } + else if (ch >= 0xe0 && ch <= 0xef) + { + /* three-byte sequence */ + wchar_t tmp; + state->__value.__wchb[0] = ch; + if (state->__count == 0) + state->__count = 1; + else + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0) + /* overlong UTF-8 sequence */ + return -1; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[1] = ch; + state->__count = 2; + if (n < 3) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__count = 0; + tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6) + | (wchar_t)(ch & 0x3f); + + if (tmp >= 0xd800 && tmp <= 0xdfff) + return -1; + *pwc = tmp; + return i; + } + else if (ch >= 0xf0 && ch <= 0xf7) + { + /* four-byte sequence */ + if (sizeof(wchar_t) < 4) + return -1; /* we can't store such a value */ + state->__value.__wchb[0] = ch; + if (state->__count == 0) + state->__count = 1; + else + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xf0 && ch < 0x90) + /* overlong UTF-8 sequence */ + return -1; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[1] = ch; + if (state->__count == 1) + state->__count = 2; + else + ++n; + if (n < 3) + return -2; + ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[2] = ch; + state->__count = 3; + if (n < 4) + return -2; + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + return -1; + *pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12) + | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6) + | (wchar_t)(ch & 0x3f); + + state->__count = 0; + return i; + } + else if (ch >= 0xf8 && ch <= 0xfb) + { + /* five-byte sequence */ + if (sizeof(wchar_t) < 4) + return -1; /* we can't store such a value */ + state->__value.__wchb[0] = ch; + if (state->__count == 0) + state->__count = 1; + else + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xf8 && ch < 0x88) + /* overlong UTF-8 sequence */ + return -1; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[1] = ch; + if (state->__count == 1) + state->__count = 2; + else + ++n; + if (n < 3) + return -2; + ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[2] = ch; + if (state->__count == 2) + state->__count = 3; + else + ++n; + if (n < 4) + return -2; + ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[3] = ch; + state->__count = 4; + if (n < 5) + return -2; + ch = t[i++]; + *pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18) + | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12) + | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6) + | (wchar_t)(ch & 0x3f); + + state->__count = 0; + return i; + } + else if (ch >= 0xfc && ch <= 0xfd) { /* six-byte sequence */ - if (n >= 6) - { - wchar_t char2 = (wchar_t)*(t+1); - wchar_t char3 = (wchar_t)*(t+2); - wchar_t char4 = (wchar_t)*(t+3); - wchar_t char5 = (wchar_t)*(t+4); - wchar_t char6 = (wchar_t)*(t+5); - - if (char2 < 0x80 || char2 > 0xbf) - return -1; - if (char3 < 0x80 || char3 > 0xbf) - return -1; - if (char4 < 0x80 || char4 > 0xbf) - return -1; - if (char5 < 0x80 || char5 > 0xbf) - return -1; - if (char6 < 0x80 || char6 > 0xbf) - return -1; - - if (char1 == 0xfc) - { - if (char2 < 0x84) - /* overlong UTF-8 sequence */ - return -1; - } - - *pwc = ((char1 & 0x01) << 30) - | ((char2 & 0x3f) << 24) - | ((char3 & 0x3f) << 18) - | ((char4 & 0x3f) << 12) - | ((char5 & 0x3f) << 6) - | (char6 & 0x3f); - return 6; - } - else - return -2; - } + int ch2; + if (sizeof(wchar_t) < 4) + return -1; /* we can't store such a value */ + state->__value.__wchb[0] = ch; + if (state->__count == 0) + state->__count = 1; + else + ++n; + if (n < 2) + return -2; + ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; + if (state->__value.__wchb[0] == 0xfc && ch < 0x84) + /* overlong UTF-8 sequence */ + return -1; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[1] = ch; + if (state->__count == 1) + state->__count = 2; + else + ++n; + if (n < 3) + return -2; + ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[2] = ch; + if (state->__count == 2) + state->__count = 3; + else + ++n; + if (n < 4) + return -2; + ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3]; + if (ch < 0x80 || ch > 0xbf) + return -1; + state->__value.__wchb[3] = ch; + if (state->__count == 3) + state->__count = 4; + else + ++n; + if (n < 5) + return -2; + if (n == 5) + return -1; /* at this point we can't save enough to restart */ + ch = t[i++]; + if (ch < 0x80 || ch > 0xbf) + return -1; + ch2 = t[i++]; + *pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30) + | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24) + | (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18) + | (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12) + | (wchar_t)((ch & 0x3f) << 6) + | (wchar_t)(ch2 & 0x3f); + + state->__count = 0; + return i; + } else - return -1; + return -1; } else if (!strcmp (r->_current_locale, "C-SJIS")) { - int char1; + int ch; + int i = 0; if (s == NULL) return 0; /* not state-dependent */ - char1 = *t; - if (_issjis1 (char1)) - { - int char2 = t[1]; - if (n <= 1) - return -2; - if (_issjis2 (char2)) - { - *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1)); - return 2; - } - else - return -1; - } + ch = t[i++]; + if (state->__count == 0) + { + if (_issjis1 (ch)) + { + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n <= 1) + return -2; + ch = t[i++]; + } + } + if (state->__count == 1) + { + if (_issjis2 (ch)) + { + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; + state->__count = 0; + return i; + } + else + return -1; + } } else if (!strcmp (r->_current_locale, "C-EUCJP")) { - int char1; + int ch; + int i = 0; if (s == NULL) return 0; /* not state-dependent */ - char1 = *t; - if (_iseucjp (char1)) - { - int char2 = t[1]; - if (n <= 1) - return -2; - if (_iseucjp (char2)) - { - *pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1)); - return 2; - } - else - return -1; - } + ch = t[i++]; + if (state->__count == 0) + { + if (_iseucjp (ch)) + { + state->__value.__wchb[0] = ch; + state->__count = 1; + if (n <= 1) + return -2; + ch = t[i++]; + } + } + if (state->__count == 1) + { + if (_iseucjp (ch)) + { + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; + state->__count = 0; + return i; + } + else + return -1; + } } else if (!strcmp (r->_current_locale, "C-JIS")) { @@ -301,15 +373,16 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), JIS_ACTION action; JIS_CHAR_TYPE ch; unsigned char *ptr; - int i, curr_ch; + unsigned int i; + int curr_ch; if (s == NULL) { - state->__count = 0; + state->__state = ASCII; return 1; /* state-dependent */ } - curr_state = (state->__count == 0 ? ASCII : JIS); + curr_state = state->__state; ptr = t; for (i = 0; i < n; ++i) @@ -353,23 +426,21 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), case NOOP: break; case EMPTY: - state->__count = 0; + state->__state = ASCII; *pwc = (wchar_t)0; - return i; + return 0; case COPY_A: - state->__count = 0; + state->__state = ASCII; *pwc = (wchar_t)*ptr; return (i + 1); - case COPY_J: - state->__count = 0; - *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1)); - return (i + 1); + case COPY_J1: + state->__value.__wchb[0] = t[i]; + break; case COPY_J2: - state->__count = 1; - *pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1)); - return (ptr - t) + 2; + state->__state = JIS; + *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]); + return (i + 1); case MAKE_A: - case MAKE_J: ptr = (char *)(t + i + 1); break; case ERROR: @@ -379,6 +450,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), } + state->__state = curr_state; return -2; /* n < bytes needed */ } #endif /* MB_CAPABLE */ diff --git a/newlib/libc/stdlib/wcrtomb.c b/newlib/libc/stdlib/wcrtomb.c index f68533cbd..0eb59ca94 100644 --- a/newlib/libc/stdlib/wcrtomb.c +++ b/newlib/libc/stdlib/wcrtomb.c @@ -5,21 +5,45 @@ #include size_t -wcrtomb(char *s, wchar_t wc, mbstate_t *ps) +_DEFUN (_wcrtomb_r, (ptr, s, wc, ps), + struct _reent *ptr _AND + char *s _AND + wchar_t wc _AND + mbstate_t *ps) { int retval = 0; - _REENT_CHECK_MISC(_REENT); + char buf[10]; + +#ifdef MB_CAPABLE + if (ps == NULL) + { + _REENT_CHECK_MISC(ptr); + ps = &(_REENT_WCRTOMB_STATE(ptr)); + } +#endif if (s == NULL) - retval = _wctomb_r (_REENT, "", wc, ps); + retval = _wctomb_r (ptr, buf, L'\0', ps); else - retval = _wctomb_r (_REENT, s, wc, ps); + retval = _wctomb_r (ptr, s, wc, ps); if (retval == -1) { - _REENT->_errno = EILSEQ; + ps->__count = 0; + ptr->_errno = EILSEQ; return (size_t)(-1); } else return (size_t)retval; } + +#ifndef _REENT_ONLY +size_t +_DEFUN (wcrtomb, (s, wc, ps), + char *s _AND + wchar_t wc _AND + mbstate_t *ps) +{ + return _wcrtomb_r (_REENT, s, wc, ps); +} +#endif /* !_REENT_ONLY */ diff --git a/newlib/libc/stdlib/wcsrtombs.c b/newlib/libc/stdlib/wcsrtombs.c index 22512c0b4..431347866 100644 --- a/newlib/libc/stdlib/wcsrtombs.c +++ b/newlib/libc/stdlib/wcsrtombs.c @@ -5,18 +5,74 @@ #include size_t -wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps) +_DEFUN (_wcsrtombs_r, (r, dst, src, len, ps), + struct _reent *r _AND + char *dst _AND + const wchar_t **src _AND + size_t len _AND + mbstate_t *ps) { - int retval = 0; - _REENT_CHECK_MISC(_REENT); + char *ptr = dst; + char buff[10]; + int i, n; + int count; + wint_t wch; - retval = _wcstombs_r (_REENT, dst, *src, len, ps); +#ifdef MB_CAPABLE + if (ps == NULL) + { + _REENT_CHECK_MISC(r); + ps = &(_REENT_WCSRTOMBS_STATE(r)); + } +#endif - if (retval == -1) + n = (int)len; + + while (n > 0) { - _REENT->_errno = EILSEQ; - return (size_t)(-1); + wchar_t *pwcs = (wchar_t *)(*src); + int count = ps->__count; + wint_t wch = ps->__value.__wch; + int bytes = _wctomb_r (r, buff, *pwcs, ps); + if (bytes == -1) + { + r->_errno = EILSEQ; + ps->__count = 0; + return (size_t)-1; + } + if (bytes <= n) + { + for (i = 0; i < bytes; ++i) + *ptr++ = buff[i]; + + if (*pwcs == 0x00) + { + *src = NULL; + ps->__count = 0; + return (size_t)(ptr - dst - 1); + } + ++(*src); + } + else + { + /* not enough room, we must back up state to before _wctomb_r call */ + ps->__count = count; + ps->__value.__wch = wch; + } + n -= bytes; } - else - return (size_t)retval; + + return (size_t)(ptr - dst); +} + +#ifndef _REENT_ONLY +size_t +_DEFUN (wcsrtombs, (dst, src, len, ps), + char *dst _AND + const wchar_t **src _AND + size_t len _AND + mbstate_t *ps) +{ + return _wcsrtombs_r (_REENT, dst, src, len, ps); } +#endif /* !_REENT_ONLY */ diff --git a/newlib/libc/stdlib/wcstombs.c b/newlib/libc/stdlib/wcstombs.c index c984746c2..f02d4ab1f 100644 --- a/newlib/libc/stdlib/wcstombs.c +++ b/newlib/libc/stdlib/wcstombs.c @@ -60,22 +60,22 @@ _DEFUN (wcstombs, (s, pwcs, n), size_t n) { #ifdef MB_CAPABLE - mbstate_t state; - state.__count = 0; - - return _wcstombs_r (_REENT, s, pwcs, n, &state); + mbstate_t state; + state.__count = 0; + + return _wcstombs_r (_REENT, s, pwcs, n, &state); #else /* not MB_CAPABLE */ - int count = 0; - - if (n != 0) { - do { - if ((*s++ = (char) *pwcs++) == 0) - break; - count++; - } while (--n != 0); - } - - return count; + int count = 0; + + if (n != 0) { + do { + if ((*s++ = (char) *pwcs++) == 0) + break; + count++; + } while (--n != 0); + } + + return count; #endif /* not MB_CAPABLE */ } diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index 7cd84b6fa..4bcabee5d 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -4,6 +4,9 @@ #include #include "mbctype.h" +/* for some conversions, we use the __count field as a place to store a state value */ +#define __state __count + int _DEFUN (_wctomb_r, (r, s, wchar, state), struct _reent *r _AND @@ -126,10 +129,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), /* first byte is non-zero..validate multi-byte char */ if (_isjis (char1) && _isjis (char2)) { - if (state->__count == 0) + if (state->__state == 0) { /* must switch from ASCII to JIS state */ - state->__count = 1; + state->__state = 1; *s++ = ESC_CHAR; *s++ = '$'; *s++ = 'B'; @@ -144,10 +147,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), } else { - if (state->__count != 0) + if (state->__state != 0) { /* must switch from JIS to ASCII state */ - state->__count = 0; + state->__state = 0; *s++ = ESC_CHAR; *s++ = '('; *s++ = 'B'; -- cgit v1.2.3