diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2023-07-29 00:33:52 +0300 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2023-07-31 23:39:09 +0300 |
commit | 68bb3deabd0381465e18d41ae7f722cfe95ba3c8 (patch) | |
tree | 2c064cb6b0acaef032d460b3c48fd3393c02289d /winsup/cygwin/strfuncs.cc | |
parent | 8a4318943875cd922601d34e54ce8a83ad2e733c (diff) |
Cygwin: fix GB18030 support
The changes to support GB18030 were insufficient and the underlying
Windows conversion functions just failed. Fix how the Windows functions
are called for GB18030.
Fixes: 5da71b605995 ("Cygwin: add support for GB18030 codeset")
Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin/strfuncs.cc')
-rw-r--r-- | winsup/cygwin/strfuncs.cc | 150 |
1 files changed, 148 insertions, 2 deletions
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 76b7216cc..43ce93ed2 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -331,7 +331,55 @@ __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) extern "C" int __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { - return __db_wctomb (r,s, wchar, 54936); + int ret; + wchar_t wres[2]; + + if (s == NULL) + return 0; + + if (state->__count == 0) + { + if (wchar <= 0x7f) + { + *s = (char) wchar; + return 1; + } + + if (wchar >= 0xd800 && wchar <= 0xdbff) + { + /* First half of a surrogate pair */ + state->__count = 18030; + state->__value.__wch = wchar; + return 0; + } + ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, &wchar, 1, s, + 4, NULL, NULL); + if (ret > 0) + return ret; + goto ilseq; + } + else if (state->__count == 18030 && state->__value.__wch >= 0xd800 + && state->__value.__wch <= 0xdbff) + { + if (wchar >= 0xdc00 && wchar <= 0xdfff) + { + /* Create multibyte sequence from full surrogate pair. */ + wres[0] = state->__value.__wch; + wres[1] = wchar; + ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, wres, 2, s, 4, + NULL, NULL); + if (ret > 0) + { + state->__count = 0; + return ret; + } + } +ilseq: + _REENT_ERRNO(r) = EILSEQ; + return -1; + } + _REENT_ERRNO(r) = EINVAL; + return -1; } extern "C" int @@ -495,7 +543,105 @@ extern "C" int __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, mbstate_t *state) { - return __db_mbtowc (r, pwc, s, n, 54936, state); + wchar_t wres[2], dummy; + unsigned char ch; + int ret, len, ocount; + size_t ncopy; + + if (state->__count < 0 || (state->__count > (int) sizeof state->__value.__wchb + && state->__count != 18030)) + { + errno = EINVAL; + return -1; + } + + if (s == NULL) + { + s = ""; + n = 1; + pwc = NULL; + } + + if (state->__count == 18030) + { + /* Return second half of the surrogate pair */ + *pwc = state->__value.__wch; + state->__count = 0; + return 1; + } + + ncopy = MIN (MIN (n, MB_CUR_MAX), + sizeof state->__value.__wchb - state->__count); + memcpy (state->__value.__wchb + state->__count, s, ncopy); + ocount = state->__count; + state->__count += ncopy; + s = (char *) state->__value.__wchb; + n = state->__count; + + if (n == 0) /* Incomplete multibyte sequence */ + return -2; + + if (!pwc) + pwc = &dummy; + + /* Check if input is a valid GB18030 char (per FreeBSD): + * Single byte: [00-7f] + * Two byte: [81-fe][40-7e,80-fe] + * Four byte: [81-fe][30-39][81-fe][30-39] + */ + ch = *(unsigned char *) s; + if (ch <= 0x7f) + { + *pwc = ch; + state->__count = 0; + return ch ? 1 : 0; + } + if (ch >= 0x81 && ch <= 0xfe) + { + if (n < 2) + return -2; + ch = (unsigned char) s[1]; + if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) + len = 2; + else if (ch >= 0x30 && ch <= 0x39) + { + if (n < 3) + return -2; + ch = (unsigned char) s[2]; + if (ch < 0x81 || ch > 0xfe) + goto ilseq; + if (n < 4) + return -2; + ch = (unsigned char) s[3]; + if (ch < 0x30 || ch > 0x39) + goto ilseq; + len = 4; + } + else + goto ilseq; + } + else + goto ilseq; + ret = MultiByteToWideChar (54936, MB_ERR_INVALID_CHARS, s, len, wres, 2); + if (ret) + { + *pwc = wres[0]; + if (ret == 2) + { + /* Surrogate pair. Store second half for later and return + first half. Return real count - 1, return 1 when the second + half of the pair is returned in the next run. */ + state->__count = 18030; + state->__value.__wch = wres[1]; + --len; + } + else + state->__count = 0; + return len - ocount; + } +ilseq: + _REENT_ERRNO(r) = EILSEQ; + return -1; } extern "C" int |