Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Corinna Vinschen <corinna@vinschen.de>, 2016-07-20 23:05:59 +0300
committer: Corinna Vinschen <corinna@vinschen.de>, 2016-08-15 11:56:57 +0300
commit: d16a56306d63b4d94412b479a8ea83463a3514ab (patch)
tree: b37c2954976f314628637e660e039f1be4034b1c /winsup/cygwin/strfuncs.cc
parent: 88208d3735821df0f5a66c5f8781282a7f5bf284 (diff)
Consolidate wctomb/mbtowc calls for POSIX.1-2008
- Remove the charset parameter from the low-level __foo_wctomb/__foo_mbtowc calls.
- Instead, create arrays of functions for the ISO and Windows codepages that point to functions which do not need to evaluate the charset string on each call. Create matching helper functions, i.e., __iso_wctomb, __iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning the right function pointer.
- Create __WCTOMB/__MBTOWC macros utilizing the per-reent locale and replace calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC.
- Drop the global __wctomb/__mbtowc vars.
- Utilize the aforementioned changes in Cygwin to get rid of charset in other, calling functions and simplify the code.
- In Cygwin, restrict the global cygheap locale info to the job performed by internal_setlocale. Use UTF-8 instead of ASCII on the fly in internal conversion functions.
- In Cygwin dll_entry, make sure to initialize a TLS area with a NULL _REENT->_locale pointer. Add a comment to explain why.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
Diffstat (limited to 'winsup/cygwin/strfuncs.cc')
-rw-r--r-- winsup/cygwin/strfuncs.cc | 60
1 file changed, 29 insertions, 31 deletions
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 40f2c2945..c962f7cf8 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -140,15 +140,13 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
}
extern "C" int
-__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
- mbstate_t *state)
+__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 932);
}
extern "C" int
-__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
- mbstate_t *state)
+__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
{
/* Unfortunately, the Windows eucJP codepage 20932 is not really 100%
compatible to eucJP. It's a cute approximation which makes it a
@@ -192,22 +190,19 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
}
extern "C" int
-__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
- mbstate_t *state)
+__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 936);
}
extern "C" int
-__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
- mbstate_t *state)
+__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 949);
}
extern "C" int
-__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
- mbstate_t *state)
+__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 950);
}
@@ -268,14 +263,14 @@ __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp,
extern "C" int
__sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
- const char *charset, mbstate_t *state)
+ mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 932, state);
}
extern "C" int
__eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
- const char *charset, mbstate_t *state)
+ mbstate_t *state)
{
/* See comment in __eucjp_wctomb above. */
wchar_t dummy;
@@ -352,21 +347,21 @@ jis_x_0212:
extern "C" int
__gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
- const char *charset, mbstate_t *state)
+ mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 936, state);
}
extern "C" int
__kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
- const char *charset, mbstate_t *state)
+ mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 949, state);
}
extern "C" int
__big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
- const char *charset, mbstate_t *state)
+ mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 950, state);
}
@@ -408,7 +403,7 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
*/
static size_t __reg3
sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
- bool is_path)
+ bool is_path)
{
char buf[10];
char *ptr = dst;
@@ -416,9 +411,10 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
size_t n = 0;
mbstate_t ps;
save_errno save;
- wctomb_p f_wctomb = cygheap->locale.wctomb;
- const char *charset = cygheap->locale.charset;
+ wctomb_p f_wctomb = __WCTOMB;
+ if (f_wctomb == __ascii_wctomb)
+ f_wctomb = __utf8_wctomb;
memset (&ps, 0, sizeof ps);
if (dst == NULL)
len = (size_t) -1;
@@ -441,13 +437,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
}
else
{
- bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
- if (bytes == -1 && *charset != 'U'/*TF-8*/)
+ bytes = f_wctomb (_REENT, buf, pw, &ps);
+ if (bytes == -1 && f_wctomb != __utf8_wctomb)
{
/* Convert chars invalid in the current codepage to a sequence
ASCII CAN; UTF-8 representation of invalid char. */
buf[0] = 0x18; /* ASCII CAN */
- bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps);
+ bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps);
if (bytes == -1)
{
++pwcs;
@@ -465,8 +461,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
ps.__count = 0;
continue;
}
- bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset,
- &ps);
+ bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps);
nwc--;
}
}
@@ -557,8 +552,8 @@ sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
charset, which is the charset returned by GetConsoleCP (). Most of the
time this is used for box and line drawing characters. */
size_t __reg3
-sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
- size_t dlen, const char *src, size_t nms)
+sys_cp_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen,
+ const char *src, size_t nms)
{
wchar_t *ptr = dst;
unsigned const char *pmbs = (unsigned const char *) src;
@@ -581,10 +576,11 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
next byte must be a valid UTF-8 start byte. If the charset
isn't UTF-8 anyway, try to convert the following bytes as UTF-8
sequence. */
- if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 && *charset != 'U'/*TF-8*/)
+ if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4
+ && f_mbtowc != __utf8_mbtowc)
{
bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1,
- nms - 1, charset, &ps);
+ nms - 1, &ps);
if (bytes < 0)
{
/* Invalid UTF-8 sequence? Treat the ASCII CAN character as
@@ -603,7 +599,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
int bytes2 = __utf8_mbtowc (_REENT, ptr2,
(const char *) pmbs + bytes,
- nms - bytes, charset, &ps);
+ nms - bytes, &ps);
if (bytes2 < 0)
memset (&ps, 0, sizeof ps);
else
@@ -625,7 +621,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
}
}
else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
- charset, &ps)) < 0)
+ &ps)) < 0)
{
/* The technique is based on a discussion here:
http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
@@ -668,8 +664,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
size_t __reg3
sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms)
{
- return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset,
- dst, dlen, src, nms);
+ mbtowc_p f_mbtowc = __MBTOWC;
+ if (f_mbtowc == __ascii_mbtowc)
+ f_mbtowc = __utf8_mbtowc;
+ return sys_cp_mbstowcs (f_mbtowc, dst, dlen, src, nms);
}
/* Same as sys_wcstombs_alloc, just backwards. */