Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2009-03-24 13:13:27 +0300
committerCorinna Vinschen <corinna@vinschen.de>2009-03-24 13:13:27 +0300
commit28186e81d947a830d9895cecc2d8e836a3cbccd0 (patch)
tree8ef79212f3476f0b1fbbef8fa46b97ea297c6fe3 /newlib/libc/locale/locale.c
parent0258b687228f0d9d5191615ba0a13f7496f09d3b (diff)
* libc/ctype/iswalpha.c: Handle all wchar_t as unicode on
_MB_CAPABLE systems. * libc/ctype/iswblank.c: Ditto. * libc/ctype/iswcntrl.c: Ditto. * libc/ctype/iswprint.c: Ditto. * libc/ctype/iswpunct.c: Ditto. * libc/ctype/iswspace.c: Ditto. * libc/ctype/jp2uc.c (__jp2uc): On Cygwin, just return c. Explain why. * libc/ctype/towlower.c: Ditto. * libc/ctype/towupper.c: Ditto. * libc/include/sys/config.h: Define _MB_EXTENDED_CHARSETS_ISO and _MB_EXTENDED_CHARSETS_WINDOWS if _MB_EXTENDED_CHARSETS_ALL is defined. Define _MB_EXTENDED_CHARSETS_ALL on Cygwin only for now. * libc/include/sys/reent.h (struct _reent): Mark _current_category and _current_locale as unused. * libc/locale/locale.c: Add new charset support to documentation. Include ../stdio/local.h from here. (lc_ctype_charset): Set to "ASCII" by default. (lc_message_charset): Ditto. (_setlocale_r): Don't set _current_category and _current_locale. (loadlocale): Add Cygwin codepage support. On _MB_CAPABLE systems, set __mbtowc and __wctomb function pointers to function corresponding with current charset. Don't allow non-existent ISO-8859-12 charset. Add support for Windows singlebyte codepages. On Cygwin, add support for GBK, CP949, and BIG5. On Cygwin, call __set_ctype() in case the category is LC_CTYPE. Don't set _current_category and _current_locale. * libc/stdlib/Makefile.am (GENERAL_SOURCES): Add sb_charsets.c. * libc/stdlib/Makefile.in: Regenerate. * libc/stdlib/local.h: Add prototype for __locale_charset. Add prototypes for __mbtowc and __wctomb pointers. Add prototypes for charset-specific _wctomb_r and _mbtowc_r functions. Declare tables and functions from sb_charsets.c. * libc/stdlib/mbtowc_r.c (__mbtowc): Define. Set to __ascii_mbtowc by default. (_mbtowc_r): Just call __mbtowc from here. (__ascii_mbtowc): New function. (__iso_mbtowc): New function. (__cp_mbtowc): New function. (__utf8_mbtowc): New function. (__sjis_mbtowc): New function. Disable on Cygwin. (__eucjp_mbtowc): New function. Disable on Cygwin. (__jis_mbtowc): New function. Disable on Cygwin. 
* libc/stdlib/sb_charsets.c: New file, adding singlebyte to UTF conversion tables for all ISO and CP charsets. (__iso_8859_index): New function. (__cp_index): New function. * libc/stdlib/wctomb_r.c (__wctomb): Define. Set to __ascii_wctomb by default. (_wctomb_r): Just call __wctomb from here. (__ascii_wctomb): New function. (__utf8_wctomb): New function. (__sjis_wctomb): New function. Disable on Cygwin. (__eucjp_wctomb): New function. Disable on Cygwin. (__jis_wctomb): New function. Disable on Cygwin. (__iso_wctomb): New function. (__cp_wctomb): New function.
Diffstat (limited to 'newlib/libc/locale/locale.c')
-rw-r--r--newlib/libc/locale/locale.c169
1 files changed, 152 insertions, 17 deletions
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index a4cd30eb9..d3644eb8a 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -47,11 +47,18 @@ and <<"C">> values for <[locale]>; strings representing other locales are not
honored unless _MB_CAPABLE is defined in which case POSIX locale strings
are allowed, plus five extensions supported for backward compatibility with
older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>,
-<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15. Even when using
-POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
-<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15. (<<"">> is
-also accepted; if given, the settings are read from the corresponding
-LC_* environment variables and $LANG according to POSIX rules.
+<<"C-SJIS">>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
+xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250,
+1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258]. Even when using POSIX
+locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
+<<"EUCJP">>, <<"SJIS">>, <<"ISO-8859-x">> with 1 <= x <= 15, or
+<<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866,
+874, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+(<<"">> is also accepted; if given, the settings are read from the
+corresponding LC_* environment variables and $LANG according to POSIX rules.)
+
+Under Cygwin, this implementation additionally supports the charsets <<"GBK">>,
+<<"CP949">>, and <<"BIG5">>.
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
a pointer to the string representing the current locale (always
@@ -85,6 +92,9 @@ PORTABILITY
ANSI C requires <<setlocale>>, but the only locale required across all
implementations is the C locale.
+NOTES
+There is no ISO-8859-12 codepage. It's also refused by this implementation.
+
No supporting OS subroutines are required.
*/
@@ -129,6 +139,11 @@ No supporting OS subroutines are required.
#include <limits.h>
#include <reent.h>
#include <stdlib.h>
+#include <wchar.h>
+#include "../stdlib/local.h"
+#ifdef __CYGWIN__
+#include <windows.h>
+#endif
#define _LC_LAST 7
#define ENCODING_LEN 31
@@ -190,8 +205,8 @@ static const char *__get_locale_env(struct _reent *, int);
#endif
-static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1";
-static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1";
+static char lc_ctype_charset[ENCODING_LEN + 1] = "ASCII";
+static char lc_message_charset[ENCODING_LEN + 1] = "ASCII";
char *
_DEFUN(_setlocale_r, (p, category, locale),
@@ -205,8 +220,6 @@ _DEFUN(_setlocale_r, (p, category, locale),
if (strcmp (locale, "POSIX") && strcmp (locale, "C")
&& strcmp (locale, ""))
return NULL;
- p->_current_category = category;
- p->_current_locale = locale;
}
return "C";
#else
@@ -361,6 +374,11 @@ currentlocale()
#endif
#ifdef _MB_CAPABLE
+#ifdef __CYGWIN__
+extern void *__set_charset_from_codepage (unsigned int, char *charset);
+extern void __set_ctype (const char *charset);
+#endif /* __CYGWIN__ */
+
static char *
loadlocale(struct _reent *p, int category)
{
@@ -382,7 +400,7 @@ loadlocale(struct _reent *p, int category)
if (!strcmp (locale, "POSIX"))
strcpy (locale, "C");
if (!strcmp (locale, "C")) /* Default "C" locale */
- strcpy (charset, "ISO-8859-1");
+ strcpy (charset, "ASCII");
else if (locale[0] == 'C' && locale[1] == '-') /* Old newlib style */
strcpy (charset, locale + 2);
else /* POSIX style */
@@ -414,7 +432,11 @@ loadlocale(struct _reent *p, int category)
}
else if (c[0] == '\0' || c[0] == '@')
/* End of string or just a modifier */
+#ifdef __CYGWIN__
+ __set_charset_from_codepage (GetACP (), charset);
+#else
strcpy (charset, "ISO-8859-1");
+#endif
else
/* Invalid string */
return NULL;
@@ -426,42 +448,155 @@ loadlocale(struct _reent *p, int category)
if (strcmp (charset, "UTF-8"))
return NULL;
mbc_max = 6;
+#ifdef _MB_CAPABLE
+ __wctomb = __utf8_wctomb;
+ __mbtowc = __utf8_mbtowc;
+#endif
break;
case 'J':
if (strcmp (charset, "JIS"))
return NULL;
mbc_max = 8;
+#ifdef _MB_CAPABLE
+ __wctomb = __jis_wctomb;
+ __mbtowc = __jis_mbtowc;
+#endif
break;
case 'E':
- if (strcmp (charset, "EUCJP"))
+ if (strcmp (charset, "EUCJP") && strcmp (charset, "eucJP"))
return NULL;
+ strcpy (charset, "EUCJP");
mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __eucjp_wctomb;
+ __mbtowc = __eucjp_mbtowc;
+#endif
break;
case 'S':
if (strcmp (charset, "SJIS"))
return NULL;
mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __sjis_wctomb;
+ __mbtowc = __sjis_mbtowc;
+#endif
break;
case 'I':
- default:
- /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */
+ /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
+ ISO-8859-12. */
if (strncmp (charset, "ISO-8859-", 9))
return NULL;
- val = strtol (charset + 9, &end, 10);
- if (val < 1 || val > 15 || *end)
+ val = _strtol_r (p, charset + 9, &end, 10);
+ if (val < 1 || val > 16 || val == 12 || *end)
return NULL;
mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_ISO
+ __wctomb = __iso_wctomb;
+ __mbtowc = __iso_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_ISO */
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_ISO */
+#endif
+ break;
+ case 'C':
+ if (charset[1] != 'P')
+ return NULL;
+ val = _strtol_r (p, charset + 2, &end, 10);
+ if (*end)
+ return NULL;
+ switch (val)
+ {
+ case 437:
+ case 720:
+ case 737:
+ case 775:
+ case 850:
+ case 852:
+ case 855:
+ case 857:
+ case 858:
+ case 862:
+ case 866:
+ case 874:
+ case 1125:
+ case 1250:
+ case 1251:
+ case 1252:
+ case 1253:
+ case 1254:
+ case 1255:
+ case 1256:
+ case 1257:
+ case 1258:
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ __wctomb = __cp_wctomb;
+ __mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ break;
+#ifdef __CYGWIN__
+ case 949:
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __kr_wctomb;
+ __mbtowc = __kr_mbtowc;
+#endif
+ break;
+#endif
+ default:
+ return NULL;
+ }
+ break;
+ case 'A':
+ if (strcmp (charset, "ASCII"))
+ return NULL;
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+ __wctomb = __ascii_wctomb;
+ __mbtowc = __ascii_mbtowc;
+#endif
break;
+#ifdef __CYGWIN__
+ case 'G':
+ if (strcmp (charset, "GBK"))
+ return NULL;
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __gbk_wctomb;
+ __mbtowc = __gbk_mbtowc;
+#endif
+ break;
+ case 'B':
+ if (strcmp (charset, "BIG5") && strcmp (charset, "Big5"))
+ return NULL;
+ strcpy (charset, "BIG5");
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ __wctomb = __big5_wctomb;
+ __mbtowc = __big5_mbtowc;
+#endif
+ break;
+#endif /* __CYGWIN__ */
+ default:
+ return NULL;
}
if (category == LC_CTYPE)
{
strcpy (lc_ctype_charset, charset);
__mb_cur_max = mbc_max;
+#ifdef __CYGWIN__
+ __set_ctype (charset);
+#endif
}
else if (category == LC_MESSAGES)
strcpy (lc_message_charset, charset);
- p->_current_category = category;
- p->_current_locale = locale;
return strcpy(current_categories[category], new_categories[category]);
}