Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2010-02-06 21:28:33 +0300
committerCorinna Vinschen <corinna@vinschen.de>2010-02-06 21:28:33 +0300
commit5eb556c8497ddd680c28e53e04c4badfe612004a (patch)
treea9d05f6982f2d8cf3b670e1e0d00655d8eaeea99 /newlib/libc/locale/locale.c
parent38d9821daf5c631d2c8f12bc772961973edaba0c (diff)
* libc/ctype/ctype_cp.h (_CTYPE_GEORGIAN_PS_128_254): Define.
(_CTYPE_GEORGIAN_PS_255): Define. (_CTYPE_PT154_128_254): Define. (_CTYPE_PT154_255): Define. (__ctype_cp): Add array members for above ctype definitions. * libc/locale/locale.c (loadlocale): Make TIS-620 charset name available for all targets. Add guards for setting the conversion function pointers. Add support for GEORGIAN-PS and PT154 charsets. Change documentation to reflect current behaviour more closely. * libc/locale/nl_langinfo.c (nl_langinfo): On Cygwin, translate "CP101" to "GEORGIAN-PS" and "CP102" to "PT154". * libc/stdlib/sb_charsets.c (__cp_conv): Add conversion arrays for GEORGIAN-PS and PT154. (__cp_index): Map invalid Windows codepage number 101 to GEORGIAN-PS conversion array, 102 to PT154 conversion array.
Diffstat (limited to 'newlib/libc/locale/locale.c')
-rw-r--r--newlib/libc/locale/locale.c117
1 files changed, 86 insertions, 31 deletions
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index 85069aefa..26283c5f4 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -56,34 +56,36 @@ for a given language, a three character string per ISO 639-3.
<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
<<"modifier">> see below.
-Additionally to the POSIX specifier, seven extensions are supported for
-backward compatibility with older implementations using newlib:
-<<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<C-KOI8-R>>,
-<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
-xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932,
-1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
-
-Instead of <<"C-">>, you can specify also <<"C.">>. Both variations allow
+In addition to the POSIX specifier, the following extension is supported
+for backward compatibility with older implementations using newlib:
+<<"C-charset">>.
+Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
specifying language-neutral locales while using charsets other than ASCII,
for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
but uses the UTF-8 charset.
-Even when using POSIX locale strings, the only charsets allowed are
+The following charsets are recognized:
<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
-<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
-[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 932, 1125, 1250,
-1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
+1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
+857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
+1257, 1258].
+
Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
are equivalent. Charset names with dashes can also be written without
dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
<<"EUCKR">> are also recognized with a dash, <<"EUC-JP">> and <<"EUC-KR">>.
+Full support for all of the above charsets requires that newlib has been
+built with multibyte support and support for all ISO and Windows codepages.
+Otherwise all single-byte charsets are simply mapped to ASCII. Right now,
+only newlib for Cygwin is built with full charset support by default.
+Under Cygwin, this implementation additionally supports the charsets
+<<"GBK">>, <<"eucKR">>, and <<"Big5">>. Cygwin does not support <<"JIS">>.
+
(<<"">> is also accepted; if given, the settings are read from the
corresponding LC_* environment variables and $LANG according to POSIX rules.)
-Under Cygwin, this implementation additionally supports the charsets
-<<"GBK">>, <<"eucKR">>, <<"Big5">>, and <<"TIS-620">>.
-
This implementation also supports a single modifier, <<"cjknarrow">>.
Any other modifier is ignored. <<"cjknarrow">>, in conjunction with one
of the language specifiers <<"ja">>, <<"ko">>, and <<"zh">> specifies
@@ -720,38 +722,91 @@ loadlocale(struct _reent *p, int category)
l_mbtowc = __ascii_mbtowc;
#endif
break;
-#ifdef __CYGWIN__
case 'G':
case 'g':
- if (strcasecmp (charset, "GBK"))
- return NULL;
- strcpy (charset, "GBK");
- mbc_max = 2;
+#ifdef __CYGWIN__
+ if (!strcasecmp (charset, "GBK"))
+ {
+ strcpy (charset, "GBK");
+ mbc_max = 2;
#ifdef _MB_CAPABLE
- l_wctomb = __gbk_wctomb;
- l_mbtowc = __gbk_mbtowc;
+ l_wctomb = __gbk_wctomb;
+ l_mbtowc = __gbk_mbtowc;
#endif
+ }
+ else
+#endif /* __CYGWIN__ */
+ /* GEORGIAN-PS and the alias without dash */
+ if (!strncasecmp (charset, "GEORGIAN", 8))
+ {
+ c = charset + 8;
+ if (*c == '-')
+ ++c;
+ if (strcasecmp (c, "PS"))
+ return NULL;
+ strcpy (charset, "CP101");
+ mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ l_wctomb = __cp_wctomb;
+ l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ }
+ else
+ return NULL;
break;
- case 'B':
- case 'b':
- if (strcasecmp (charset, "BIG5"))
- return NULL;
- strcpy (charset, "BIG5");
- mbc_max = 2;
+ case 'P':
+ case 'p':
+ /* PT154 */
+ if (strcasecmp (charset, "PT154"))
+ return NULL;
+ strcpy (charset, "CP102");
+ mbc_max = 1;
#ifdef _MB_CAPABLE
- l_wctomb = __big5_wctomb;
- l_mbtowc = __big5_mbtowc;
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+ l_wctomb = __cp_wctomb;
+ l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
#endif
break;
case 'T':
case 't':
- if (strcasecmp (charset, "TIS620") && strcasecmp (charset, "TIS-620"))
+ if (strncasecmp (charset, "TIS", 3))
+ return NULL;
+ c = charset + 3;
+ if (*c == '-')
+ ++c;
+ if (strcasecmp (c, "620"))
return NULL;
strcpy (charset, "CP874");
mbc_max = 1;
#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
l_wctomb = __cp_wctomb;
l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+ l_wctomb = __ascii_wctomb;
+ l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+ break;
+#ifdef __CYGWIN__
+ case 'B':
+ case 'b':
+ if (strcasecmp (charset, "BIG5"))
+ return NULL;
+ strcpy (charset, "BIG5");
+ mbc_max = 2;
+#ifdef _MB_CAPABLE
+ l_wctomb = __big5_wctomb;
+ l_mbtowc = __big5_mbtowc;
#endif
break;
#endif /* __CYGWIN__ */