From 162a23c5049fc63373543f6edd60713d52530e4b Mon Sep 17 00:00:00 2001
From: Jeff Johnston <jjohnstn@redhat.com>
Date: Mon, 24 Aug 2009 22:11:11 +0000
Subject: 2009-08-24  Corinna Vinschen  <corinna@vinschen.de>

        * libc/locale/locale.c: Update documentation.
        (loadlocale): Map "KOI8-R" and "KOI8-U" to CP20866 and CP21866.

2009-08-24  Andy Koppe  <andy.koppe@gmail.com>

        * libc/stdlib/sb_charsets.c (__cp_conv): Add KOI8-R (Russian, CP20866)
        and KOI8-U (Ukrainian, CP21866) to Windows codepage conversion tables.
        * libc/ctype/ctype_cp.h (__ctype_cp): Likewise for ctype tables.
---
 newlib/libc/locale/locale.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

(limited to 'newlib/libc/locale')
diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c
index 411a04cda..19159fbca 100644
--- a/newlib/libc/locale/locale.c
+++ b/newlib/libc/locale/locale.c
@@ -54,20 +54,21 @@ the form
 <<"language">> is a two character string per ISO 639.  <<"TERRITORY">> is a
 country code per ISO 3166.  For <<"charset">> and <<"modifier">> see below.
 
-Additionally to the POSIX specifier, five extensions are supported for
+In addition to the POSIX specifier, eight extensions are supported for
 backward compatibility with older implementations using newlib:
-<<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>/<<"C-eucJP">>, <<"C-SJIS">>,
-<<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with xxx in [437,
-720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251,
-1252, 1253, 1254, 1255, 1256, 1257, 1258].
+<<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<"C-KOI8-R">>,
+<<"C-KOI8-U">>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
+xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125,
+1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
 
 Even when using POSIX locale strings, the only charsets allowed are
-<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
-with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
-852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
-1255, 1256, 1257, 1258].  Charsets are case insensitive.  For instance,
-<<"UTF-8">> and <<"utf-8">> are equivalent.  <<"UTF-8">> can also be
-written without dash, as in <<"UTF8">> or <<"utf8">>.
+<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
+<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
+[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250,
+1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
+are equivalent.  <<"UTF-8">> can also be written without the dash, as in
+<<"UTF8">> or <<"utf8">>.
 
 (<<"">> is also accepted; if given, the settings are read from the
 corresponding LC_* environment variables and $LANG according to POSIX rules.
@@ -615,6 +616,24 @@ loadlocale(struct _reent *p, int category)
 	  return NULL;
 	}
     break;
+    case 'K':
+    case 'k':
+      if (!strcasecmp (charset, "KOI8-R"))
+	strcpy (charset, "CP20866");
+      else if (!strcasecmp (charset, "KOI8-U"))
+	strcpy (charset, "CP21866");
+      else
+	return NULL;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+      l_wctomb = __cp_wctomb;
+      l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+      l_wctomb = __ascii_wctomb;
+      l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+      break;
     case 'A':
     case 'a':
       if (strcasecmp (charset, "ASCII"))
-- 
cgit v1.2.3