Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'winsup/cygwin/nlsfuncs.cc')
-rw-r--r--winsup/cygwin/nlsfuncs.cc1588
1 files changed, 0 insertions, 1588 deletions
diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
deleted file mode 100644
index 00c834e55..000000000
--- a/winsup/cygwin/nlsfuncs.cc
+++ /dev/null
@@ -1,1588 +0,0 @@
-/* nlsfuncs.cc: NLS helper functions
-
- Copyright 2010 Red Hat, Inc.
-
-This file is part of Cygwin.
-
-This software is a copyrighted work licensed under the terms of the
-Cygwin license. Please consult the file "CYGWIN_LICENSE" for
-details. */
-
-#include "winsup.h"
-#include <winnls.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <locale.h>
-#include <wchar.h>
-#include "path.h"
-#include "fhandler.h"
-#include "dtable.h"
-#include "cygheap.h"
-#include "tls_pbuf.h"
-/* Internal headers from newlib */
-#include "../locale/timelocal.h"
-#include "../locale/lctype.h"
-#include "../locale/lnumeric.h"
-#include "../locale/lmonetary.h"
-#include "../locale/lmessages.h"
-#include "lc_msg.h"
-#include "lc_era.h"
-
-#define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
-
-#define getlocaleinfo(category,type) \
- __getlocaleinfo(lcid,(type),_LC(category))
-#define eval_datetimefmt(type,flags) \
- __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\
- lc_time_end-lc_time_ptr)
-#define charfromwchar(category,in) \
- __charfromwchar (_##category##_locale->in,_LC(category),\
- f_wctomb,charset)
-
-#define has_modifier(x) ((x)[0] && !strcmp (modifier, (x)))
-
-/* Vista and later. Not defined in w32api yet. */
-extern "C" {
-WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
-};
-
-static char last_locale[ENCODING_LEN + 1];
-static LCID last_lcid;
-
-/* Fetch LCID from POSIX locale specifier.
- Return values:
-
- -1: Invalid locale
- 0: C or POSIX
- >0: LCID
-*/
-static LCID
-__get_lcid_from_locale (const char *name)
-{
- char locale[ENCODING_LEN + 1];
- char *c;
- LCID lcid;
-
- /* Speed up reusing the same locale as before, for instance in LC_ALL case. */
- if (!strcmp (name, last_locale))
- {
- debug_printf ("LCID=0x%04x", last_lcid);
- return last_lcid;
- }
- stpcpy (last_locale, name);
- stpcpy (locale, name);
- /* Store modifier for later use. */
- const char *modifier = strchr (last_locale, '@') ? : "";
- /* Drop charset and modifier */
- c = strchr (locale, '.');
- if (!c)
- c = strchr (locale, '@');
- if (c)
- *c = '\0';
- /* "POSIX" already converted to "C" in loadlocale. */
- if (!strcmp (locale, "C"))
- return last_lcid = 0;
- c = strchr (locale, '_');
- if (!c)
- return last_lcid = (LCID) -1;
- if (wincap.has_localenames ())
- {
- wchar_t wlocale[ENCODING_LEN + 1];
-
- /* Convert to RFC 4646 syntax which is the standard for the locale names
- replacing LCIDs starting with Vista. */
- *c = '-';
- mbstowcs (wlocale, locale, ENCODING_LEN + 1);
- lcid = LocaleNameToLCID (wlocale, 0);
- if (lcid == 0)
- {
- /* Unfortunately there are a couple of locales for which no form
- without a Script part per RFC 4646 exists.
- Linux also supports no_NO which is equivalent to nb_NO. */
- struct {
- const char *loc;
- const wchar_t *wloc;
- } sc_only_locale[] = {
- { "az-AZ" , L"az-Latn-AZ" },
- { "bs-BA" , L"bs-Latn-BA" },
- { "ha-NG" , L"ha-Latn-NG" },
- { "iu-CA" , L"iu-Latn-CA" },
- { "mn-CN" , L"mn-Mong-CN" },
- { "no-NO" , L"nb-NO" },
- { "sr-BA" , L"sr-Cyrl-BA" },
- { "sr-CS" , L"sr-Cyrl-CS" },
- { "sr-ME" , L"sr-Cyrl-ME" },
- { "sr-RS" , L"sr-Cyrl-RS" },
- { "tg-TJ" , L"tg-Cyrl-TJ" },
- { "tzm-DZ", L"tzm-Latn-DZ" },
- { "uz-UZ" , L"uz-Latn-UZ" },
- { NULL , NULL }
- };
- for (int i = 0; sc_only_locale[i].loc
- && sc_only_locale[i].loc[0] <= locale[0]; ++i)
- if (!strcmp (locale, sc_only_locale[i].loc))
- {
- lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0);
- if (!strncmp (locale, "sr-", 3))
- {
- /* Vista/2K8 is missing sr-ME and sr-RS. It has only the
- deprecated sr-CS. So we map ME and RS to CS here. */
- if (lcid == 0)
- lcid = LocaleNameToLCID (L"sr-Cyrl-CS", 0);
- /* "@latin" modifier for the sr_XY locales changes
- collation behaviour so lcid should accommodate that
- by being set to the Latin sublang. */
- if (lcid != 0 && has_modifier ("@latin"))
- lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
- }
- else if (!strncmp (locale, "uz-", 3))
- {
- /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
- if (lcid != 0 && has_modifier ("@cyrillic"))
- lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
- }
- break;
- }
- }
- last_lcid = lcid ?: (LCID) -1;
- debug_printf ("LCID=0x%04x", last_lcid);
- return last_lcid;
- }
- /* Pre-Vista we have to loop through the LCID values and see if they
- match language and TERRITORY. */
- *c++ = '\0';
- /* locale now points to the language, c points to the TERRITORY */
- const char *language = locale;
- const char *territory = c;
- LCID lang, sublang;
- char iso[10];
-
- /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2
- the highest lang value is 0x81. */
- for (lang = 1; lang <= 0x81; ++lang)
- if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10)
- && !strcmp (language, iso))
- break;
- if (lang > 0x81)
- lcid = 0;
- else if (!territory)
- lcid = lang;
- else
- {
- /* In theory the sublang part takes 7 bits (0x3f), but up to
- Windows 2003 R2 the highest sublang value is 0x14. */
- for (sublang = 1; sublang <= 0x14; ++sublang)
- {
- lcid = (sublang << 10) | lang;
- if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10)
- && !strcmp (territory, iso))
- break;
- }
- if (sublang > 0x14)
- lcid = 0;
- }
- if (lcid == 0 && territory)
- {
- /* Unfortunately there are four language LCID number areas representing
- multiple languages. Fortunately only two of them already existed
- pre-Vista. The concealed languages have to be tested explicitly,
- since they are not catched by the above loops.
- This also enables the serbian ISO 3166 territory codes which have
- been changed post 2003, and maps them to the old wrong (SP was never
- a valid ISO 3166 code) territory code sr_SP which fortunately has the
- same LCID as the newer sr_CS.
- Linux also supports no_NO which is equivalent to nb_NO. */
- struct {
- const char *loc;
- LCID lcid;
- } ambiguous_locale[] = {
- { "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05) },
- { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK) },
- { "no_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_BOKMAL) },
- { "sr_BA", MAKELANGID (LANG_BOSNIAN,
- SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC) },
- { "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
- { "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
- { "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
- { "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
- { NULL, 0 },
- };
- *--c = '_';
- for (int i = 0; ambiguous_locale[i].loc
- && ambiguous_locale[i].loc[0] <= locale[0]; ++i)
- if (!strcmp (locale, ambiguous_locale[i].loc)
- && GetLocaleInfo (ambiguous_locale[i].lcid, LOCALE_SISO639LANGNAME,
- iso, 10))
- {
- lcid = ambiguous_locale[i].lcid;
- /* "@latin" modifier for the sr_XY locales changes collation
- behaviour so lcid should accommodate that by being set to
- the Latin sublang. */
- if (!strncmp (locale, "sr_", 3) && has_modifier ("@latin"))
- lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
- break;
- }
- }
- else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
- {
- /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
- if (lcid != 0 && has_modifier ("@cyrillic"))
- lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
- }
- last_lcid = lcid ?: (LCID) -1;
- debug_printf ("LCID=0x%04x", last_lcid);
- return last_lcid;
-}
-
-/* Never returns -1. Just skips invalid chars instead. Only if return_invalid
- is set, s==NULL returns -1 since then it's used to recognize invalid strings
- in the used charset. */
-static size_t
-lc_wcstombs (wctomb_p f_wctomb, const char *charset,
- char *s, const wchar_t *pwcs, size_t n,
- bool return_invalid = false)
-{
- char *ptr = s;
- size_t max = n;
- char buf[8];
- size_t i, bytes, num_to_copy;
- mbstate_t state;
-
- memset (&state, 0, sizeof state);
- if (s == NULL)
- {
- size_t num_bytes = 0;
- while (*pwcs != 0)
- {
- bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
- if (bytes != (size_t) -1)
- num_bytes += bytes;
- else if (return_invalid)
- return (size_t) -1;
- }
- return num_bytes;
- }
- while (n > 0)
- {
- bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
- if (bytes == (size_t) -1)
- {
- memset (&state, 0, sizeof state);
- ++pwcs;
- continue;
- }
- num_to_copy = (n > bytes ? bytes : n);
- for (i = 0; i < num_to_copy; ++i)
- *ptr++ = buf[i];
-
- if (*pwcs == 0x00)
- return ptr - s - (n >= bytes);
- ++pwcs;
- n -= num_to_copy;
- }
- return max;
-}
-
-/* Never returns -1. Invalid sequences are translated to replacement
- wide-chars. */
-static size_t
-lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
- wchar_t *pwcs, const char *s, size_t n)
-{
- size_t ret = 0;
- char *t = (char *) s;
- size_t bytes;
- mbstate_t state;
-
- memset (&state, 0, sizeof state);
- if (!pwcs)
- n = 1;
- while (n > 0)
- {
- bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */,
- charset, &state);
- if (bytes == (size_t) -1)
- {
- state.__count = 0;
- bytes = 1;
- if (pwcs)
- *pwcs = L' ';
- }
- else if (bytes == 0)
- break;
- t += bytes;
- ++ret;
- if (pwcs)
- {
- ++pwcs;
- --n;
- }
- }
- return ret;
-}
-
-static int
-locale_cmp (const void *a, const void *b)
-{
- char **la = (char **) a;
- char **lb = (char **) b;
- return strcmp (*la, *lb);
-}
-
-/* Helper function to workaround reallocs which move blocks even if they shrink.
- Cygwin's realloc is not doing this, but tcsh's, for instance. All lc_foo
- structures consist entirely of pointers so they are practically pointer
- arrays. What we do here is just treat the lc_foo pointers as char ** and
- rebase all char * pointers within, up to the given size of the structure. */
-static void
-rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
- const char *oldbase, const char *oldend)
-{
- const char **ptrsend = (const char **) ptrvend;
- for (const char **ptrs = (const char **) ptrv; ptrs < ptrsend; ++ptrs)
- if (*ptrs >= oldbase && *ptrs < oldend)
- *ptrs += newbase - oldbase;
-}
-
-static wchar_t *
-__getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size)
-{
- size_t num;
- wchar_t *ret;
-
- if ((uintptr_t) *ptr % 1)
- ++*ptr;
- ret = (wchar_t *) *ptr;
- num = GetLocaleInfoW (lcid, type, ret, size / sizeof (wchar_t));
- *ptr = (char *) (ret + num);
- return ret;
-}
-
-static char *
-__charfromwchar (const wchar_t *in, char **ptr, size_t size,
- wctomb_p f_wctomb, const char *charset)
-{
- size_t num;
- char *ret;
-
- num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size);
- *ptr += num + 1;
- return ret;
-}
-
-static UINT
-getlocaleint (LCID lcid, LCTYPE type)
-{
- UINT val;
- return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
- sizeof val) ? val : 0;
-}
-
-enum dt_flags {
- DT_DEFAULT = 0x00,
- DT_AMPM = 0x01, /* Enforce 12 hour time format. */
- DT_ABBREV = 0x02, /* Enforce abbreviated month and day names. */
-};
-
-static wchar_t *
-__eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
- size_t size)
-{
- wchar_t buf[80];
- wchar_t fc;
- size_t idx;
- const wchar_t *day_str = L"edaA";
- const wchar_t *mon_str = L"mmbB";
- const wchar_t *year_str = L"yyyY";
- const wchar_t *hour12_str = L"lI";
- const wchar_t *hour24_str = L"kH";
- const wchar_t *t_str;
-
- if ((uintptr_t) *ptr % 1)
- ++*ptr;
- wchar_t *ret = (wchar_t *) *ptr;
- wchar_t *p = (wchar_t *) *ptr;
- GetLocaleInfoW (lcid, type, buf, 80);
- for (wchar_t *fmt = buf; *fmt; ++fmt)
- switch (fc = *fmt)
- {
- case L'\'':
- if (fmt[1] == L'\'')
- *p++ = L'\'';
- else
- while (fmt[1] && *++fmt != L'\'')
- *p++ = *fmt;
- break;
- case L'd':
- case L'M':
- case L'y':
- t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
- for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
- if (idx > 3)
- idx = 3;
- if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
- idx = 2;
- *p++ = L'%';
- *p++ = t_str[idx];
- break;
- case L'g':
- /* TODO */
- break;
- case L'h':
- case L'H':
- t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
- idx = 0;
- if (fmt[1] == fc)
- {
- ++fmt;
- idx = 1;
- }
- *p++ = L'%';
- *p++ = t_str[idx];
- break;
- case L'm':
- case L's':
- case L't':
- if (fmt[1] == fc)
- ++fmt;
- *p++ = L'%';
- *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
- break;
- case L'\t':
- case L'\n':
- case L'%':
- *p++ = L'%';
- *p++ = fc;
- break;
- default:
- *p++ = *fmt;
- break;
- }
- *p++ = L'\0';
- *ptr = (char *) p;
- return ret;
-}
-
-/* Convert Windows grouping format into POSIX grouping format. */
-static char *
-conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr)
-{
- char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
- bool repeat = false;
- char *ptr = *lc_ptr;
- char *ret = ptr;
-
- GetLocaleInfoA (lcid, type, buf, 10);
- /* Convert Windows grouping format into POSIX grouping format. */
- for (char *c = buf; *c; ++c)
- {
- if (*c < '0' || *c > '9')
- continue;
- char val = *c - '0';
- if (!val)
- {
- repeat = true;
- break;
- }
- *ptr++ = val;
- }
- if (!repeat)
- *ptr++ = CHAR_MAX;
- *ptr++ = '\0';
- *lc_ptr = ptr;
- return ret;
-}
-
-/* Called from newlib's setlocale() via __time_load_locale() if category
- is LC_TIME. Returns LC_TIME values fetched from Windows locale data
- in the structure pointed to by _time_locale. This is subsequently
- accessed by functions like nl_langinfo, strftime, strptime. */
-extern "C" int
-__set_lc_time_from_win (const char *name,
- const struct lc_time_T *_C_time_locale,
- struct lc_time_T *_time_locale,
- char **lc_time_buf, wctomb_p f_wctomb,
- const char *charset)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return lcid;
- if (!lcid && !strcmp (charset, "ASCII"))
- return 0;
-
-# define MAX_TIME_BUFFER_SIZE 4096
-
- char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
- const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
-
- if (!new_lc_time_buf)
- return -1;
- char *lc_time_ptr = new_lc_time_buf;
-
- /* C.foo is just a copy of "C" with fixed charset. */
- if (!lcid)
- memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
- /* codeset */
- _time_locale->codeset = lc_time_ptr;
- lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
-
- if (lcid)
- {
- char locale[ENCODING_LEN + 1];
- strcpy (locale, name);
- /* Removes the charset from the locale and attach the modifer to the
- language_TERRITORY part. */
- char *c = strchr (locale, '.');
- if (c)
- {
- *c = '\0';
- char *c2 = strchr (c + 1, '@');
- /* Ignore @cjknarrow modifier since it's a very personal thing between
- Cygwin and newlib... */
- if (c2 && strcmp (c2, "@cjknarrow"))
- memmove (c, c2, strlen (c2) + 1);
- }
- /* Now search in the alphabetically order lc_era array for the
- locale. */
- lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
- NULL, NULL, NULL, NULL, NULL };
- lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key, (void *) lc_era,
- sizeof lc_era / sizeof *lc_era,
- sizeof *lc_era, locale_cmp);
-
- /* mon */
- /* Windows has a bug in Japanese and Korean locales. In these
- locales, strings returned for LOCALE_SABBREVMONTHNAME* are missing
- the suffix representing a month. Unfortunately this is not
- documented in English. A Japanese article describing the problem
- is http://msdn.microsoft.com/ja-jp/library/cc422084.aspx
- The workaround is to use LOCALE_SMONTHNAME* in these locales,
- even for the abbreviated month name. */
- const LCTYPE mon_base =
- lcid == MAKELANGID (LANG_JAPANESE, SUBLANG_JAPANESE_JAPAN)
- || lcid == MAKELANGID (LANG_KOREAN, SUBLANG_KOREAN)
- ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
- for (int i = 0; i < 12; ++i)
- {
- _time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
- _time_locale->mon[i] = charfromwchar (time, wmon[i]);
- }
- /* month and alt_month */
- for (int i = 0; i < 12; ++i)
- {
- _time_locale->wmonth[i] = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i);
- _time_locale->month[i] = _time_locale->alt_month[i]
- = charfromwchar (time, wmonth[i]);
- }
- /* wday */
- _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
- _time_locale->wday[0] = charfromwchar (time, wwday[0]);
- for (int i = 0; i < 6; ++i)
- {
- _time_locale->wwday[i + 1] = getlocaleinfo (time,
- LOCALE_SABBREVDAYNAME1 + i);
- _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
- }
- /* weekday */
- _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
- _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
- for (int i = 0; i < 6; ++i)
- {
- _time_locale->wweekday[i + 1] = getlocaleinfo (time,
- LOCALE_SDAYNAME1 + i);
- _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
- }
- size_t len;
- /* X_fmt */
- if (era && *era->t_fmt)
- {
- _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
- era->t_fmt) + 1);
- }
- else
- _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
- _time_locale->X_fmt = charfromwchar (time, wX_fmt);
- /* x_fmt */
- if (era && *era->d_fmt)
- {
- _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
- era->d_fmt) + 1);
- }
- else
- _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
- _time_locale->x_fmt = charfromwchar (time, wx_fmt);
- /* c_fmt */
- if (era && *era->d_t_fmt)
- {
- _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
- era->d_t_fmt) + 1);
- }
- else
- {
- _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
- ((wchar_t *) lc_time_ptr)[-1] = L' ';
- eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
- }
- _time_locale->c_fmt = charfromwchar (time, wc_fmt);
- /* AM/PM */
- _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
- _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
- _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
- _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
- /* date_fmt */
- if (era && *era->date_fmt)
- {
- _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
- era->date_fmt) + 1);
- }
- else
- _time_locale->wdate_fmt = _time_locale->wc_fmt;
- _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
- /* md */
- {
- wchar_t buf[80];
- GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80);
- _time_locale->md_order = (const char *) lc_time_ptr;
- lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
- }
- /* ampm_fmt */
- if (era)
- {
- _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
- era->t_fmt_ampm) + 1);
- }
- else
- _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
- _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
-
- if (era)
- {
- /* Evaluate string length in target charset. Characters invalid in the
- target charset are simply ignored, as on Linux. */
- len = 0;
- len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1;
- len += (wcslen (era->era) + 1) * sizeof (wchar_t);
- len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
- len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
- len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
- len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
-
- /* Make sure data fits into the buffer */
- if (lc_time_ptr + len > lc_time_end)
- {
- len = lc_time_ptr + len - new_lc_time_buf;
- char *tmp = (char *) realloc (new_lc_time_buf, len);
- if (!tmp)
- era = NULL;
- else
- {
- if (tmp != new_lc_time_buf)
- rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
- new_lc_time_buf, lc_time_ptr);
- lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
- new_lc_time_buf = tmp;
- lc_time_end = new_lc_time_buf + len;
- }
- }
- /* Copy over */
- if (era)
- {
- /* era */
- _time_locale->wera = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
- era->era) + 1);
- _time_locale->era = charfromwchar (time, wera);
- /* era_d_fmt */
- _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
- era->era_d_fmt) + 1);
- _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
- /* era_d_t_fmt */
- _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
- era->era_d_t_fmt) + 1);
- _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
- /* era_t_fmt */
- _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
- era->era_t_fmt) + 1);
- _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
- /* alt_digits */
- _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
- lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
- era->alt_digits) + 1);
- _time_locale->alt_digits = charfromwchar (time, walt_digits);
- }
- }
- if (!era)
- {
- _time_locale->wera =
- _time_locale->wera_d_fmt =
- _time_locale->wera_d_t_fmt =
- _time_locale->wera_t_fmt =
- _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
- _time_locale->era =
- _time_locale->era_d_fmt =
- _time_locale->era_d_t_fmt =
- _time_locale->era_t_fmt =
- _time_locale->alt_digits = (const char *) lc_time_ptr;
- /* Twice, to make sure wide char strings are correctly terminated. */
- *lc_time_ptr++ = '\0';
- *lc_time_ptr++ = '\0';
- }
- }
-
- char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
- if (!tmp)
- {
- free (new_lc_time_buf);
- return -1;
- }
- if (tmp != new_lc_time_buf)
- rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
- new_lc_time_buf, lc_time_ptr);
- if (*lc_time_buf)
- free (*lc_time_buf);
- *lc_time_buf = tmp;
- return 1;
-}
-
-/* Called from newlib's setlocale() via __ctype_load_locale() if category
- is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
- in the structure pointed to by _ctype_locale. This is subsequently
- accessed by functions like nl_langinfo, localeconv, printf, etc. */
-extern "C" int
-__set_lc_ctype_from_win (const char *name,
- const struct lc_ctype_T *_C_ctype_locale,
- struct lc_ctype_T *_ctype_locale,
- char **lc_ctype_buf, wctomb_p f_wctomb,
- const char *charset, int mb_cur_max)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return lcid;
- if (!lcid && !strcmp (charset, "ASCII"))
- return 0;
-
-# define MAX_CTYPE_BUFFER_SIZE 256
-
- char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
-
- if (!new_lc_ctype_buf)
- return -1;
- char *lc_ctype_ptr = new_lc_ctype_buf;
- /* C.foo is just a copy of "C" with fixed charset. */
- if (!lcid)
- memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
- /* codeset */
- _ctype_locale->codeset = lc_ctype_ptr;
- lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
- /* mb_cur_max */
- _ctype_locale->mb_cur_max = lc_ctype_ptr;
- *lc_ctype_ptr++ = mb_cur_max;
- *lc_ctype_ptr++ = '\0';
- if (lcid)
- {
- /* outdigits and woutdigits */
- wchar_t digits[11];
- GetLocaleInfoW (lcid, LOCALE_SNATIVEDIGITS, digits, 11);
- for (int i = 0; i <= 9; ++i)
- {
- mbstate_t state;
-
- /* Make sure the wchar_t's are always 2 byte aligned. */
- if ((uintptr_t) lc_ctype_ptr % 2)
- ++lc_ctype_ptr;
- wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
- _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
- *woutdig++ = digits[i];
- *woutdig++ = L'\0';
- lc_ctype_ptr = (char *) woutdig;
- _ctype_locale->outdigits[i] = lc_ctype_ptr;
- memset (&state, 0, sizeof state);
- lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset,
- &state);
- *lc_ctype_ptr++ = '\0';
- }
- }
-
- char *tmp = (char *) realloc (new_lc_ctype_buf,
- lc_ctype_ptr - new_lc_ctype_buf);
- if (!tmp)
- {
- free (new_lc_ctype_buf);
- return -1;
- }
- if (tmp != new_lc_ctype_buf)
- rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
- new_lc_ctype_buf, lc_ctype_ptr);
- if (*lc_ctype_buf)
- free (*lc_ctype_buf);
- *lc_ctype_buf = tmp;
- return 1;
-}
-
-/* Called from newlib's setlocale() via __numeric_load_locale() if category
- is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
- in the structure pointed to by _numeric_locale. This is subsequently
- accessed by functions like nl_langinfo, localeconv, printf, etc. */
-extern "C" int
-__set_lc_numeric_from_win (const char *name,
- const struct lc_numeric_T *_C_numeric_locale,
- struct lc_numeric_T *_numeric_locale,
- char **lc_numeric_buf, wctomb_p f_wctomb,
- const char *charset)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return lcid;
- if (!lcid && !strcmp (charset, "ASCII"))
- return 0;
-
-# define MAX_NUMERIC_BUFFER_SIZE 256
-
- char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
- const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
-
- if (!new_lc_numeric_buf)
- return -1;
- char *lc_numeric_ptr = new_lc_numeric_buf;
- /* C.foo is just a copy of "C" with fixed charset. */
- if (!lcid)
- memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
- else
- {
- /* decimal_point */
- _numeric_locale->wdecimal_point = getlocaleinfo (numeric, LOCALE_SDECIMAL);
- _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
- /* thousands_sep */
- _numeric_locale->wthousands_sep = getlocaleinfo (numeric, LOCALE_STHOUSAND);
- _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
- /* grouping */
- _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING,
- &lc_numeric_ptr);
- }
- /* codeset */
- _numeric_locale->codeset = lc_numeric_ptr;
- lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
-
- char *tmp = (char *) realloc (new_lc_numeric_buf,
- lc_numeric_ptr - new_lc_numeric_buf);
- if (!tmp)
- {
- free (new_lc_numeric_buf);
- return -1;
- }
- if (tmp != new_lc_numeric_buf)
- rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
- new_lc_numeric_buf, lc_numeric_ptr);
- if (*lc_numeric_buf)
- free (*lc_numeric_buf);
- *lc_numeric_buf = tmp;
- return 1;
-}
-
-/* Called from newlib's setlocale() via __monetary_load_locale() if category
- is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
- in the structure pointed to by _monetary_locale. This is subsequently
- accessed by functions like nl_langinfo, localeconv, printf, etc. */
-extern "C" int
-__set_lc_monetary_from_win (const char *name,
- const struct lc_monetary_T *_C_monetary_locale,
- struct lc_monetary_T *_monetary_locale,
- char **lc_monetary_buf, wctomb_p f_wctomb,
- const char *charset)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return lcid;
- if (!lcid && !strcmp (charset, "ASCII"))
- return 0;
-
-# define MAX_MONETARY_BUFFER_SIZE 512
-
- char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
- const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
-
- if (!new_lc_monetary_buf)
- return -1;
- char *lc_monetary_ptr = new_lc_monetary_buf;
- /* C.foo is just a copy of "C" with fixed charset. */
- if (!lcid)
- memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
- else
- {
- /* int_curr_symbol */
- _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
- LOCALE_SINTLSYMBOL);
- /* No spacing char means space. */
- if (!_monetary_locale->wint_curr_symbol[3])
- {
- wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
- *wc++ = L' ';
- *wc++ = L'\0';
- lc_monetary_ptr = (char *) wc;
- }
- _monetary_locale->int_curr_symbol = charfromwchar (monetary,
- wint_curr_symbol);
- /* currency_symbol */
- _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
- LOCALE_SCURRENCY);
- /* As on Linux: If the currency_symbol can't be represented in the
- given charset, use int_curr_symbol. */
- if (lc_wcstombs (f_wctomb, charset, NULL,
- _monetary_locale->wcurrency_symbol,
- 0, true) == (size_t) -1)
- _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
- else
- _monetary_locale->currency_symbol = charfromwchar (monetary,
- wcurrency_symbol);
- /* mon_decimal_point */
- _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
- LOCALE_SMONDECIMALSEP);
- _monetary_locale->mon_decimal_point = charfromwchar (monetary,
- wmon_decimal_point);
- /* mon_thousands_sep */
- _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
- LOCALE_SMONTHOUSANDSEP);
- _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
- wmon_thousands_sep);
- /* mon_grouping */
- _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING,
- &lc_monetary_ptr);
- /* positive_sign */
- _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
- LOCALE_SPOSITIVESIGN);
- _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
- /* negative_sign */
- _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
- LOCALE_SNEGATIVESIGN);
- _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
- /* int_frac_digits */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS);
- _monetary_locale->int_frac_digits = lc_monetary_ptr++;
- /* frac_digits */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS);
- _monetary_locale->frac_digits = lc_monetary_ptr++;
- /* p_cs_precedes and int_p_cs_precedes */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES);
- _monetary_locale->p_cs_precedes
- = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
- /* p_sep_by_space and int_p_sep_by_space */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE);
- _monetary_locale->p_sep_by_space
- = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
- /* n_cs_precedes and int_n_cs_precedes */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES);
- _monetary_locale->n_cs_precedes
- = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
- /* n_sep_by_space and int_n_sep_by_space */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE);
- _monetary_locale->n_sep_by_space
- = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
- /* p_sign_posn and int_p_sign_posn */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN);
- _monetary_locale->p_sign_posn
- = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
- /* n_sign_posn and int_n_sign_posn */
- *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN);
- _monetary_locale->n_sign_posn
- = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
- }
- /* codeset */
- _monetary_locale->codeset = lc_monetary_ptr;
- lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
-
- char *tmp = (char *) realloc (new_lc_monetary_buf,
- lc_monetary_ptr - new_lc_monetary_buf);
- if (!tmp)
- {
- free (new_lc_monetary_buf);
- return -1;
- }
- if (tmp != new_lc_monetary_buf)
- rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
- new_lc_monetary_buf, lc_monetary_ptr);
- if (*lc_monetary_buf)
- free (*lc_monetary_buf);
- *lc_monetary_buf = tmp;
- return 1;
-}
-
-extern "C" int
-__set_lc_messages_from_win (const char *name,
- const struct lc_messages_T *_C_messages_locale,
- struct lc_messages_T *_messages_locale,
- char **lc_messages_buf,
- wctomb_p f_wctomb, const char *charset)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return lcid;
- if (!lcid && !strcmp (charset, "ASCII"))
- return 0;
-
- char locale[ENCODING_LEN + 1];
- char *c, *c2;
- lc_msg_t *msg = NULL;
-
- /* C.foo is just a copy of "C" with fixed charset. */
- if (!lcid)
- memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
- else
- {
- strcpy (locale, name);
- /* Removes the charset from the locale and attach the modifer to the
- language_TERRITORY part. */
- c = strchr (locale, '.');
- if (c)
- {
- *c = '\0';
- c2 = strchr (c + 1, '@');
- /* Ignore @cjknarrow modifier since it's a very personal thing between
- Cygwin and newlib... */
- if (c2 && strcmp (c2, "@cjknarrow"))
- memmove (c, c2, strlen (c2) + 1);
- }
- /* Now search in the alphabetically order lc_msg array for the
- locale. */
- lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
- msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
- sizeof lc_msg / sizeof *lc_msg,
- sizeof *lc_msg, locale_cmp);
- if (!msg)
- return 0;
- }
-
- /* Evaluate string length in target charset. Characters invalid in the
- target charset are simply ignored, as on Linux. */
- size_t len = 0;
- len += (strlen (charset) + 1);
- if (lcid)
- {
- len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1;
- len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1;
- len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
- len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
- len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
- len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
- if (len % 1)
- ++len;
- }
- /* Allocate. */
- char *new_lc_messages_buf = (char *) malloc (len);
- const char *lc_messages_end = new_lc_messages_buf + len;
-
- if (!new_lc_messages_buf)
- return -1;
- /* Copy over. */
- c = new_lc_messages_buf;
- /* codeset */
- _messages_locale->codeset = c;
- c = stpcpy (c, charset) + 1;
- if (lcid)
- {
- _messages_locale->yesexpr = (const char *) c;
- len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c);
- _messages_locale->noexpr = (const char *) (c += len + 1);
- len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c);
- _messages_locale->yesstr = (const char *) (c += len + 1);
- len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c);
- _messages_locale->nostr = (const char *) (c += len + 1);
- len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c);
- c += len + 1;
- if ((uintptr_t) c % 1)
- ++c;
- wchar_t *wc = (wchar_t *) c;
- _messages_locale->wyesexpr = (const wchar_t *) wc;
- wc = wcpcpy (wc, msg->yesexpr) + 1;
- _messages_locale->wnoexpr = (const wchar_t *) wc;
- wc = wcpcpy (wc, msg->noexpr) + 1;
- _messages_locale->wyesstr = (const wchar_t *) wc;
- wc = wcpcpy (wc, msg->yesstr) + 1;
- _messages_locale->wnostr = (const wchar_t *) wc;
- wcpcpy (wc, msg->nostr);
- }
- /* Aftermath. */
- if (*lc_messages_buf)
- free (*lc_messages_buf);
- *lc_messages_buf = new_lc_messages_buf;
- return 1;
-}
-
-LCID collate_lcid = 0;
-static mbtowc_p collate_mbtowc = __ascii_mbtowc;
-char collate_charset[ENCODING_LEN + 1] = "ASCII";
-
-/* Called from newlib's setlocale() if category is LC_COLLATE. Stores
- LC_COLLATE locale information. This is subsequently accessed by the
- below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
-extern "C" int
-__collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset)
-{
- LCID lcid = __get_lcid_from_locale (name);
- if (lcid == (LCID) -1)
- return -1;
- collate_lcid = lcid;
- collate_mbtowc = f_mbtowc;
- stpcpy (collate_charset, charset);
- return 0;
-}
-
-extern "C" const char *
-__get_current_collate_codeset (void)
-{
- return collate_charset;
-}
-
-/* We use the Windows functions for locale-specific string comparison and
- transformation. The advantage is that we don't need any files with
- collation information. */
-extern "C" int
-wcscoll (const wchar_t *ws1, const wchar_t *ws2)
-{
- int ret;
-
- if (!collate_lcid)
- return wcscmp (ws1, ws2);
- ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
- if (!ret)
- set_errno (EINVAL);
- return ret - CSTR_EQUAL;
-}
-
-extern "C" int
-strcoll (const char *s1, const char *s2)
-{
- size_t n1, n2;
- wchar_t *ws1, *ws2;
- tmp_pathbuf tp;
- int ret;
-
- if (!collate_lcid)
- return strcmp (s1, s2);
- /* The ANSI version of CompareString uses the default charset of the lcid,
- so we must use the Unicode version. */
- n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
- ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
- : tp.w_get ());
- lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
- n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
- ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
- : tp.w_get ());
- lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
- ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
- if (n1 > NT_MAX_PATH)
- free (ws1);
- if (n2 > NT_MAX_PATH)
- free (ws2);
- if (!ret)
- set_errno (EINVAL);
- return ret - CSTR_EQUAL;
-}
-
-/* BSD. Used in glob.cc and regcomp.c, for instance. */
-extern "C" int
-__collate_range_cmp (int c1, int c2)
-{
- char s1[2] = { c1, '\0' };
- char s2[2] = { c2, '\0' };
- return strcoll (s1, s2);
-}
-
-extern "C" size_t
-wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn)
-{
- size_t ret;
-
- if (!collate_lcid)
- return wcslcpy (ws1, ws2, wsn);
- ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY | LCMAP_BYTEREV,
- ws2, -1, ws1, wsn * sizeof (wchar_t));
- /* LCMapStringW returns byte count including the terminating NUL character,
- wcsxfrm is supposed to return length in wchar_t excluding the NUL.
- Since the array is only single byte NUL-terminated we must make sure
- the result is wchar_t-NUL terminated. */
- if (ret)
- {
- ret = (ret + 1) / sizeof (wchar_t);
- if (ret >= wsn)
- return wsn;
- ws1[ret] = L'\0';
- return ret;
- }
- if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
- set_errno (EINVAL);
- return wsn;
-}
-
-extern "C" size_t
-strxfrm (char *s1, const char *s2, size_t sn)
-{
- size_t ret;
- size_t n2;
- wchar_t *ws2;
- tmp_pathbuf tp;
-
- if (!collate_lcid)
- return strlcpy (s1, s2, sn);
- /* The ANSI version of LCMapString uses the default charset of the lcid,
- so we must use the Unicode version. */
- n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
- ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
- : tp.w_get ());
- lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
- /* The sort key is a NUL-terminated byte string. */
- ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn);
- if (n2 > NT_MAX_PATH)
- free (ws2);
- if (ret == 0)
- {
- if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
- set_errno (EINVAL);
- return sn;
- }
- /* LCMapStringW returns byte count including the terminating NUL character.
- strxfrm is supposed to return length excluding the NUL. */
- return ret - 1;
-}
-
-/* Fetch default ANSI codepage from locale info and generate a setlocale
- compatible character set code. Called from newlib's setlocale(), if the
- charset isn't given explicitely in the POSIX compatible locale specifier. */
-extern "C" void
-__set_charset_from_locale (const char *locale, char *charset)
-{
- UINT cp;
- LCID lcid = __get_lcid_from_locale (locale);
- wchar_t wbuf[9];
-
- /* "C" locale, or invalid locale? */
- if (lcid == 0 || lcid == (LCID) -1)
- cp = 20127;
- else if (!GetLocaleInfoW (lcid,
- LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
- (PWCHAR) &cp, sizeof cp))
- cp = 0;
- /* Translate codepage and lcid to a charset closely aligned with the default
- charsets defined in Glibc. */
- const char *cs;
- const char *modifier = strchr (locale, '@') ?: "";
- switch (cp)
- {
- case 20127:
- cs = "ASCII";
- break;
- case 874:
- cs = "CP874";
- break;
- case 932:
- cs = "EUCJP";
- break;
- case 936:
- cs = "GB2312";
- break;
- case 949:
- cs = "EUCKR";
- break;
- case 950:
- cs = "BIG5";
- break;
- case 1250:
- if (lcid == 0x081a /* sr_CS (Serbian Language/Former
- Serbia and Montenegro) */
- || lcid == 0x181a /* sr_BA (Serbian Language/Bosnia
- and Herzegovina) */
- || lcid == 0x241a /* sr_RS (Serbian Language/Serbia) */
- || lcid == 0x2c1a /* sr_ME (Serbian Language/Montenegro)*/
- || lcid == 0x0442) /* tk_TM (Turkmen/Turkmenistan) */
- cs = "UTF-8";
- else if (lcid == 0x041c) /* sq_AL (Albanian/Albania) */
- cs = "ISO-8859-1";
- else
- cs = "ISO-8859-2";
- break;
- case 1251:
- if (lcid == 0x0c1a /* sr_CS (Serbian Language/Former
- Serbia and Montenegro) */
- || lcid == 0x1c1a /* sr_BA (Serbian Language/Bosnia
- and Herzegovina) */
- || lcid == 0x281a /* sr_RS (Serbian Language/Serbia) */
- || lcid == 0x301a /* sr_ME (Serbian Language/Montenegro)*/
- || lcid == 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */
- || lcid == 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */
- /* tt_RU (Tatar/Russia),
- IQTElif alphabet */
- || (lcid == 0x0444 && has_modifier ("@iqtelif"))
- || lcid == 0x0450) /* mn_MN (Mongolian/Mongolia) */
- cs = "UTF-8";
- else if (lcid == 0x0423) /* be_BY (Belarusian/Belarus) */
- cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
- else if (lcid == 0x0402) /* bg_BG (Bulgarian/Bulgaria) */
- cs = "CP1251";
- else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */
- cs = "KOI8-U";
- else
- cs = "ISO-8859-5";
- break;
- case 1252:
- if (lcid == 0x0452) /* cy_GB (Welsh/Great Britain) */
- cs = "ISO-8859-14";
- else if (lcid == 0x4009 /* en_IN (English/India) */
- || lcid == 0x0464 /* fil_PH (Filipino/Philippines) */
- || lcid == 0x0462 /* fy_NL (Frisian/Netherlands) */
- || lcid == 0x0468 /* ha_NG (Hausa/Nigeria) */
- || lcid == 0x0470 /* ig_NG (Igbo/Nigeria) */
- || lcid == 0x046c /* nso_ZA (Northern Sotho/South Africa) */
- || lcid == 0x0487 /* rw_RW (Kinyarwanda/Rwanda) */
- || lcid == 0x043b /* se_NO (Northern Saami/Norway) */
- || lcid == 0x0432 /* tn_ZA (Tswana/South Africa) */
- || lcid == 0x0488 /* wo_SN (Wolof/Senegal) */
- || lcid == 0x046a /* yo_NG (Yoruba/Nigeria) */
- || lcid == 0x085d) /* iu_CA (Inuktitut/Canada) */
- cs = "UTF-8";
- else if (lcid == 0x042e) /* hsb_DE (Upper Sorbian/Germany) */
- cs = "ISO-8859-2";
- else if (lcid == 0x0491 /* gd_GB (Scots Gaelic/Great Britain) */
- || (has_modifier ("@euro")
- && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
- && !wcsncmp (wbuf, L"EUR", 3)))
- cs = "ISO-8859-15";
- else
- cs = "ISO-8859-1";
- break;
- case 1253:
- cs = "ISO-8859-7";
- break;
- case 1254:
- if (lcid == 0x042c) /* az_AZ (Azeri/Azerbaijan) */
- cs = "UTF-8";
- else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
- cs = "ISO-8859-1";
- else
- cs = "ISO-8859-9";
- break;
- case 1255:
- cs = "ISO-8859-8";
- break;
- case 1256:
- if (lcid == 0x0429 /* fa_IR (Persian/Iran) */
- || lcid == 0x0480 /* ug_CN (Uyghur/China) */
- || lcid == 0x0420) /* ur_PK (Urdu/Pakistan) */
- cs = "UTF-8";
- else
- cs = "ISO-8859-6";
- break;
- case 1257:
- if (lcid == 0x0425) /* et_EE (Estonian/Estonia) */
- cs = "ISO-8859-15";
- else
- cs = "ISO-8859-13";
- break;
- case 1258:
- default:
- if (lcid == 0x0481) /* mi_NZ (Maori/New Zealand) */
- cs = "ISO-8859-13";
- else if (lcid == 0x043a) /* mt_MT (Maltese/Malta) */
- cs = "ISO-8859-3";
- else if (lcid == 0x0437) /* ka_GE (Georgian/Georgia) */
- cs = "GEORGIAN-PS";
- else if (lcid == 0x043f) /* kk_KZ (Kazakh/Kazakhstan) */
- cs = "PT154";
- else
- cs = "UTF-8";
- break;
- }
- stpcpy (charset, cs);
-}
-
-/* This function is called from newlib's loadlocale if the locale identifier
- was invalid, one way or the other. It looks for the file
-
- /usr/share/locale/locale.alias
-
- which is part of the gettext package, and if it finds the locale alias
- in that file, it replaces the locale with the correct locale string from
- that file.
-
- If successful, it returns a pointer to new_locale, NULL otherwise.*/
-extern "C" char *
-__set_locale_from_locale_alias (const char *locale, char *new_locale)
-{
- wchar_t wlocale[ENCODING_LEN + 1];
- wchar_t walias[ENCODING_LEN + 1];
-#define LOCALE_ALIAS_LINE_LEN 255
- char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
- wchar_t *wc;
- const char *alias, *replace;
- char *ret = NULL;
-
- FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
- if (!fp)
- return NULL;
- /* The incoming locale is given in the application charset, or in
- the Cygwin internal charset. We try both. */
- if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
- sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
- wlocale[ENCODING_LEN] = L'\0';
- /* Ignore @cjknarrow modifier since it's a very personal thing between
- Cygwin and newlib... */
- if ((wc = wcschr (wlocale, L'@')) && !wcscmp (wc + 1, L"cjknarrow"))
- *wc = L'\0';
- while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
- {
- alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
- c = strrchr (alias_buf, '\n');
- if (c)
- *c = '\0';
- c = alias_buf;
- c += strspn (c, " \t");
- if (!*c || *c == '#')
- continue;
- alias = c;
- c += strcspn (c, " \t");
- *c++ = '\0';
- c += strspn (c, " \t");
- if (*c == '#')
- continue;
- replace = c;
- c += strcspn (c, " \t");
- *c++ = '\0';
- if (strlen (replace) > ENCODING_LEN)
- continue;
- /* The file is latin1 encoded */
- lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1);
- walias[ENCODING_LEN] = L'\0';
- if (!wcscmp (wlocale, walias))
- {
- ret = strcpy (new_locale, replace);
- break;
- }
- }
- fclose (fp);
- return ret;
-}
-
-static char *
-check_codepage (char *ret)
-{
- if (!wincap.has_always_all_codepages ())
- {
- /* Prior to Windows Vista, many codepages are not installed by
- default, or can be deinstalled. The following codepages require
- that the respective conversion tables are installed into the OS.
- So we check if they are installed and if not, setlocale should
- fail. */
- CPINFO cpi;
- UINT cp = 0;
- if (__mbtowc == __sjis_mbtowc)
- cp = 932;
- else if (__mbtowc == __eucjp_mbtowc)
- cp = 20932;
- else if (__mbtowc == __gbk_mbtowc)
- cp = 936;
- else if (__mbtowc == __kr_mbtowc)
- cp = 949;
- else if (__mbtowc == __big5_mbtowc)
- cp = 950;
- if (cp && !GetCPInfo (cp, &cpi)
- && GetLastError () == ERROR_INVALID_PARAMETER)
- return NULL;
- }
- return ret;
-}
-
-/* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
- which really (think they) know what they are doing. */
-extern "C" void
-internal_setlocale ()
-{
- /* Each setlocale from the environment potentially changes the
- multibyte representation of the CWD. Therefore we have to
- reevaluate the CWD's posix path and store in the new charset.
- Same for the PATH environment variable. */
- /* FIXME: Other buffered paths might be affected as well. */
- /* FIXME: It could be necessary to convert the entire environment,
- not just PATH. */
- tmp_pathbuf tp;
- char *path;
- wchar_t *w_path = NULL, *w_cwd;
-
- /* Don't do anything if the charset hasn't actually changed. */
- if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0)
- return;
-
- debug_printf ("Cygwin charset changed from %s to %s",
- cygheap->locale.charset, __locale_charset ());
- /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
- path = getenv ("PATH");
- if (path && *path) /* $PATH can be potentially unset. */
- {
- w_path = tp.w_get ();
- sys_mbstowcs (w_path, 32768, path);
- }
- w_cwd = tp.w_get ();
- cwdstuff::cwd_lock.acquire ();
- sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
- /* Set charset for internal conversion functions. */
- if (*__locale_charset () == 'A'/*SCII*/)
- {
- cygheap->locale.mbtowc = __utf8_mbtowc;
- cygheap->locale.wctomb = __utf8_wctomb;
- }
- else
- {
- cygheap->locale.mbtowc = __mbtowc;
- cygheap->locale.wctomb = __wctomb;
- }
- strcpy (cygheap->locale.charset, __locale_charset ());
- /* Restore CWD and PATH in new charset. */
- cygheap->cwd.reset_posix (w_cwd);
- cwdstuff::cwd_lock.release ();
- if (w_path)
- {
- char *c_path = tp.c_get ();
- sys_wcstombs (c_path, 32768, w_path);
- setenv ("PATH", c_path, 1);
- }
-}
-
-/* Called from dll_crt0_1, before fetching the command line from Windows.
- Set the internal charset according to the environment locale settings.
- Check if a required codepage is available, and only switch internal
- charset if so.
- Make sure to reset the application locale to "C" per POSIX. */
-void
-initial_setlocale ()
-{
- char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
- if (ret && check_codepage (ret))
- internal_setlocale ();
-}
-
-/* Like newlib's setlocale, but additionally check if the charset needs
- OS support and the required codepage is actually installed. If codepage
- is not available, revert to previous locale and return NULL. For details
- about codepage availability, see the comment in check_codepage() above. */
-extern "C" char *
-setlocale (int category, const char *locale)
-{
- char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
- if (locale && !wincap.has_always_all_codepages ())
- stpcpy (old, _setlocale_r (_REENT, category, NULL));
- char *ret = _setlocale_r (_REENT, category, locale);
- if (ret && locale && !(ret = check_codepage (ret)))
- _setlocale_r (_REENT, category, old);
- return ret;
-}