Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2010-02-26 20:53:52 +0300
committerCorinna Vinschen <corinna@vinschen.de>2010-02-26 20:53:52 +0300
commit75bd77d2465e7e7e83df6d9ec6ac6ae976cdc113 (patch)
tree26325da9e8148a82bc483a10c4340eaa28c0f7de /winsup/cygwin/libc
parentf467ede2830d60d9dfa0bbba66674662ed566086 (diff)
* libc/strptime.cc: Implement support for era, alt_digits and POSIX
padding and width modifiers. (era_info_t): New type. (free_era_info): New static function to free era_info_t storage. (get_era_info): New static function to create era_info_t storage from LC_TIME era information. (alt_digits_t): New type. (get_alt_digits): New static function to create alt_digits_t storage from LC_TIME alt_digits information. (free_alt_digits): New static function to free alt_digits_t storage. (find_alt_digits): New static function to scan input for alternative digits and return them, if any. Return NULL otherwise. (__strptime): New static function taking all code from strptime. Implement handling for E, O, +, 0, and width modifiers per POSIX-1.2008. (strptime): Convert into wrapper function to provide era_info and alt_digits pointers and call __strptime. (conv_num): Take additional alt_digits_t parameter and if it's not NULL, call find_alt_digits to convert.
Diffstat (limited to 'winsup/cygwin/libc')
-rw-r--r--winsup/cygwin/libc/strptime.cc449
1 files changed, 402 insertions, 47 deletions
diff --git a/winsup/cygwin/libc/strptime.cc b/winsup/cygwin/libc/strptime.cc
index e96de5917..08d4c5f56 100644
--- a/winsup/cygwin/libc/strptime.cc
+++ b/winsup/cygwin/libc/strptime.cc
@@ -41,6 +41,7 @@ __RCSID("$NetBSD: strptime.c,v 1.28 2008/04/28 20:23:01 martin Exp $");
#include <sys/localedef.h>
#endif
#include <ctype.h>
+#include <stdlib.h>
#include <locale.h>
#include <string.h>
#include <time.h>
@@ -63,24 +64,240 @@ __weak_alias(strptime,_strptime)
static const char gmt[4] = { "GMT" };
-static const u_char *conv_num(const unsigned char *, int *, uint, uint);
+typedef struct _era_info_t {
+ size_t num; /* Only in first entry: Number of entries,
+ 1 otherwise. */
+ int dir; /* Direction */
+ long offset; /* Number of year closest to start_date in the era. */
+ struct tm start; /* Start date of era */
+ struct tm end; /* End date of era */
+ CHAR *era_C; /* Era string */
+ CHAR *era_Y; /* Replacement for %EY */
+} era_info_t;
+
+static void
+free_era_info (era_info_t *era_info)
+{
+ size_t num = era_info->num;
+
+ for (size_t i = 0; i < num; ++i)
+ {
+ free (era_info[i].era_C);
+ free (era_info[i].era_Y);
+ }
+ free (era_info);
+}
+
+static era_info_t *
+get_era_info (const char *era)
+{
+ char *c;
+ era_info_t *ei = NULL;
+ size_t num = 0, cur = 0, len;
+
+ while (*era)
+ {
+ ++num;
+ era_info_t *tmp = (era_info_t *) realloc (ei, num * sizeof (era_info_t));
+ if (!tmp)
+ {
+ ei->num = cur;
+ free_era_info (ei);
+ return NULL;
+ }
+ ei = tmp;
+ ei[cur].num = 1;
+ ei[cur].dir = (*era == '+') ? 1 : -1;
+ era += 2;
+ ei[cur].offset = strtol (era, &c, 10);
+ era = c + 1;
+ ei[cur].start.tm_year = strtol (era, &c, 10);
+ /* Adjust offset for negative gregorian dates. */
+ if (ei[cur].start.tm_year < 0)
+ ++ei[cur].start.tm_year;
+ ei[cur].start.tm_mon = strtol (c + 1, &c, 10);
+ ei[cur].start.tm_mday = strtol (c + 1, &c, 10);
+ ei[cur].start.tm_hour = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
+ era = c + 1;
+ if (era[0] == '-' && era[1] == '*')
+ {
+ ei[cur].end = ei[cur].start;
+ ei[cur].start.tm_year = INT_MIN;
+ ei[cur].start.tm_mon = ei[cur].start.tm_mday = ei[cur].start.tm_hour
+ = ei[cur].start.tm_min = ei[cur].start.tm_sec = 0;
+ era += 3;
+ }
+ else if (era[0] == '+' && era[1] == '*')
+ {
+ ei[cur].end.tm_year = INT_MAX;
+ ei[cur].end.tm_mon = 12;
+ ei[cur].end.tm_mday = 31;
+ ei[cur].end.tm_hour = 23;
+ ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
+ era += 3;
+ }
+ else
+ {
+ ei[cur].end.tm_year = strtol (era, &c, 10);
+ /* Adjust offset for negative gregorian dates. */
+ if (ei[cur].end.tm_year < 0)
+ ++ei[cur].end.tm_year;
+ ei[cur].end.tm_mon = strtol (c + 1, &c, 10);
+ ei[cur].end.tm_mday = strtol (c + 1, &c, 10);
+ ei[cur].end.tm_mday = 31;
+ ei[cur].end.tm_hour = 23;
+ ei[cur].end.tm_min = ei[cur].end.tm_sec = 59;
+ era = c + 1;
+ }
+ /* era_C */
+ c = strchr (era, ':');
+ len = c - era;
+ ei[cur].era_C = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+ if (!ei[cur].era_C)
+ {
+ ei->num = cur;
+ free_era_info (ei);
+ return NULL;
+ }
+ strncpy (ei[cur].era_C, era, len);
+ era += len;
+ ei[cur].era_C[len] = '\0';
+ /* era_Y */
+ ++era;
+ c = strchr (era, ';');
+ if (!c)
+ c = strchr (era, '\0');
+ len = c - era;
+ ei[cur].era_Y = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+ if (!ei[cur].era_Y)
+ {
+ free (ei[cur].era_C);
+ ei->num = cur;
+ free_era_info (ei);
+ return NULL;
+ }
+ strncpy (ei[cur].era_Y, era, len);
+ era += len;
+ ei[cur].era_Y[len] = '\0';
+ ++cur;
+ if (*c)
+ era = c + 1;
+ }
+ ei->num = num;
+ return ei;
+}
+
+typedef struct _alt_digits_t {
+ size_t num;
+ char **digit;
+ char *buffer;
+} alt_digits_t;
+
+static alt_digits_t *
+get_alt_digits (const char *alt_digits)
+{
+ alt_digits_t *adi;
+ const char *a, *e;
+ char *aa, *ae;
+ size_t len;
+
+ adi = (alt_digits_t *) calloc (1, sizeof (alt_digits_t));
+ if (!adi)
+ return NULL;
+
+ /* Compute number of alt_digits. */
+ adi->num = 1;
+ for (a = alt_digits; (e = strchr (a, ';')) != NULL; a = e + 1)
+ ++adi->num;
+ /* Allocate the `digit' array, which is an array of `num' pointers into
+ `buffer'. */
+ adi->digit = (CHAR **) calloc (adi->num, sizeof (CHAR **));
+ if (!adi->digit)
+ {
+ free (adi);
+ return NULL;
+ }
+ /* Compute memory required for `buffer'. */
+ len = strlen (alt_digits);
+ /* Allocate it. */
+ adi->buffer = (CHAR *) malloc ((len + 1) * sizeof (CHAR));
+ if (!adi->buffer)
+ {
+ free (adi->digit);
+ free (adi);
+ return NULL;
+ }
+ /* Store digits in it. */
+ strcpy (adi->buffer, alt_digits);
+ /* Store the pointers into `buffer' into the appropriate `digit' slot. */
+ for (len = 0, aa = adi->buffer; (ae = strchr (aa, ';')) != NULL;
+ ++len, aa = ae + 1)
+ {
+ *ae = '\0';
+ adi->digit[len] = aa;
+ }
+ adi->digit[len] = aa;
+ return adi;
+}
+
+static void
+free_alt_digits (alt_digits_t *adi)
+{
+ free (adi->digit);
+ free (adi->buffer);
+ free (adi);
+}
+
+static const unsigned char *
+find_alt_digits (const unsigned char *bp, alt_digits_t *adi, uint *pval)
+{
+ /* This is rather error-prone, but the entire idea of alt_digits
+ isn't thought out well. If you start to look for matches at the
+ start, there's a high probability that you find short matches but
+ the entire translation is wrong. So we scan the alt_digits array
+ from the highest to the lowest digits instead, hoping that it's
+ more likely to catch digits consisting of multiple characters. */
+ for (int i = (int) adi->num - 1; i >= 0; --i)
+ {
+ size_t len = strlen (adi->digit[i]);
+ if (!strncmp ((const char *) bp, adi->digit[i], len))
+ {
+ *pval = i;
+ return bp + len;
+ }
+ }
+ return NULL;
+}
+
+/* This simplifies the calls to conv_num enormously. */
+#define ALT_DIGITS ((alt_format & ALT_O) ? *alt_digits : NULL)
+
+static const u_char *conv_num(const unsigned char *, int *, uint, uint,
+ alt_digits_t *);
static const u_char *find_string(const u_char *, int *, const char * const *,
const char * const *, int);
-
-char *
-strptime(const char *buf, const char *fmt, struct tm *tm)
+static char *
+__strptime(const char *buf, const char *fmt, struct tm *tm,
+ era_info_t **era_info, alt_digits_t **alt_digits)
{
unsigned char c;
const unsigned char *bp;
int alt_format, i, split_year = 0;
+ era_info_t *era = NULL;
+ int era_offset, got_eoff = 0;
+ int saw_padding;
+ unsigned long width;
const char *new_fmt;
+ uint ulim;
bp = (const u_char *)buf;
struct lc_time_T *_CurrentTimeLocale = __get_current_time_locale ();
while (bp != NULL && (c = *fmt++) != '\0') {
/* Clear `alternate' modifier prior to new conversion. */
+ saw_padding = 0;
+ width = 0;
alt_format = 0;
i = 0;
@@ -110,18 +327,43 @@ literal:
case 'E': /* "%E?" alternative conversion modifier. */
LEGAL_ALT(0);
alt_format |= ALT_E;
+ if (!*era_info && *_CurrentTimeLocale->era)
+ *era_info = get_era_info (_CurrentTimeLocale->era);
goto again;
case 'O': /* "%O?" alternative conversion modifier. */
LEGAL_ALT(0);
alt_format |= ALT_O;
+ if (!*alt_digits && *_CurrentTimeLocale->alt_digits)
+ *alt_digits =
+ get_alt_digits (_CurrentTimeLocale->alt_digits);
goto again;
-
+ case '0':
+ case '+':
+ LEGAL_ALT(0);
+ if (saw_padding)
+ return NULL;
+ saw_padding = 1;
+ goto again;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ /* POSIX-1.2008 maximum field width. Per POSIX,
+ the width is only defined for the 'C', 'F', and 'Y'
+ conversion specifiers. */
+ LEGAL_ALT(0);
+ {
+ char *end;
+ width = strtoul (fmt - 1, &end, 10);
+ fmt = (const char *) end;
+ goto again;
+ }
/*
* "Complex" conversion rules, implemented through recursion.
*/
case 'c': /* Date and time, using the locale's format. */
- new_fmt = _ctloc(c_fmt);
+ new_fmt = (alt_format & ALT_E)
+ ? _ctloc (era_d_t_fmt) : _ctloc(c_fmt);
+ LEGAL_ALT(ALT_E);
goto recurse;
case 'D': /* The date as "%m/%d/%y". */
@@ -130,9 +372,15 @@ literal:
goto recurse;
case 'F': /* The date as "%Y-%m-%d". */
- new_fmt = "%Y-%m-%d";
- LEGAL_ALT(0);
- goto recurse;
+ {
+ LEGAL_ALT(0);
+ char *tmp = __strptime ((const char *) bp, "%Y-%m-%d",
+ tm, era_info, alt_digits);
+ if (tmp && (uint) (tmp - (char *) bp) > width)
+ return NULL;
+ bp = (const unsigned char *) tmp;
+ continue;
+ }
case 'R': /* The time as "%H:%M". */
new_fmt = "%H:%M";
@@ -150,15 +398,19 @@ literal:
goto recurse;
case 'X': /* The time, using the locale's format. */
- new_fmt =_ctloc(X_fmt);
+ new_fmt = (alt_format & ALT_E)
+ ? _ctloc (era_t_fmt) : _ctloc(X_fmt);
+ LEGAL_ALT(ALT_E);
goto recurse;
case 'x': /* The date, using the locale's format. */
- new_fmt =_ctloc(x_fmt);
- recurse:
- bp = (const u_char *)strptime((const char *)bp,
- new_fmt, tm);
+ new_fmt = (alt_format & ALT_E)
+ ? _ctloc (era_d_fmt) : _ctloc(x_fmt);
LEGAL_ALT(ALT_E);
+ recurse:
+ bp = (const u_char *)__strptime((const char *)bp,
+ new_fmt, tm,
+ era_info, alt_digits);
continue;
/*
@@ -180,58 +432,83 @@ literal:
continue;
case 'C': /* The century number. */
+ LEGAL_ALT(ALT_E);
+ if ((alt_format & ALT_E) && *era_info)
+ {
+ /* With E modifier, an era. We potentially
+ don't know the era offset yet, so we have to
+ store the value in a local variable.
+ The final computation of tm_year is only done
+ right before this function returns. */
+ size_t num = (*era_info)->num;
+ for (size_t i = 0; i < num; ++i)
+ if (!strncmp ((const char *) bp,
+ (*era_info)[i].era_C,
+ strlen ((*era_info)[i].era_C)))
+ {
+ era = (*era_info) + i;
+ bp += strlen (era->era_C);
+ break;
+ }
+ if (!era)
+ return NULL;
+ continue;
+ }
i = 20;
- bp = conv_num(bp, &i, 0, 99);
+ for (ulim = 99; width && width < 2; ++width)
+ ulim /= 10;
+ bp = conv_num(bp, &i, 0, ulim, NULL);
i = i * 100 - TM_YEAR_BASE;
if (split_year)
i += tm->tm_year % 100;
split_year = 1;
tm->tm_year = i;
- LEGAL_ALT(ALT_E);
+ era = NULL;
+ got_eoff = 0;
continue;
case 'd': /* The day of month. */
case 'e':
- bp = conv_num(bp, &tm->tm_mday, 1, 31);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_mday, 1, 31, ALT_DIGITS);
continue;
case 'k': /* The hour (24-hour clock representation). */
LEGAL_ALT(0);
/* FALLTHROUGH */
case 'H':
- bp = conv_num(bp, &tm->tm_hour, 0, 23);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_hour, 0, 23, ALT_DIGITS);
continue;
case 'l': /* The hour (12-hour clock representation). */
LEGAL_ALT(0);
/* FALLTHROUGH */
case 'I':
- bp = conv_num(bp, &tm->tm_hour, 1, 12);
+ LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_hour, 1, 12, ALT_DIGITS);
if (tm->tm_hour == 12)
tm->tm_hour = 0;
- LEGAL_ALT(ALT_O);
continue;
case 'j': /* The day of year. */
i = 1;
- bp = conv_num(bp, &i, 1, 366);
+ bp = conv_num(bp, &i, 1, 366, NULL);
tm->tm_yday = i - 1;
LEGAL_ALT(0);
continue;
case 'M': /* The minute. */
- bp = conv_num(bp, &tm->tm_min, 0, 59);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_min, 0, 59, ALT_DIGITS);
continue;
case 'm': /* The month. */
+ LEGAL_ALT(ALT_O);
i = 1;
- bp = conv_num(bp, &i, 1, 12);
+ bp = conv_num(bp, &i, 1, 12, ALT_DIGITS);
tm->tm_mon = i - 1;
- LEGAL_ALT(ALT_O);
continue;
case 'p': /* The locale's equivalent of AM/PM. */
@@ -243,8 +520,8 @@ literal:
continue;
case 'S': /* The seconds. */
- bp = conv_num(bp, &tm->tm_sec, 0, 61);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_sec, 0, 61, ALT_DIGITS);
continue;
case 'U': /* The week of year, beginning on sunday. */
@@ -255,28 +532,67 @@ literal:
* point to calculate a real value, so just check the
* range for now.
*/
- bp = conv_num(bp, &i, 0, 53);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &i, 0, 53, ALT_DIGITS);
continue;
case 'w': /* The day of week, beginning on sunday. */
- bp = conv_num(bp, &tm->tm_wday, 0, 6);
LEGAL_ALT(ALT_O);
+ bp = conv_num(bp, &tm->tm_wday, 0, 6, ALT_DIGITS);
continue;
case 'Y': /* The year. */
+ LEGAL_ALT(ALT_E);
+ if ((alt_format & ALT_E) && *era_info)
+ {
+ bool gotit = false;
+ size_t num = (*era_info)->num;
+ (*era_info)->num = 1;
+ for (size_t i = 0; i < num; ++i)
+ {
+ era_info_t *tmp_ei = (*era_info) + i;
+ char *tmp = __strptime ((const char *) bp,
+ tmp_ei->era_Y,
+ tm, &tmp_ei,
+ alt_digits);
+ if (tmp)
+ {
+ bp = (const unsigned char *) tmp;
+ gotit = true;
+ break;
+ }
+ }
+ (*era_info)->num = num;
+ if (gotit)
+ continue;
+ return NULL;
+ }
i = TM_YEAR_BASE; /* just for data sanity... */
- bp = conv_num(bp, &i, 0, 9999);
+ for (ulim = 9999; width && width < 4; ++width)
+ ulim /= 10;
+ bp = conv_num(bp, &i, 0, ulim, NULL);
tm->tm_year = i - TM_YEAR_BASE;
- LEGAL_ALT(ALT_E);
+ era = NULL;
+ got_eoff = 0;
continue;
case 'y': /* The year within 100 years of the epoch. */
/* LEGAL_ALT(ALT_E | ALT_O); */
- bp = conv_num(bp, &i, 0, 99);
-
- if (split_year)
- /* preserve century */
+ if ((alt_format & ALT_E) && *era_info)
+ {
+ /* With E modifier, the offset to the start date
+ of the era specified with %EC. We potentially
+ don't know the era yet, so we have to store the
+ value in a local variable, just like era itself.
+ The final computation of tm_year is only done
+ right before this function returns. */
+ bp = conv_num(bp, &era_offset, 0, UINT_MAX, NULL);
+ got_eoff = 1;
+ continue;
+ }
+ bp = conv_num(bp, &i, 0, 99, ALT_DIGITS);
+
+ if (split_year) /* preserve century */
i += (tm->tm_year / 100) * 100;
else {
split_year = 1;
@@ -286,6 +602,8 @@ literal:
i = i + 1900 - TM_YEAR_BASE;
}
tm->tm_year = i;
+ era = NULL;
+ got_eoff = 0;
continue;
case 'Z':
@@ -334,29 +652,66 @@ literal:
}
}
+ if (bp && (era || got_eoff))
+ {
+ /* Default to current era. */
+ if (!era)
+ era = *era_info;
+ /* Default to first year of era if offset is missing */
+ if (!got_eoff)
+ era_offset = era->offset;
+ tm->tm_year = (era->start.tm_year != INT_MIN
+ ? era->start.tm_year : era->end.tm_year)
+ + (era_offset - era->offset) * era->dir;
+ /* Check if year falls into the era. If not, it's an
+ invalid combination of era and offset. */
+ if (era->start.tm_year > tm->tm_year
+ || era->end.tm_year < tm->tm_year)
+ return NULL;
+ tm->tm_year -= TM_YEAR_BASE;
+ }
+
return (char *) bp;
}
+char *
+strptime (const char *buf, const char *fmt, struct tm *tm)
+{
+ era_info_t *era_info = NULL;
+ alt_digits_t *alt_digits = NULL;
+ char *ret = __strptime (buf, fmt, tm, &era_info, &alt_digits);
+ if (era_info)
+ free_era_info (era_info);
+ if (alt_digits)
+ free_alt_digits (alt_digits);
+ return ret;
+}
static const u_char *
-conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
+conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim,
+ alt_digits_t *alt_digits)
{
uint result = 0;
unsigned char ch;
- /* The limit also determines the number of valid digits. */
- uint rulim = ulim;
-
- ch = *buf;
- if (ch < '0' || ch > '9')
- return NULL;
-
- do {
- result *= 10;
- result += ch - '0';
- rulim /= 10;
- ch = *++buf;
- } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
+ if (alt_digits)
+ buf = find_alt_digits (buf, alt_digits, &result);
+ else
+ {
+ /* The limit also determines the number of valid digits. */
+ uint rulim = ulim;
+
+ ch = *buf;
+ if (ch < '0' || ch > '9')
+ return NULL;
+
+ do {
+ result *= 10;
+ result += ch - '0';
+ rulim /= 10;
+ ch = *++buf;
+ } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
+ }
if (result < llim || result > ulim)
return NULL;