platform_bionic/libc/tzcode/strptime.c
Elliott Hughes 2bd4316bd6 Expose tzalloc()/localtime_rz()/mktime_z()/tzfree().
* Rationale

The question often comes up of how to use multiple time zones in C code.
If you're single-threaded, you can just use setenv() to manipulate $TZ.
toybox does this, for example. But that's not thread-safe in two
distinct ways: firstly, getenv() is not thread-safe with respect to
modifications to the environment (and between the way putenv() is
specified and the existence of environ, it's not obvious how to fully
fix that), and secondly the _caller_ needs to ensure that no other
threads are using tzset() or any function that behaves "as if" tzset()
was called (which is neither easy to determine nor easy to ensure).

This isn't a bigger problem because most of the time the right answer
is to stop pretending that libc is at all suitable for any i18n, and
switch to icu4c instead. (The NDK icu4c headers do not include ucal_*,
so this is not a realistic option for most applications.)

But what if you're somewhere in between? Like the rust chrono library,
for example? What then?

Currently their "least worst" option is to reinvent the entire wheel and
read our tzdata files. Which isn't a great solution for anyone, for
obvious maintainability reasons.

So it's probably time we broke the catch-22 here and joined NetBSD in
offering a less broken API than standard C has had for the last 40 years.
Sure, any would-be caller will have to have a separate "is this
Android?" and even "is this API level >= 35?" path, but that will fix
itself sometime in the 2030s when developers can just assume "yes, it
is", whereas if we keep putting off exposing anything, this problem
never gets solved.

(No-one's bothered to try to implement the std::chrono::time_zone
functionality in libc++ yet, but they'll face a similar problem if/when
they do.)

* Implementation

The good news is that tzcode already implements these functions, so
there's relatively little here.

I've chosen not to expose `struct state` because `struct __timezone_t`
makes for clearer error messages, given that compiler diagnostics will
show the underlying type name (`struct __timezone_t*`) rather than the
typedef name (`timezone_t`) that's used in calling code.

I've moved us over to FreeBSD's wcsftime() rather than keep the OpenBSD
one building --- I've long wanted to only have one implementation here,
and FreeBSD is already doing the "convert back and forth, calling the
non-wide function in the middle" dance that I'd hoped to get round to
doing myself someday. This should mean that our strftime() and
wcsftime() behaviors can't easily diverge in future, plus macOS/iOS are
mostly FreeBSD, so any bugs will likely be interoperable with the other
major mobile operating system, so there's something nice for everyone
there!

The FreeBSD wcsftime() implementation includes a wcsftime_l()
implementation, so that's one stub we can remove. The flip side of that
is that it uses mbsrtowcs_l() and wcsrtombs_l() which we didn't
previously have. So expose those as aliases of mbsrtowcs() and
wcsrtombs().

Bug: https://github.com/chronotope/chrono/issues/499
Test: treehugger
Change-Id: Iee1b9d763ead15eef3d2c33666b3403b68940c3c
2023-06-16 08:10:47 -07:00

676 lines
16 KiB
C

/* $OpenBSD: strptime.c,v 1.31 2023/03/02 16:21:51 millert Exp $ */
/* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */
/*-
* Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code was contributed to The NetBSD Foundation by Klaus Klein.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "private.h"
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "localedef.h"
#include "tzfile.h"
// Android: ignore OpenBSD's DEF_WEAK() stuff.
#define DEF_WEAK(sym) /* */
// Android: this code is not pointer-sign clean.
#pragma clang diagnostic ignored "-Wpointer-sign"
#pragma clang diagnostic ignored "-Wunused-function"
// Android: clang thinks people don't know && has higher precedence than ||.
#pragma clang diagnostic ignored "-Wlogical-op-parentheses"
/* Fetch member `x' of the time locale data that _CurrentTimeLocale points to. */
#define _ctloc(x) (_CurrentTimeLocale->x)
/*
* We do not implement alternate representations. However, we always
* check whether a given modifier is allowed for a certain conversion.
*
* _ALT_E and _ALT_O are the bits recorded in the local `alt_format' when a
* "%E" or "%O" modifier has been seen. _LEGAL_ALT(x) makes the enclosing
* function return 0 when `alt_format' contains any bit that is not in `x',
* so it can only be used where a variable named `alt_format' is in scope.
*/
#define _ALT_E 0x01
#define _ALT_O 0x02
#define _LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); }
/*
* We keep track of some of the fields we set in order to compute missing ones.
* Each FIELD_TM_* value is one bit in the `fields' bookkeeping mask.
*/
#define FIELD_TM_MON (1 << 0)
#define FIELD_TM_MDAY (1 << 1)
#define FIELD_TM_WDAY (1 << 2)
#define FIELD_TM_YDAY (1 << 3)
#define FIELD_TM_YEAR (1 << 4)
/* Zone names compared against the input by "%Z" and stored in tm_zone. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/* RFC-822/RFC-2822 */
/*
* North American zone abbreviations accepted by "%z": standard time in nast,
* daylight time in nadt. The index of a match selects the UTC offset
* ((-5 - i) hours for nast, (-4 - i) hours for nadt). Only the first four
* entries of each table are searched; the fifth is an empty string.
*/
static const char * const nast[5] = {
"EST", "CST", "MST", "PST", "\0\0\0"
};
static const char * const nadt[5] = {
"EDT", "CDT", "MDT", "PDT", "\0\0\0"
};
/* Days per month; the first index is the result of isleap(year). */
static const int mon_lengths[2][MONSPERYEAR] = {
{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};
/* Forward declarations of the file-local helpers defined below. */
static int _conv_num(const unsigned char **, int *, int, int);
static int epoch_to_tm(const unsigned char **, struct tm *);
static int leaps_thru_end_of(const int y);
static char *_strptime(const char *, const char *, struct tm *, int);
static const u_char *_find_string(const u_char *, int *, const char * const *,
const char * const *, int);
/*
 * Public entry point: parse `buf' according to `fmt' and store the result in
 * `tm'.  All the work is done by _strptime(); the final argument of 1 asks
 * it to reset its parsing state before starting.
 *
 * Returns whatever _strptime() returns: a pointer to the first unparsed byte
 * of `buf', or NULL on failure.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *end = _strptime(buf, fmt, tm, 1);

	return (end);
}
DEF_WEAK(strptime);
/*
 * Worker behind strptime(): parse `buf' according to `fmt', storing the
 * decoded fields in `tm'.
 *
 * `initialize' is non-zero for the outermost call and zero for the recursive
 * calls that expand composite conversions (%c, %D, %F, ...), so that the
 * century / two-digit-year / "fields seen" bookkeeping survives the
 * recursion.
 *
 * Returns a pointer to the first unconsumed byte of `buf', or NULL when the
 * input does not match the format or the conversion is not supported.
 */
static char *
_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
{
	unsigned char c;
	const unsigned char *bp, *ep;
	size_t len;
	int alt_format, i, offs;
	int neg = 0;
	/*
	 * Bookkeeping that has to outlive the recursive calls.
	 * NOTE(review): being static, this state is shared by every caller
	 * of this function, so overlapping parses can disturb each other.
	 */
	static int century, relyear, fields;

	if (initialize) {
		century = TM_YEAR_BASE;
		relyear = -1;
		fields = 0;
	}

	bp = (const unsigned char *)buf;
	while ((c = *fmt) != '\0') {
		/* Clear `alternate' modifier prior to new conversion. */
		alt_format = 0;

		/* Eat up white-space. */
		if (isspace(c)) {
			while (isspace(*bp))
				bp++;

			fmt++;
			continue;
		}

		/* Anything that is not a conversion must match literally. */
		if ((c = *fmt++) != '%')
			goto literal;

again:		switch (c = *fmt++) {
		case '%':	/* "%%" is converted to "%". */
literal:
			if (c != *bp++)
				return (NULL);
			break;

		/*
		 * "Alternative" modifiers. Just set the appropriate flag
		 * and start over again.
		 */
		case 'E':	/* "%E?" alternative conversion modifier. */
			_LEGAL_ALT(0);
			alt_format |= _ALT_E;
			goto again;

		case 'O':	/* "%O?" alternative conversion modifier. */
			_LEGAL_ALT(0);
			alt_format |= _ALT_O;
			goto again;

		/*
		 * "Complex" conversion rules, implemented through recursion.
		 */
		case 'c':	/* Date and time, using the locale's format. */
			_LEGAL_ALT(_ALT_E);
			if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
				return (NULL);
			break;

		case 'D':	/* The date as "%m/%d/%y". */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
				return (NULL);
			break;

		case 'F':	/* The date as "%Y-%m-%d". */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
				return (NULL);
			continue;

		case 'R':	/* The time as "%H:%M". */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
				return (NULL);
			break;

		case 'r':	/* The time as "%I:%M:%S %p". */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
				return (NULL);
			break;

		case 'T':	/* The time as "%H:%M:%S". */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
				return (NULL);
			break;

		case 'v':	/* Android: the date as "%e-%b-%Y" for strftime() compat; glibc does this too. */
			_LEGAL_ALT(0);
			if (!(bp = _strptime(bp, "%e-%b-%Y", tm, 0)))
				return (NULL);
			break;

		case 'X':	/* The time, using the locale's format. */
			_LEGAL_ALT(_ALT_E);
			if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
				return (NULL);
			break;

		case 'x':	/* The date, using the locale's format. */
			_LEGAL_ALT(_ALT_E);
			if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
				return (NULL);
			break;

		/*
		 * "Elementary" conversion rules.
		 */
		case 'A':	/* The day of week, using the locale's form. */
		case 'a':
			_LEGAL_ALT(0);
			for (i = 0; i < 7; i++) {
				/* Full name. */
				len = strlen(_ctloc(day[i]));
				if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
					break;

				/* Abbreviated name. */
				len = strlen(_ctloc(abday[i]));
				if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
					break;
			}

			/* Nothing matched. */
			if (i == 7)
				return (NULL);

			tm->tm_wday = i;
			bp += len;
			fields |= FIELD_TM_WDAY;
			break;

		case 'B':	/* The month, using the locale's form. */
		case 'b':
		case 'h':
			_LEGAL_ALT(0);
			for (i = 0; i < 12; i++) {
				/* Full name. */
				len = strlen(_ctloc(mon[i]));
				if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
					break;

				/* Abbreviated name. */
				len = strlen(_ctloc(abmon[i]));
				if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
					break;
			}

			/* Nothing matched. */
			if (i == 12)
				return (NULL);

			tm->tm_mon = i;
			bp += len;
			fields |= FIELD_TM_MON;
			break;

		case 'C':	/* The century number. */
			_LEGAL_ALT(_ALT_E);
			if (!(_conv_num(&bp, &i, 0, 99)))
				return (NULL);

			century = i * 100;
			break;

		case 'e':	/* The day of month. */
			/* A single leading blank is tolerated here. */
			if (isspace(*bp))
				bp++;
			/* FALLTHROUGH */
		case 'd':
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
				return (NULL);
			fields |= FIELD_TM_MDAY;
			break;

		case 'k':	/* The hour (24-hour clock representation). */
			_LEGAL_ALT(0);
			/* FALLTHROUGH */
		case 'H':
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
				return (NULL);
			break;

		case 'l':	/* The hour (12-hour clock representation). */
			_LEGAL_ALT(0);
			/* FALLTHROUGH */
		case 'I':
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
				return (NULL);
			break;

		case 'j':	/* The day of year. */
			_LEGAL_ALT(0);
			if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
				return (NULL);
			/* Input is 1-based, tm_yday is 0-based. */
			tm->tm_yday--;
			fields |= FIELD_TM_YDAY;
			break;

		case 'M':	/* The minute. */
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
				return (NULL);
			break;

		case 'm':	/* The month. */
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
				return (NULL);
			/* Input is 1-based, tm_mon is 0-based. */
			tm->tm_mon--;
			fields |= FIELD_TM_MON;
			break;

		case 'P':	/* Android addition for strftime() compat; glibc does this too. */
		case 'p':	/* The locale's equivalent of AM/PM. */
			_LEGAL_ALT(0);
			/* AM? */
			len = strlen(_ctloc(am_pm[0]));
			if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
				if (tm->tm_hour > 12)	/* i.e., 13:00 AM ?! */
					return (NULL);
				else if (tm->tm_hour == 12)
					tm->tm_hour = 0;

				bp += len;
				break;
			}
			/* PM? */
			len = strlen(_ctloc(am_pm[1]));
			if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
				if (tm->tm_hour > 12)	/* i.e., 13:00 PM ?! */
					return (NULL);
				else if (tm->tm_hour < 12)
					tm->tm_hour += 12;

				bp += len;
				break;
			}

			/* Nothing matched. */
			return (NULL);

		case 'S':	/* The seconds. */
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
				return (NULL);
			break;

		case 's':	/* Seconds since epoch. */
			if (!(epoch_to_tm(&bp, tm)))
				return (NULL);
			fields = 0xffff;	/* everything */
			break;

		case 'U':	/* The week of year, beginning on sunday. */
		case 'W':	/* The week of year, beginning on monday. */
			_LEGAL_ALT(_ALT_O);
			/*
			 * XXX This is bogus, as we can not assume any valid
			 * information present in the tm structure at this
			 * point to calculate a real value, so just check the
			 * range for now.
			 */
			if (!(_conv_num(&bp, &i, 0, 53)))
				return (NULL);
			break;

		case 'w':	/* The day of week, beginning on sunday. */
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
				return (NULL);
			fields |= FIELD_TM_WDAY;
			break;

		case 'u':	/* The day of week, monday = 1. */
			_LEGAL_ALT(_ALT_O);
			if (!(_conv_num(&bp, &i, 1, 7)))
				return (NULL);
			/* 7 (Sunday) wraps to tm_wday 0. */
			tm->tm_wday = i % 7;
			fields |= FIELD_TM_WDAY;
			continue;

		case 'g':	/* The year corresponding to the ISO week
				 * number but without the century.
				 */
			/* Range-checked only; the value is not stored. */
			if (!(_conv_num(&bp, &i, 0, 99)))
				return (NULL);
			continue;

		case 'G':	/* The year corresponding to the ISO week
				 * number with century.
				 */
			/*
			 * The value is skipped, not stored.  The first byte
			 * is consumed unconditionally, so refuse to step over
			 * the terminating NUL and scan past the end of the
			 * input.
			 */
			if (*bp == '\0')
				return (NULL);
			do
				bp++;
			while (isdigit(*bp));
			continue;

		case 'V':	/* The ISO 8601:1988 week number as decimal */
			/* Range-checked only; the value is not stored. */
			if (!(_conv_num(&bp, &i, 0, 53)))
				return (NULL);
			continue;

		case 'Y':	/* The year. */
			_LEGAL_ALT(_ALT_E);
			if (!(_conv_num(&bp, &i, 0, 9999)))
				return (NULL);

			/* A full year overrides any earlier "%y". */
			relyear = -1;
			tm->tm_year = i - TM_YEAR_BASE;
			fields |= FIELD_TM_YEAR;
			break;

		case 'y':	/* The year within the century (2 digits). */
			_LEGAL_ALT(_ALT_E | _ALT_O);
			if (!(_conv_num(&bp, &relyear, 0, 99)))
				return (NULL);
			break;

		case 'Z':
			tzset();
			if (strncmp((const char *)bp, gmt, 3) == 0) {
				tm->tm_isdst = 0;
				tm->tm_gmtoff = 0;
				tm->tm_zone = gmt;
				bp += 3;
			} else if (strncmp((const char *)bp, utc, 3) == 0) {
				tm->tm_isdst = 0;
				tm->tm_gmtoff = 0;
				tm->tm_zone = utc;
				bp += 3;
			} else {
				/* Otherwise try the two names in tzname[]. */
				ep = _find_string(bp, &i,
				    (const char * const *)tzname,
				    NULL, 2);
				if (ep == NULL)
					return (NULL);

				tm->tm_isdst = i;
				tm->tm_gmtoff = -(timezone);
				tm->tm_zone = tzname[i];
				bp = ep;
			}
			continue;

		case 'z':
			/*
			 * We recognize all ISO 8601 formats:
			 * Z	= Zulu time/UTC
			 * [+-]hhmm
			 * [+-]hh:mm
			 * [+-]hh
			 * We recognize all RFC-822/RFC-2822 formats:
			 * UT|GMT
			 *          North American : UTC offsets
			 * E[DS]T = Eastern : -4 | -5
			 * C[DS]T = Central : -5 | -6
			 * M[DS]T = Mountain: -6 | -7
			 * P[DS]T = Pacific : -7 | -8
			 */
			while (isspace(*bp))
				bp++;

			switch (*bp++) {
			case 'G':
				if (*bp++ != 'M')
					return NULL;
				/*FALLTHROUGH*/
			case 'U':
				if (*bp++ != 'T')
					return NULL;
				/*FALLTHROUGH*/
			case 'Z':
				tm->tm_isdst = 0;
				tm->tm_gmtoff = 0;
				tm->tm_zone = utc;
				continue;
			case '+':
				neg = 0;
				break;
			case '-':
				neg = 1;
				break;
			default:
				/* Un-read the byte and try the zone names. */
				--bp;
				ep = _find_string(bp, &i, nast, NULL, 4);
				if (ep != NULL) {
					tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
					tm->tm_zone = (char *)nast[i];
					bp = ep;
					continue;
				}
				ep = _find_string(bp, &i, nadt, NULL, 4);
				if (ep != NULL) {
					tm->tm_isdst = 1;
					tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
					tm->tm_zone = (char *)nadt[i];
					bp = ep;
					continue;
				}
				return NULL;
			}
			/* Numeric offset: two hour digits are mandatory. */
			if (!isdigit(bp[0]) || !isdigit(bp[1]))
				return NULL;
			offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
			bp += 2;
			if (*bp == ':')
				bp++;
			/* Minutes are optional, but need both digits. */
			if (isdigit(*bp)) {
				offs += (*bp++ - '0') * 10 * SECSPERMIN;
				if (!isdigit(*bp))
					return NULL;
				offs += (*bp++ - '0') * SECSPERMIN;
			}
			if (neg)
				offs = -offs;
			tm->tm_isdst = 0;	/* XXX */
			tm->tm_gmtoff = offs;
			tm->tm_zone = NULL;	/* XXX */
			continue;

		/*
		 * Miscellaneous conversions.
		 */
		case 'n':	/* Any kind of white-space. */
		case 't':
			_LEGAL_ALT(0);
			while (isspace(*bp))
				bp++;
			break;

		default:	/* Unknown/unsupported conversion. */
			return (NULL);
		}
	}

	/*
	 * We need to evaluate the two digit year spec (%y)
	 * last as we can get a century spec (%C) at any time.
	 */
	if (relyear != -1) {
		if (century == TM_YEAR_BASE) {
			/* No "%C" seen: 00-68 means 20xx, 69-99 means 19xx. */
			if (relyear <= 68)
				tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
			else
				tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
		} else {
			tm->tm_year = relyear + century - TM_YEAR_BASE;
		}
		fields |= FIELD_TM_YEAR;
	}

	/* Compute some missing values when possible. */
	if (fields & FIELD_TM_YEAR) {
		const int year = tm->tm_year + TM_YEAR_BASE;
		const int *mon_lens = mon_lengths[isleap(year)];

		/* Day of year from month + day of month. */
		if (!(fields & FIELD_TM_YDAY) &&
		    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
			tm->tm_yday = tm->tm_mday - 1;
			for (i = 0; i < tm->tm_mon; i++)
				tm->tm_yday += mon_lens[i];
			fields |= FIELD_TM_YDAY;
		}
		if (fields & FIELD_TM_YDAY) {
			int days = tm->tm_yday;

			/* Day of week from year + day of year. */
			if (!(fields & FIELD_TM_WDAY)) {
				tm->tm_wday = EPOCH_WDAY +
				    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
				    (DAYSPERNYEAR % DAYSPERWEEK) +
				    leaps_thru_end_of(year - 1) -
				    leaps_thru_end_of(EPOCH_YEAR - 1) +
				    tm->tm_yday;
				tm->tm_wday %= DAYSPERWEEK;
				if (tm->tm_wday < 0)
					tm->tm_wday += DAYSPERWEEK;
			}
			/* Month (and below, day of month) from day of year. */
			if (!(fields & FIELD_TM_MON)) {
				tm->tm_mon = 0;
				while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
					days -= mon_lens[tm->tm_mon++];
			}
			if (!(fields & FIELD_TM_MDAY))
				tm->tm_mday = days + 1;
		}
	}

	return ((char *)bp);
}
/*
 * Read an unsigned decimal number from *buf.
 *
 * At most as many digits are consumed as `ulim' itself has, and scanning
 * also stops as soon as appending one more digit would take the value above
 * `ulim'.  *buf is advanced past every digit consumed, even when the value
 * then fails the range check.
 *
 * Returns 1 and stores the value in *dest when it lies in [llim, ulim].
 * Returns 0, leaving *dest alone, when *buf does not start with a digit or
 * the value is out of range.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int value = 0;
	int digit_budget = ulim;	/* loses one decimal digit per digit read */

	if (!(*cur >= '0' && *cur <= '9'))
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		digit_budget /= 10;

		if (value * 10 > ulim || digit_budget == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value >= llim && value <= ulim) {
		*dest = value;
		return (1);
	}
	return (0);
}
/*
 * Parse a non-negative decimal count of seconds since the epoch at *buf and
 * convert it to broken-down local time in `tm' (the "%s" conversion).
 *
 * On return *buf points just past whatever strtoll() consumed, whether or
 * not the conversion succeeded.  errno is restored to the value it had on
 * entry.
 *
 * Returns 1 on success.  Returns 0 when no number is present, the number is
 * negative, it overflows long long, it cannot be represented in a time_t,
 * or localtime_r() rejects it.
 */
static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
	const int saved_errno = errno;
	int ret = 0;
	long long value;
	time_t secs;
	char *ep;

	errno = 0;
	value = strtoll((const char *)*buf, &ep, 10);
	if (*buf == (const unsigned char *)ep)
		goto done;	/* no digits at all */
	if (value < 0 ||
	    (value == LLONG_MAX && errno == ERANGE))
		goto done;
	/*
	 * time_t can be narrower than long long.  Assigning the parsed value
	 * straight to a time_t would then silently truncate it and yield an
	 * unrelated timestamp, so only accept values that survive the round
	 * trip unchanged.
	 */
	secs = (time_t)value;
	if ((long long)secs != value)
		goto done;
	if (localtime_r(&secs, tm) == NULL)
		goto done;
	ret = 1;
done:
	*buf = (const unsigned char *)ep;
	errno = saved_errno;
	return (ret);
}
/*
 * Look for one of up to two name tables at the start of `bp', ignoring case.
 *
 * `n1' is searched first, then `n2'; the search ends at the first table
 * pointer that is NULL.  Each table is expected to hold `c' strings.  The
 * first entry that is a case-insensitive prefix of `bp' wins.
 *
 * On a match the entry's index is stored in *tgt and a pointer just past the
 * matched text is returned.  Otherwise NULL is returned and *tgt is left
 * untouched.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	unsigned int len;
	int t, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (t = 0; t < 2; t++) {
		const char * const *names = tables[t];

		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			len = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp, len) != 0)
				continue;

			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
/*
 * Count the leap years from year 0 through year `y' using the Gregorian
 * rule (every 4th year, except every 100th, but including every 400th).
 *
 * A negative `y' is mapped onto the non-negative year -(y + 1) and the
 * result is negated and shifted by one.
 */
static int
leaps_thru_end_of(const int y)
{
	if (y < 0) {
		const int mirrored = -(y + 1);	/* always >= 0 */
		const int count = mirrored / 4 - mirrored / 100 + mirrored / 400;

		return -(count + 1);
	}

	return y / 4 - y / 100 + y / 400;
}