platform_bionic/libc/stdio/parsefloat.c
Elliott Hughes 7f0849fd11 Fix sscanf/wcstod parsing of NaNs.
The parsefloat routines -- which let us pass NaNs and infinities on to
strto(f|d|ld) -- come from NetBSD.

Also fix LP64's strtold to return a NaN, and fix all the architectures
to return quiet NaNs.

Also fix wcstof/wcstod/wcstold to use parsefloat so they support hex
floats.

Lots of new tests.

Bug: http://b/31101647
Change-Id: Id7d46ac2d8acb8770b5e8c445e87cfabfde6f111
2016-09-07 15:01:54 -07:00

336 lines
7.8 KiB
C

/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Chris Torek.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#include <stdlib.h>
#include "local.h"
#include "floatio.h"
/*
 * NOTE(review): BUF is not referenced by parsefloat() or wparsefloat() in
 * this file; both take their bound from the caller-supplied `end` pointer.
 * Confirm whether this definition is still needed here.
 */
#define BUF 513 /* Maximum length of numeric string. */
/*
 * Copies the longest prefix of the input available on fp that forms a
 * floating-point literal into buf, and NUL-terminates it.
 *
 * Accepted forms, after an optional leading '+' or '-':
 *   - 'i'/'I' followed by "nf" or "nfinity" (each position is compared
 *     against both the lower-case and the upper-case spelling);
 *   - 'n'/'N' followed by 'a'/'A' and 'n'/'N', optionally followed by
 *     '(' , any run of alphanumerics or '_', and ')';
 *   - decimal digits with an optional '.' fraction and an optional
 *     'e'/'E' exponent;
 *   - "0x"/"0X" followed by hex digits with an optional '.' fraction and
 *     an optional 'p'/'P' exponent.
 * Exponents are an optional sign followed by decimal digits.
 *
 * fp  - stream to read from; accessed through its _p/_r buffer fields.
 * buf - destination for the accepted characters.
 * end - upper bound for stored characters: at most (end - buf) characters
 *       are written before the terminator.
 *       NOTE(review): the terminating NUL can therefore land at *end, so
 *       callers presumably reserve one extra element -- confirm.
 *
 * Returns the number of characters committed to buf (0 when the input does
 * not start with a valid number). Characters that were consumed past the
 * last commit point are pushed back onto fp with ungetc().
 */
size_t parsefloat(FILE *fp, char *buf, char *end) {
char *commit, *p;
/*
 * S_INF: index into "nfinity" of the next expected character.
 * S_NAN: number of characters seen after the leading 'n'; set to -1 once
 * the closing ')' of "nan(...)" has been accepted.
 */
int infnanpos = 0;
enum {
S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
} state = S_START;
unsigned char c;
/* gotmantdig: a mantissa digit has been seen; ishex: "0x"/"0X" prefix seen. */
int gotmantdig = 0, ishex = 0;
/*
 * We set commit = p whenever the string we have read so far
 * constitutes a valid representation of a floating point
 * number by itself. At some point, the parse will complete
 * or fail, and we will ungetc() back to the last commit point.
 * To ensure that the file offset gets updated properly, it is
 * always necessary to read at least one character that doesn't
 * match; thus, we can't short-circuit "infinity" or "nan(...)".
 */
/* Nothing committed yet: one before buf, so that ++commit lands on buf[0]. */
commit = buf - 1;
for (p = buf; p < end; ) {
/*
 * Peek at the next buffered byte. It is only consumed at the bottom of
 * the loop, after the state machine has accepted it.
 * NOTE(review): assumes at least one byte is buffered on entry -- confirm
 * against the caller.
 */
c = *fp->_p;
/* Re-dispatch the same character c after a state change. */
reswitch:
switch (state) {
case S_START:
/* Optional sign: consume it, otherwise re-examine c as the first body char. */
state = S_GOTSIGN;
if (c == '-' || c == '+')
break;
else
goto reswitch;
case S_GOTSIGN:
switch (c) {
case '0':
/* A lone "0" is already a valid number; it may also start "0x". */
state = S_MAYBEHEX;
commit = p;
break;
case 'I':
case 'i':
state = S_INF;
break;
case 'N':
case 'n':
state = S_NAN;
break;
default:
state = S_DIGITS;
goto reswitch;
}
break;
case S_INF:
/* Stop once "nfinity" is exhausted or c matches neither spelling. */
if (infnanpos > 6 ||
(c != "nfinity"[infnanpos] &&
c != "NFINITY"[infnanpos]))
goto parsedone;
if (infnanpos == 1 || infnanpos == 6)
commit = p; /* inf or infinity */
infnanpos++;
break;
case S_NAN:
switch (infnanpos) {
case -1: /* XXX kludge to deal with nan(...) */
goto parsedone;
case 0:
if (c != 'A' && c != 'a')
goto parsedone;
break;
case 1:
if (c != 'N' && c != 'n')
goto parsedone;
else
commit = p; /* plain "nan" is valid on its own */
break;
case 2:
if (c != '(')
goto parsedone;
break;
default:
if (c == ')') {
commit = p;
/* Becomes -1 after the increment below, ending the parse on the next char. */
infnanpos = -2;
} else if (!isalnum(c) && c != '_')
goto parsedone;
break;
}
infnanpos++;
break;
case S_MAYBEHEX:
state = S_DIGITS;
if (c == 'X' || c == 'x') {
ishex = 1;
break;
} else { /* we saw a '0', but no 'x' */
gotmantdig = 1;
goto reswitch;
}
case S_DIGITS:
if ((ishex && isxdigit(c)) || isdigit(c))
gotmantdig = 1;
else {
/* Integer part is over; a '.' is consumed here, anything else is re-examined in S_FRAC. */
state = S_FRAC;
if (c != '.')
goto reswitch;
}
/* Commits digits, and also a trailing '.' once a mantissa digit exists. */
if (gotmantdig)
commit = p;
break;
case S_FRAC:
/* Exponent marker: e/E for decimal, p/P for hex; needs a mantissa digit first. */
if (((c == 'E' || c == 'e') && !ishex) ||
((c == 'P' || c == 'p') && ishex)) {
if (!gotmantdig)
goto parsedone;
else
state = S_EXP;
} else if ((ishex && isxdigit(c)) || isdigit(c)) {
commit = p;
gotmantdig = 1;
} else
goto parsedone;
break;
case S_EXP:
/* Optional exponent sign; not committed until a digit follows. */
state = S_EXPDIGITS;
if (c == '-' || c == '+')
break;
else
goto reswitch;
case S_EXPDIGITS:
/* Exponent digits are decimal for both decimal and hex literals. */
if (isdigit(c))
commit = p;
else
goto parsedone;
break;
default:
abort();
}
/* c was accepted: store it, then consume it from the stream buffer. */
*p++ = c;
if (--fp->_r > 0)
fp->_p++;
else if (__srefill(fp))
break; /* EOF */
}
parsedone:
/*
 * Push back, last-stored first, every character stored after the commit
 * point. A character rejected via "goto parsedone" was only peeked, never
 * consumed, so it needs no pushback.
 */
while (commit < --p)
(void)ungetc(*(unsigned char *)p, fp);
/* Terminate right after the last committed character (buf[0] if none). */
*++commit = '\0';
return commit - buf;
}
/*
 * Wide-character variant of the floating-point prefix scanner: copies the
 * longest prefix of the wide input on fp that forms a floating-point
 * literal into buf, and terminates it with a null wide character.
 *
 * Accepted forms, after an optional leading '+' or '-':
 *   - 'i'/'I' followed by "nf" or "nfinity" (each position is compared
 *     against both the lower-case and the upper-case spelling);
 *   - 'n'/'N' followed by 'a'/'A' and 'n'/'N', optionally followed by
 *     '(' , any run of iswalnum() characters or '_', and ')';
 *   - decimal digits with an optional '.' fraction and an optional
 *     'e'/'E' exponent;
 *   - "0x"/"0X" followed by hex digits with an optional '.' fraction and
 *     an optional 'p'/'P' exponent.
 * Exponents are an optional sign followed by decimal digits.
 *
 * fp  - stream to read from, via __fgetwc_unlock().
 * buf - destination for the accepted wide characters.
 * end - upper bound for stored characters: at most (end - buf) characters
 *       are written before the terminator.
 *       NOTE(review): the terminator can therefore land at *end, so
 *       callers presumably reserve one extra element -- confirm.
 *
 * Returns the number of wide characters committed to buf (0 when the input
 * does not start with a valid number). Characters read past the last commit
 * point, including the character that ended the parse, are pushed back
 * with ungetwc().
 */
size_t wparsefloat(FILE *fp, wchar_t *buf, wchar_t *end) {
	wchar_t *commit, *p;
	/*
	 * S_INF: index into "nfinity" of the next expected character.
	 * S_NAN: number of characters seen after the leading 'n'; set to -1
	 * once the closing ')' of "nan(...)" has been accepted.
	 */
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;
	/* gotmantdig: a mantissa digit was seen; ishex: "0x"/"0X" prefix seen. */
	int gotmantdig = 0, ishex = 0;

	/*
	 * We set commit = p whenever the string we have read so far
	 * constitutes a valid representation of a floating point
	 * number by itself. At some point, the parse will complete
	 * or fail, and we will ungetwc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	/* Nothing committed yet: one before buf, so ++commit lands on buf[0]. */
	commit = buf - 1;
	/* WEOF in c means "no rejected character is pending pushback". */
	c = WEOF;
	for (p = buf; p < end; ) {
		/* Unlike the peek-based narrow scanner, this consumes c up front. */
		if ((c = __fgetwc_unlock(fp)) == WEOF)
			break;
		/* Re-dispatch the same character c after a state change. */
reswitch:
		switch (state) {
		case S_START:
			/* Optional sign: consume it, otherwise re-examine c. */
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				/* A lone "0" is valid; it may also start "0x". */
				state = S_MAYBEHEX;
				commit = p;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* Stop once "nfinity" is exhausted or c matches neither spelling. */
			if (infnanpos > 6 ||
			    (c != (wint_t)"nfinity"[infnanpos] &&
			     c != (wint_t)"NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			switch (infnanpos) {
			case -1:	/* XXX kludge to deal with nan(...) */
				goto parsedone;
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p;	/* plain "nan" is valid */
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p;
					/* Becomes -1 after the increment below. */
					infnanpos = -2;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				/*
				 * Integer part is over; a '.' is consumed here,
				 * anything else is re-examined in S_FRAC.
				 */
				state = S_FRAC;
				if (c != L'.')
					goto reswitch;
			}
			/* Commits digits, and a trailing '.' once a digit exists. */
			if (gotmantdig)
				commit = p;
			break;
		case S_FRAC:
			/* Exponent marker: e/E for decimal, p/P for hex. */
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			/* Optional exponent sign; not committed until a digit follows. */
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			/* Exponent digits are decimal for both decimal and hex. */
			if (iswdigit(c))
				commit = p;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/* c was accepted: store it and mark that nothing is pending. */
		*p++ = c;
		c = WEOF;
	}
parsedone:
	/* The character that ended the parse was consumed; give it back first. */
	if (c != WEOF)
		ungetwc(c, fp);
	/* Then push back, last-stored first, everything after the commit point. */
	while (commit < --p)
		ungetwc(*p, fp);
	/* Terminate right after the last committed character (buf[0] if none). */
	*++commit = '\0';
	/*
	 * commit >= buf here, so the difference is non-negative. Convert it
	 * straight to the declared return type rather than narrowing via int.
	 */
	return (size_t)(commit - buf);
}