Add sscanf %[ tests and fix the bug that fell out.

Strictly, POSIX says "If a '-' is in the scanlist and is not the first
wide character, nor the second where the first wide character is a '^',
nor the last wide character, the behavior is implementation-defined",
but it seems unreasonable for swscanf to interpret `a-c` differently
from sscanf. Make ours behave the same as each other by making swscanf
work the same as sscanf.

Bug: http://b/68672236
Test: ran tests
Change-Id: Ia84805897628d7128e901b468e02504373730e61
This commit is contained in:
Elliott Hughes 2017-12-06 16:41:35 -08:00
parent c1f6219c32
commit 0d3ba1f047
3 changed files with 124 additions and 43 deletions

View file

@ -83,9 +83,6 @@
#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */
#define CT_FLOAT 4 /* floating, i.e., strtod */
#define u_char unsigned char
#define u_long unsigned long
static u_char* __sccl(char*, u_char*);
/*
@ -192,9 +189,6 @@ int __svfscanf(FILE* fp, const char* fmt0, __va_list ap) {
/*
* Conversions.
* Those marked `compat' are for 4.[123]BSD compatibility.
*
* (According to ANSI, E and X formats are supposed
* to the same as e and x. Sorry about that.)
*/
case 'D': /* compat */
flags |= LONG;

View file

@ -84,12 +84,43 @@
#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */
#define CT_FLOAT 4 /* floating, i.e., strtod */
#define u_char unsigned char
#define u_long unsigned long
// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would
// be a little too expensive, and some kind of compressed version isn't worth the trouble.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
// Is this a negated set?
bool member_result = true;
if (*ccl == '^') {
member_result = false;
++ccl;
}
#define INCCL(_c) \
(cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) \
: (wmemchr(ccls, (_c), ccle - ccls) != NULL))
// The first character may be ']' or '-' without being special.
if (*ccl == '-' || *ccl == ']') {
// A literal match?
if (*ccl == wc) return member_result;
++ccl;
}
while (*ccl && *ccl != ']') {
// The last character may be '-' without being special.
if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') {
wchar_t first = *(ccl - 1);
wchar_t last = *(ccl + 1);
if (first <= last) {
// In the range?
if (wc >= first && wc <= last) return member_result;
ccl += 2;
continue;
}
// A '-' is not considered to be part of a range if the character after
// is not greater than the character before, so fall through...
}
// A literal match?
if (*ccl == wc) return member_result;
++ccl;
}
return !member_result;
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-larger-than="
@ -109,9 +140,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
int nread; /* number of characters consumed from fp */
int base; /* base argument to strtoimax/strtouimax */
wchar_t buf[BUF]; /* buffer for numeric conversions */
const wchar_t* ccls; /* character class start */
const wchar_t* ccle; /* character class end */
int cclcompl; /* ccl is complemented? */
const wchar_t* ccl;
wint_t wi; /* handy wint_t */
char* mbp; /* multibyte string pointer for %c %s %[ */
size_t nconv; /* number of bytes in mb. conversion */
@ -127,7 +156,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
nconversions = 0;
nread = 0;
base = 0; /* XXX just to keep gcc happy */
ccls = ccle = NULL;
for (;;) {
c = *fmt++;
if (c == 0) {
@ -210,9 +238,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
/*
* Conversions.
* Those marked `compat' are for 4.[123]BSD compatibility.
*
* (According to ANSI, E and X formats are supposed
* to the same as e and x. Sorry about that.)
*/
case 'D': /* compat */
flags |= LONG;
@ -266,15 +291,10 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
break;
case '[':
ccls = fmt;
if (*fmt == '^') {
cclcompl = 1;
fmt++;
} else
cclcompl = 0;
ccl = fmt;
if (*fmt == '^') fmt++;
if (*fmt == ']') fmt++;
while (*fmt != '\0' && *fmt != ']') fmt++;
ccle = fmt;
fmt++;
flags |= NOSKIP;
c = CT_CCL;
@ -387,12 +407,12 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
/* take only those things in the class */
if ((flags & SUPPRESS) && (flags & LONG)) {
n = 0;
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi)) n++;
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl)) n++;
if (wi != WEOF) __ungetwc(wi, fp);
if (n == 0) goto match_failure;
} else if (flags & LONG) {
p0 = p = va_arg(ap, wchar_t*);
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi))
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl))
*p++ = (wchar_t)wi;
if (wi != WEOF) __ungetwc(wi, fp);
n = p - p0;
@ -403,7 +423,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
n = 0;
memset(&mbs, 0, sizeof(mbs));
while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && INCCL(wi)) {
while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && in_ccl(wi, ccl)) {
if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
nconv = wcrtomb(mbp, wi, &mbs);
if (nconv == (size_t)-1) goto input_failure;
@ -418,6 +438,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
n++;
}
if (wi != WEOF) __ungetwc(wi, fp);
if (n == 0) goto match_failure;
if (!(flags & SUPPRESS)) {
*mbp = 0;
nassigned++;

View file

@ -923,33 +923,99 @@ TEST(STDIO_TEST, putc) {
TEST(STDIO_TEST, sscanf_swscanf) {
struct stuff {
char s1[123];
int i1;
int i1, i2;
char cs1[3];
char s2[3];
char c1;
double d1;
float f1;
char s2[123];
char s3[123];
void Check() {
ASSERT_STREQ("hello", s1);
ASSERT_EQ(123, i1);
ASSERT_DOUBLE_EQ(1.23, d1);
ASSERT_FLOAT_EQ(9.0f, f1);
ASSERT_STREQ("world", s2);
EXPECT_STREQ("hello", s1);
EXPECT_EQ(123, i1);
EXPECT_EQ(456, i2);
EXPECT_EQ('a', cs1[0]);
EXPECT_EQ('b', cs1[1]);
EXPECT_EQ('x', cs1[2]); // No terminating NUL.
EXPECT_STREQ("AB", s2); // Terminating NUL.
EXPECT_EQ('!', c1);
EXPECT_DOUBLE_EQ(1.23, d1);
EXPECT_FLOAT_EQ(9.0f, f1);
EXPECT_STREQ("world", s3);
}
} s;
memset(&s, 0, sizeof(s));
ASSERT_EQ(5, sscanf(" hello 123 1.23 0x1.2p3 world",
"%s %i %lf %f %s",
s.s1, &s.i1, &s.d1, &s.f1, s.s2));
memset(&s, 'x', sizeof(s));
ASSERT_EQ(9, sscanf(" hello 123 456abAB! 1.23 0x1.2p3 world",
"%s %i%i%2c%[A-Z]%c %lf %f %s",
s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
s.Check();
memset(&s, 0, sizeof(s));
ASSERT_EQ(5, swscanf(L" hello 123 1.23 0x1.2p3 world",
L"%s %i %lf %f %s",
s.s1, &s.i1, &s.d1, &s.f1, s.s2));
memset(&s, 'x', sizeof(s));
ASSERT_EQ(9, swscanf(L" hello 123 456abAB! 1.23 0x1.2p3 world",
L"%s %i%i%2c%[A-Z]%c %lf %f %s",
s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
s.Check();
}
template <typename T>
static void CheckScanf(int sscanf_fn(const T*, const T*, ...),
const T* input, const T* fmt,
int expected_count, const char* expected_string) {
char buf[256] = {};
ASSERT_EQ(expected_count, sscanf_fn(input, fmt, &buf)) << fmt;
ASSERT_STREQ(expected_string, buf) << fmt;
}
TEST(STDIO_TEST, sscanf_ccl) {
// `abc` is just those characters.
CheckScanf(sscanf, "abcd", "%[abc]", 1, "abc");
// `a-c` is the range 'a' .. 'c'.
CheckScanf(sscanf, "abcd", "%[a-c]", 1, "abc");
CheckScanf(sscanf, "-d", "%[a-c]", 0, "");
CheckScanf(sscanf, "ac-bAd", "%[a--c]", 1, "ac-bA");
// `a-c-e` is equivalent to `a-e`.
CheckScanf(sscanf, "abcdefg", "%[a-c-e]", 1, "abcde");
// `e-a` is equivalent to `ae-` (because 'e' > 'a').
CheckScanf(sscanf, "-a-e-b", "%[e-a]", 1, "-a-e-");
// An initial '^' negates the set.
CheckScanf(sscanf, "abcde", "%[^d]", 1, "abc");
CheckScanf(sscanf, "abcdefgh", "%[^c-d]", 1, "ab");
CheckScanf(sscanf, "hgfedcba", "%[^c-d]", 1, "hgfe");
// The first character may be ']' or '-' without being special.
CheckScanf(sscanf, "[[]]x", "%[][]", 1, "[[]]");
CheckScanf(sscanf, "-a-x", "%[-a]", 1, "-a-");
// The last character may be '-' without being special.
CheckScanf(sscanf, "-a-x", "%[a-]", 1, "-a-");
// X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
CheckScanf(sscanf, "+,-/.", "%[+--/]", 1, "+,-/");
}
TEST(STDIO_TEST, swscanf_ccl) {
// `abc` is just those characters.
CheckScanf(swscanf, L"abcd", L"%[abc]", 1, "abc");
// `a-c` is the range 'a' .. 'c'.
CheckScanf(swscanf, L"abcd", L"%[a-c]", 1, "abc");
CheckScanf(swscanf, L"-d", L"%[a-c]", 0, "");
CheckScanf(swscanf, L"ac-bAd", L"%[a--c]", 1, "ac-bA");
// `a-c-e` is equivalent to `a-e`.
CheckScanf(swscanf, L"abcdefg", L"%[a-c-e]", 1, "abcde");
// `e-a` is equivalent to `ae-` (because 'e' > 'a').
CheckScanf(swscanf, L"-a-e-b", L"%[e-a]", 1, "-a-e-");
// An initial '^' negates the set.
CheckScanf(swscanf, L"abcde", L"%[^d]", 1, "abc");
CheckScanf(swscanf, L"abcdefgh", L"%[^c-d]", 1, "ab");
CheckScanf(swscanf, L"hgfedcba", L"%[^c-d]", 1, "hgfe");
// The first character may be ']' or '-' without being special.
CheckScanf(swscanf, L"[[]]x", L"%[][]", 1, "[[]]");
CheckScanf(swscanf, L"-a-x", L"%[-a]", 1, "-a-");
// The last character may be '-' without being special.
CheckScanf(swscanf, L"-a-x", L"%[a-]", 1, "-a-");
// X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
CheckScanf(swscanf, L"+,-/.", L"%[+--/]", 1, "+,-/");
}
TEST(STDIO_TEST, cantwrite_EBADF) {
// If we open a file read-only...
FILE* fp = fopen("/proc/version", "r");