From 0d3ba1f04701e8ba0f8bd0cd6a0f4c8bb9d558b5 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Wed, 6 Dec 2017 16:41:35 -0800 Subject: [PATCH] Add sscanf %[ tests and fix the bug that fell out. Strictly, POSIX says "If a '-' is in the scanlist and is not the first wide character, nor the second where the first wide character is a '^', nor the last wide character, the behavior is implementation-defined", but it seems unreasonable for swscanf to interpret `a-c` differently from sscanf. Make ours behave the same as each other by making swscanf work the same as sscanf. Bug: http://b/68672236 Test: ran tests Change-Id: Ia84805897628d7128e901b468e02504373730e61 --- libc/stdio/vfscanf.c | 6 --- libc/stdio/vfwscanf.c | 65 +++++++++++++++++++---------- tests/stdio_test.cpp | 96 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 124 insertions(+), 43 deletions(-) diff --git a/libc/stdio/vfscanf.c b/libc/stdio/vfscanf.c index 853e0cfed..887b4350d 100644 --- a/libc/stdio/vfscanf.c +++ b/libc/stdio/vfscanf.c @@ -83,9 +83,6 @@ #define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */ #define CT_FLOAT 4 /* floating, i.e., strtod */ -#define u_char unsigned char -#define u_long unsigned long - static u_char* __sccl(char*, u_char*); /* @@ -192,9 +189,6 @@ int __svfscanf(FILE* fp, const char* fmt0, __va_list ap) { /* * Conversions. * Those marked `compat' are for 4.[123]BSD compatibility. - * - * (According to ANSI, E and X formats are supposed - * to the same as e and x. Sorry about that.) */ case 'D': /* compat */ flags |= LONG; diff --git a/libc/stdio/vfwscanf.c b/libc/stdio/vfwscanf.c index 206f4a210..1030a62e0 100644 --- a/libc/stdio/vfwscanf.c +++ b/libc/stdio/vfwscanf.c @@ -84,12 +84,43 @@ #define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */ #define CT_FLOAT 4 /* floating, i.e., strtod */ -#define u_char unsigned char -#define u_long unsigned long +// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would +// be a little too expensive, and some kind of compressed version isn't worth the trouble. +static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) { + // Is this a negated set? + bool member_result = true; + if (*ccl == '^') { + member_result = false; + ++ccl; + } -#define INCCL(_c) \ - (cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) \ - : (wmemchr(ccls, (_c), ccle - ccls) != NULL)) + // The first character may be ']' or '-' without being special. + if (*ccl == '-' || *ccl == ']') { + // A literal match? + if (*ccl == wc) return member_result; + ++ccl; + } + + while (*ccl && *ccl != ']') { + // The last character may be '-' without being special. + if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') { + wchar_t first = *(ccl - 1); + wchar_t last = *(ccl + 1); + if (first <= last) { + // In the range? + if (wc >= first && wc <= last) return member_result; + ccl += 2; + continue; + } + // A '-' is not considered to be part of a range if the character after + // is not greater than the character before, so fall through... + } + // A literal match? + if (*ccl == wc) return member_result; + ++ccl; + } + return !member_result; +} #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wframe-larger-than=" @@ -109,9 +140,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) int nread; /* number of characters consumed from fp */ int base; /* base argument to strtoimax/strtouimax */ wchar_t buf[BUF]; /* buffer for numeric conversions */ - const wchar_t* ccls; /* character class start */ - const wchar_t* ccle; /* character class end */ - int cclcompl; /* ccl is complemented? */ + const wchar_t* ccl; wint_t wi; /* handy wint_t */ char* mbp; /* multibyte string pointer for %c %s %[ */ size_t nconv; /* number of bytes in mb. conversion */ @@ -127,7 +156,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) nconversions = 0; nread = 0; base = 0; /* XXX just to keep gcc happy */ - ccls = ccle = NULL; for (;;) { c = *fmt++; if (c == 0) { @@ -210,9 +238,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) /* * Conversions. * Those marked `compat' are for 4.[123]BSD compatibility. - * - * (According to ANSI, E and X formats are supposed - * to the same as e and x. Sorry about that.) */ case 'D': /* compat */ flags |= LONG; @@ -266,15 +291,10 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) break; case '[': - ccls = fmt; - if (*fmt == '^') { - cclcompl = 1; - fmt++; - } else - cclcompl = 0; + ccl = fmt; + if (*fmt == '^') fmt++; if (*fmt == ']') fmt++; while (*fmt != '\0' && *fmt != ']') fmt++; - ccle = fmt; fmt++; flags |= NOSKIP; c = CT_CCL; @@ -387,12 +407,12 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) /* take only those things in the class */ if ((flags & SUPPRESS) && (flags & LONG)) { n = 0; - while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi)) n++; + while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl)) n++; if (wi != WEOF) __ungetwc(wi, fp); if (n == 0) goto match_failure; } else if (flags & LONG) { p0 = p = va_arg(ap, wchar_t*); - while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi)) + while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl)) *p++ = (wchar_t)wi; if (wi != WEOF) __ungetwc(wi, fp); n = p - p0; @@ -403,7 +423,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*); n = 0; memset(&mbs, 0, sizeof(mbs)); - while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && INCCL(wi)) { + while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && in_ccl(wi, ccl)) { if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) { nconv = wcrtomb(mbp, wi, &mbs); if (nconv == (size_t)-1) goto input_failure; @@ -418,6 +438,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) n++; } if (wi != WEOF) __ungetwc(wi, fp); + if (n == 0) goto match_failure; if (!(flags & SUPPRESS)) { *mbp = 0; nassigned++; diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp index 05a19d103..f0e0ab6d4 100644 --- a/tests/stdio_test.cpp +++ b/tests/stdio_test.cpp @@ -923,33 +923,99 @@ TEST(STDIO_TEST, putc) { TEST(STDIO_TEST, sscanf_swscanf) { struct stuff { char s1[123]; - int i1; + int i1, i2; + char cs1[3]; + char s2[3]; + char c1; double d1; float f1; - char s2[123]; + char s3[123]; void Check() { - ASSERT_STREQ("hello", s1); - ASSERT_EQ(123, i1); - ASSERT_DOUBLE_EQ(1.23, d1); - ASSERT_FLOAT_EQ(9.0f, f1); - ASSERT_STREQ("world", s2); + EXPECT_STREQ("hello", s1); + EXPECT_EQ(123, i1); + EXPECT_EQ(456, i2); + EXPECT_EQ('a', cs1[0]); + EXPECT_EQ('b', cs1[1]); + EXPECT_EQ('x', cs1[2]); // No terminating NUL. + EXPECT_STREQ("AB", s2); // Terminating NUL. + EXPECT_EQ('!', c1); + EXPECT_DOUBLE_EQ(1.23, d1); + EXPECT_FLOAT_EQ(9.0f, f1); + EXPECT_STREQ("world", s3); } } s; - memset(&s, 0, sizeof(s)); - ASSERT_EQ(5, sscanf(" hello 123 1.23 0x1.2p3 world", - "%s %i %lf %f %s", - s.s1, &s.i1, &s.d1, &s.f1, s.s2)); + memset(&s, 'x', sizeof(s)); + ASSERT_EQ(9, sscanf(" hello 123 456abAB! 1.23 0x1.2p3 world", + "%s %i%i%2c%[A-Z]%c %lf %f %s", + s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3)); s.Check(); - memset(&s, 0, sizeof(s)); - ASSERT_EQ(5, swscanf(L" hello 123 1.23 0x1.2p3 world", - L"%s %i %lf %f %s", - s.s1, &s.i1, &s.d1, &s.f1, s.s2)); + memset(&s, 'x', sizeof(s)); + ASSERT_EQ(9, swscanf(L" hello 123 456abAB! 1.23 0x1.2p3 world", + L"%s %i%i%2c%[A-Z]%c %lf %f %s", + s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3)); s.Check(); } +template +static void CheckScanf(int sscanf_fn(const T*, const T*, ...), + const T* input, const T* fmt, + int expected_count, const char* expected_string) { + char buf[256] = {}; + ASSERT_EQ(expected_count, sscanf_fn(input, fmt, &buf)) << fmt; + ASSERT_STREQ(expected_string, buf) << fmt; +} + +TEST(STDIO_TEST, sscanf_ccl) { + // `abc` is just those characters. + CheckScanf(sscanf, "abcd", "%[abc]", 1, "abc"); + // `a-c` is the range 'a' .. 'c'. + CheckScanf(sscanf, "abcd", "%[a-c]", 1, "abc"); + CheckScanf(sscanf, "-d", "%[a-c]", 0, ""); + CheckScanf(sscanf, "ac-bAd", "%[a--c]", 1, "ac-bA"); + // `a-c-e` is equivalent to `a-e`. + CheckScanf(sscanf, "abcdefg", "%[a-c-e]", 1, "abcde"); + // `e-a` is equivalent to `ae-` (because 'e' > 'a'). + CheckScanf(sscanf, "-a-e-b", "%[e-a]", 1, "-a-e-"); + // An initial '^' negates the set. + CheckScanf(sscanf, "abcde", "%[^d]", 1, "abc"); + CheckScanf(sscanf, "abcdefgh", "%[^c-d]", 1, "ab"); + CheckScanf(sscanf, "hgfedcba", "%[^c-d]", 1, "hgfe"); + // The first character may be ']' or '-' without being special. + CheckScanf(sscanf, "[[]]x", "%[][]", 1, "[[]]"); + CheckScanf(sscanf, "-a-x", "%[-a]", 1, "-a-"); + // The last character may be '-' without being special. + CheckScanf(sscanf, "-a-x", "%[a-]", 1, "-a-"); + // X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation). + CheckScanf(sscanf, "+,-/.", "%[+--/]", 1, "+,-/"); +} + +TEST(STDIO_TEST, swscanf_ccl) { + // `abc` is just those characters. + CheckScanf(swscanf, L"abcd", L"%[abc]", 1, "abc"); + // `a-c` is the range 'a' .. 'c'. + CheckScanf(swscanf, L"abcd", L"%[a-c]", 1, "abc"); + CheckScanf(swscanf, L"-d", L"%[a-c]", 0, ""); + CheckScanf(swscanf, L"ac-bAd", L"%[a--c]", 1, "ac-bA"); + // `a-c-e` is equivalent to `a-e`. + CheckScanf(swscanf, L"abcdefg", L"%[a-c-e]", 1, "abcde"); + // `e-a` is equivalent to `ae-` (because 'e' > 'a'). + CheckScanf(swscanf, L"-a-e-b", L"%[e-a]", 1, "-a-e-"); + // An initial '^' negates the set. + CheckScanf(swscanf, L"abcde", L"%[^d]", 1, "abc"); + CheckScanf(swscanf, L"abcdefgh", L"%[^c-d]", 1, "ab"); + CheckScanf(swscanf, L"hgfedcba", L"%[^c-d]", 1, "hgfe"); + // The first character may be ']' or '-' without being special. + CheckScanf(swscanf, L"[[]]x", L"%[][]", 1, "[[]]"); + CheckScanf(swscanf, L"-a-x", L"%[-a]", 1, "-a-"); + // The last character may be '-' without being special. + CheckScanf(swscanf, L"-a-x", L"%[a-]", 1, "-a-"); + // X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation). + CheckScanf(swscanf, L"+,-/.", L"%[+--/]", 1, "+,-/"); +} + TEST(STDIO_TEST, cantwrite_EBADF) { // If we open a file read-only... FILE* fp = fopen("/proc/version", "r");