Add sscanf %[ tests and fix the bug that fell out.
Strictly, POSIX says "If a '-' is in the scanlist and is not the first wide character, nor the second where the first wide character is a '^', nor the last wide character, the behavior is implementation-defined", but it seems unreasonable for swscanf to interpret `a-c` differently from sscanf. Make ours behave the same as each other by making swscanf work the same as sscanf. Bug: http://b/68672236 Test: ran tests Change-Id: Ia84805897628d7128e901b468e02504373730e61
This commit is contained in:
parent
c1f6219c32
commit
0d3ba1f047
3 changed files with 124 additions and 43 deletions
|
@ -83,9 +83,6 @@
|
|||
#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */
|
||||
#define CT_FLOAT 4 /* floating, i.e., strtod */
|
||||
|
||||
#define u_char unsigned char
|
||||
#define u_long unsigned long
|
||||
|
||||
static u_char* __sccl(char*, u_char*);
|
||||
|
||||
/*
|
||||
|
@ -192,9 +189,6 @@ int __svfscanf(FILE* fp, const char* fmt0, __va_list ap) {
|
|||
/*
|
||||
* Conversions.
|
||||
* Those marked `compat' are for 4.[123]BSD compatibility.
|
||||
*
|
||||
* (According to ANSI, E and X formats are supposed
|
||||
* to the same as e and x. Sorry about that.)
|
||||
*/
|
||||
case 'D': /* compat */
|
||||
flags |= LONG;
|
||||
|
|
|
@ -84,12 +84,43 @@
|
|||
#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */
|
||||
#define CT_FLOAT 4 /* floating, i.e., strtod */
|
||||
|
||||
#define u_char unsigned char
|
||||
#define u_long unsigned long
|
||||
// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would
|
||||
// be a little too expensive, and some kind of compressed version isn't worth the trouble.
|
||||
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
|
||||
// Is this a negated set?
|
||||
bool member_result = true;
|
||||
if (*ccl == '^') {
|
||||
member_result = false;
|
||||
++ccl;
|
||||
}
|
||||
|
||||
#define INCCL(_c) \
|
||||
(cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) \
|
||||
: (wmemchr(ccls, (_c), ccle - ccls) != NULL))
|
||||
// The first character may be ']' or '-' without being special.
|
||||
if (*ccl == '-' || *ccl == ']') {
|
||||
// A literal match?
|
||||
if (*ccl == wc) return member_result;
|
||||
++ccl;
|
||||
}
|
||||
|
||||
while (*ccl && *ccl != ']') {
|
||||
// The last character may be '-' without being special.
|
||||
if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') {
|
||||
wchar_t first = *(ccl - 1);
|
||||
wchar_t last = *(ccl + 1);
|
||||
if (first <= last) {
|
||||
// In the range?
|
||||
if (wc >= first && wc <= last) return member_result;
|
||||
ccl += 2;
|
||||
continue;
|
||||
}
|
||||
// A '-' is not considered to be part of a range if the character after
|
||||
// is not greater than the character before, so fall through...
|
||||
}
|
||||
// A literal match?
|
||||
if (*ccl == wc) return member_result;
|
||||
++ccl;
|
||||
}
|
||||
return !member_result;
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wframe-larger-than="
|
||||
|
@ -109,9 +140,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
int nread; /* number of characters consumed from fp */
|
||||
int base; /* base argument to strtoimax/strtouimax */
|
||||
wchar_t buf[BUF]; /* buffer for numeric conversions */
|
||||
const wchar_t* ccls; /* character class start */
|
||||
const wchar_t* ccle; /* character class end */
|
||||
int cclcompl; /* ccl is complemented? */
|
||||
const wchar_t* ccl;
|
||||
wint_t wi; /* handy wint_t */
|
||||
char* mbp; /* multibyte string pointer for %c %s %[ */
|
||||
size_t nconv; /* number of bytes in mb. conversion */
|
||||
|
@ -127,7 +156,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
nconversions = 0;
|
||||
nread = 0;
|
||||
base = 0; /* XXX just to keep gcc happy */
|
||||
ccls = ccle = NULL;
|
||||
for (;;) {
|
||||
c = *fmt++;
|
||||
if (c == 0) {
|
||||
|
@ -210,9 +238,6 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
/*
|
||||
* Conversions.
|
||||
* Those marked `compat' are for 4.[123]BSD compatibility.
|
||||
*
|
||||
* (According to ANSI, E and X formats are supposed
|
||||
* to the same as e and x. Sorry about that.)
|
||||
*/
|
||||
case 'D': /* compat */
|
||||
flags |= LONG;
|
||||
|
@ -266,15 +291,10 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
break;
|
||||
|
||||
case '[':
|
||||
ccls = fmt;
|
||||
if (*fmt == '^') {
|
||||
cclcompl = 1;
|
||||
fmt++;
|
||||
} else
|
||||
cclcompl = 0;
|
||||
ccl = fmt;
|
||||
if (*fmt == '^') fmt++;
|
||||
if (*fmt == ']') fmt++;
|
||||
while (*fmt != '\0' && *fmt != ']') fmt++;
|
||||
ccle = fmt;
|
||||
fmt++;
|
||||
flags |= NOSKIP;
|
||||
c = CT_CCL;
|
||||
|
@ -387,12 +407,12 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
/* take only those things in the class */
|
||||
if ((flags & SUPPRESS) && (flags & LONG)) {
|
||||
n = 0;
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi)) n++;
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl)) n++;
|
||||
if (wi != WEOF) __ungetwc(wi, fp);
|
||||
if (n == 0) goto match_failure;
|
||||
} else if (flags & LONG) {
|
||||
p0 = p = va_arg(ap, wchar_t*);
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && INCCL(wi))
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && in_ccl(wi, ccl))
|
||||
*p++ = (wchar_t)wi;
|
||||
if (wi != WEOF) __ungetwc(wi, fp);
|
||||
n = p - p0;
|
||||
|
@ -403,7 +423,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
|
||||
n = 0;
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && INCCL(wi)) {
|
||||
while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && in_ccl(wi, ccl)) {
|
||||
if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
|
||||
nconv = wcrtomb(mbp, wi, &mbs);
|
||||
if (nconv == (size_t)-1) goto input_failure;
|
||||
|
@ -418,6 +438,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap)
|
|||
n++;
|
||||
}
|
||||
if (wi != WEOF) __ungetwc(wi, fp);
|
||||
if (n == 0) goto match_failure;
|
||||
if (!(flags & SUPPRESS)) {
|
||||
*mbp = 0;
|
||||
nassigned++;
|
||||
|
|
|
@ -923,33 +923,99 @@ TEST(STDIO_TEST, putc) {
|
|||
TEST(STDIO_TEST, sscanf_swscanf) {
|
||||
struct stuff {
|
||||
char s1[123];
|
||||
int i1;
|
||||
int i1, i2;
|
||||
char cs1[3];
|
||||
char s2[3];
|
||||
char c1;
|
||||
double d1;
|
||||
float f1;
|
||||
char s2[123];
|
||||
char s3[123];
|
||||
|
||||
void Check() {
|
||||
ASSERT_STREQ("hello", s1);
|
||||
ASSERT_EQ(123, i1);
|
||||
ASSERT_DOUBLE_EQ(1.23, d1);
|
||||
ASSERT_FLOAT_EQ(9.0f, f1);
|
||||
ASSERT_STREQ("world", s2);
|
||||
EXPECT_STREQ("hello", s1);
|
||||
EXPECT_EQ(123, i1);
|
||||
EXPECT_EQ(456, i2);
|
||||
EXPECT_EQ('a', cs1[0]);
|
||||
EXPECT_EQ('b', cs1[1]);
|
||||
EXPECT_EQ('x', cs1[2]); // No terminating NUL.
|
||||
EXPECT_STREQ("AB", s2); // Terminating NUL.
|
||||
EXPECT_EQ('!', c1);
|
||||
EXPECT_DOUBLE_EQ(1.23, d1);
|
||||
EXPECT_FLOAT_EQ(9.0f, f1);
|
||||
EXPECT_STREQ("world", s3);
|
||||
}
|
||||
} s;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
ASSERT_EQ(5, sscanf(" hello 123 1.23 0x1.2p3 world",
|
||||
"%s %i %lf %f %s",
|
||||
s.s1, &s.i1, &s.d1, &s.f1, s.s2));
|
||||
memset(&s, 'x', sizeof(s));
|
||||
ASSERT_EQ(9, sscanf(" hello 123 456abAB! 1.23 0x1.2p3 world",
|
||||
"%s %i%i%2c%[A-Z]%c %lf %f %s",
|
||||
s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
|
||||
s.Check();
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
ASSERT_EQ(5, swscanf(L" hello 123 1.23 0x1.2p3 world",
|
||||
L"%s %i %lf %f %s",
|
||||
s.s1, &s.i1, &s.d1, &s.f1, s.s2));
|
||||
memset(&s, 'x', sizeof(s));
|
||||
ASSERT_EQ(9, swscanf(L" hello 123 456abAB! 1.23 0x1.2p3 world",
|
||||
L"%s %i%i%2c%[A-Z]%c %lf %f %s",
|
||||
s.s1, &s.i1, &s.i2, s.cs1, s.s2, &s.c1, &s.d1, &s.f1, s.s3));
|
||||
s.Check();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void CheckScanf(int sscanf_fn(const T*, const T*, ...),
|
||||
const T* input, const T* fmt,
|
||||
int expected_count, const char* expected_string) {
|
||||
char buf[256] = {};
|
||||
ASSERT_EQ(expected_count, sscanf_fn(input, fmt, &buf)) << fmt;
|
||||
ASSERT_STREQ(expected_string, buf) << fmt;
|
||||
}
|
||||
|
||||
TEST(STDIO_TEST, sscanf_ccl) {
|
||||
// `abc` is just those characters.
|
||||
CheckScanf(sscanf, "abcd", "%[abc]", 1, "abc");
|
||||
// `a-c` is the range 'a' .. 'c'.
|
||||
CheckScanf(sscanf, "abcd", "%[a-c]", 1, "abc");
|
||||
CheckScanf(sscanf, "-d", "%[a-c]", 0, "");
|
||||
CheckScanf(sscanf, "ac-bAd", "%[a--c]", 1, "ac-bA");
|
||||
// `a-c-e` is equivalent to `a-e`.
|
||||
CheckScanf(sscanf, "abcdefg", "%[a-c-e]", 1, "abcde");
|
||||
// `e-a` is equivalent to `ae-` (because 'e' > 'a').
|
||||
CheckScanf(sscanf, "-a-e-b", "%[e-a]", 1, "-a-e-");
|
||||
// An initial '^' negates the set.
|
||||
CheckScanf(sscanf, "abcde", "%[^d]", 1, "abc");
|
||||
CheckScanf(sscanf, "abcdefgh", "%[^c-d]", 1, "ab");
|
||||
CheckScanf(sscanf, "hgfedcba", "%[^c-d]", 1, "hgfe");
|
||||
// The first character may be ']' or '-' without being special.
|
||||
CheckScanf(sscanf, "[[]]x", "%[][]", 1, "[[]]");
|
||||
CheckScanf(sscanf, "-a-x", "%[-a]", 1, "-a-");
|
||||
// The last character may be '-' without being special.
|
||||
CheckScanf(sscanf, "-a-x", "%[a-]", 1, "-a-");
|
||||
// X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
|
||||
CheckScanf(sscanf, "+,-/.", "%[+--/]", 1, "+,-/");
|
||||
}
|
||||
|
||||
TEST(STDIO_TEST, swscanf_ccl) {
|
||||
// `abc` is just those characters.
|
||||
CheckScanf(swscanf, L"abcd", L"%[abc]", 1, "abc");
|
||||
// `a-c` is the range 'a' .. 'c'.
|
||||
CheckScanf(swscanf, L"abcd", L"%[a-c]", 1, "abc");
|
||||
CheckScanf(swscanf, L"-d", L"%[a-c]", 0, "");
|
||||
CheckScanf(swscanf, L"ac-bAd", L"%[a--c]", 1, "ac-bA");
|
||||
// `a-c-e` is equivalent to `a-e`.
|
||||
CheckScanf(swscanf, L"abcdefg", L"%[a-c-e]", 1, "abcde");
|
||||
// `e-a` is equivalent to `ae-` (because 'e' > 'a').
|
||||
CheckScanf(swscanf, L"-a-e-b", L"%[e-a]", 1, "-a-e-");
|
||||
// An initial '^' negates the set.
|
||||
CheckScanf(swscanf, L"abcde", L"%[^d]", 1, "abc");
|
||||
CheckScanf(swscanf, L"abcdefgh", L"%[^c-d]", 1, "ab");
|
||||
CheckScanf(swscanf, L"hgfedcba", L"%[^c-d]", 1, "hgfe");
|
||||
// The first character may be ']' or '-' without being special.
|
||||
CheckScanf(swscanf, L"[[]]x", L"%[][]", 1, "[[]]");
|
||||
CheckScanf(swscanf, L"-a-x", L"%[-a]", 1, "-a-");
|
||||
// The last character may be '-' without being special.
|
||||
CheckScanf(swscanf, L"-a-x", L"%[a-]", 1, "-a-");
|
||||
// X--Y is [X--] + Y, not [X--] + [--Y] (a bug in my initial implementation).
|
||||
CheckScanf(swscanf, L"+,-/.", L"%[+--/]", 1, "+,-/");
|
||||
}
|
||||
|
||||
TEST(STDIO_TEST, cantwrite_EBADF) {
|
||||
// If we open a file read-only...
|
||||
FILE* fp = fopen("/proc/version", "r");
|
||||
|
|
Loading…
Reference in a new issue