From 7890484caebc9fcb69e2dcdbc733809599d4d165 Mon Sep 17 00:00:00 2001 From: zijunzhao Date: Tue, 9 May 2023 00:54:00 +0000 Subject: [PATCH] Implement C23 scanf 'w' length modifiers wN: Specifies that a following b, d, i, o, u, x, or X conversion specifier applies to an integer argument with a specific width where N is a positive decimal integer with no leading zeros (the argument will have been promoted according to the integer promotions, but its value shall be converted to the unpromoted type); or that a following n conversion specifier applies to a pointer to an integer type argument with a width of N bits. All minimum-width integer types (7.22.1.2) and exact-width integer types (7.22.1.1) defined in the header <stdint.h> shall be supported. Other supported values of N are implementation-defined. Bug: b/271903607 Test: adb shell Change-Id: I595fd2ac7bc40d9fb7f1935b39933a6cc068eeff --- libc/stdio/scanf_common.h | 115 +++++++++++++++++++++++++++++++ libc/stdio/vfscanf.cpp | 61 ++++------------- libc/stdio/vfwscanf.cpp | 70 ++++--------------- tests/stdio_test.cpp | 138 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 281 insertions(+), 103 deletions(-) create mode 100644 libc/stdio/scanf_common.h diff --git a/libc/stdio/scanf_common.h b/libc/stdio/scanf_common.h new file mode 100644 index 000000000..8132e90de --- /dev/null +++ b/libc/stdio/scanf_common.h @@ -0,0 +1,115 @@ +/* $OpenBSD: vfscanf.c,v 1.31 2014/03/19 05:17:01 guenther Exp $ */ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "local.h" + +#include +#include +#include + +#define BUF 513 /* Maximum length of numeric string. */ + +// Flags used during conversion. 
+// Size/type: +#define LONG 0x00001 // l: long or double +#define LONGDBL 0x00002 // L: long double +#define SHORT 0x00004 // h: short +#define SHORTSHORT 0x00008 // hh: 8 bit integer +#define LLONG 0x00010 // ll: long long (+ deprecated q: quad) +#define POINTER 0x00020 // p: void* (as hex) +#define SIZEINT 0x00040 // z: (signed) size_t +#define MAXINT 0x00080 // j: intmax_t +#define PTRINT 0x00100 // t: ptrdiff_t +#define NOSKIP 0x00200 // [ or c: do not skip blanks +// Modifiers: +#define SUPPRESS 0x00400 // *: suppress assignment +#define UNSIGNED 0x00800 // %[oupxX] conversions +#define ALLOCATE 0x01000 // m: allocate a char* +// Internal use during integer parsing: +#define SIGNOK 0x02000 // +/- is (still) legal +#define HAVESIGN 0x04000 // Sign detected +#define NDIGITS 0x08000 // No digits detected +#define PFXOK 0x10000 // "0x" prefix is (still) legal +#define PFBOK 0x20000 // "0b" prefix is (still) legal +#define NZDIGITS 0x40000 // No zero digits detected + +// Conversion types. +#define CT_CHAR 0 // %c conversion +#define CT_CCL 1 // %[...] conversion +#define CT_STRING 2 // %s conversion +#define CT_INT 3 // Integer: strtoimax/strtoumax +#define CT_FLOAT 4 // Float: strtod + +#define to_digit(c) static_cast((c) - '0') +#define is_digit(c) ((unsigned)to_digit(c) <= 9) + +// Append a digit to a value and check for overflow. +#define APPEND_DIGIT(val, dig) \ + do { \ + if ((val) > INT_MAX / 10) \ + errno = ENOMEM; \ + else { \ + (val) *= 10; \ + if ((val) > INT_MAX - to_digit((dig))) \ + errno = ENOMEM; \ + else \ + (val) += to_digit((dig)); \ + } \ + } while (0) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +// Translate a fixed-size integer width from the %w/%wf length modifiers into a +// size flag. Supported sizes are 8, 16, 32, and 64 so far; others call __fortify_fatal(). +// See details in bionic/libc/include/stdint.h +static int w_to_flag(int size, bool fast) { + static constexpr int fast_size = sizeof(void*) == 8 ? 
LLONG : 0; + if (size == 8) return SHORTSHORT; + if (size == 16) return fast ? fast_size : SHORT; + if (size == 32) return fast ? fast_size : 0; + if (size == 64) return LLONG; + __fortify_fatal("%%w%s%d is unsupported", fast ? "f" : "", size); +} + +#pragma clang diagnostic pop \ No newline at end of file diff --git a/libc/stdio/vfscanf.cpp b/libc/stdio/vfscanf.cpp index dfd001d35..65f54a521 100644 --- a/libc/stdio/vfscanf.cpp +++ b/libc/stdio/vfscanf.cpp @@ -31,53 +31,7 @@ * SUCH DAMAGE. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "local.h" - -#include -#include -#include - -#define BUF 513 /* Maximum length of numeric string. */ - -// Flags used during conversion. -// Size/type: -#define LONG 0x00001 // l: long or double -#define LONGDBL 0x00002 // L: long double -#define SHORT 0x00004 // h: short -#define SHORTSHORT 0x00008 // hh: 8 bit integer -#define LLONG 0x00010 // ll: long long (+ deprecated q: quad) -#define POINTER 0x00020 // p: void* (as hex) -#define SIZEINT 0x00040 // z: (signed) size_t -#define MAXINT 0x00080 // j: intmax_t -#define PTRINT 0x00100 // t: ptrdiff_t -#define NOSKIP 0x00200 // [ or c: do not skip blanks -// Modifiers: -#define SUPPRESS 0x00400 // *: suppress assignment -#define UNSIGNED 0x00800 // %[oupxX] conversions -#define ALLOCATE 0x01000 // m: allocate a char* -// Internal use during integer parsing: -#define SIGNOK 0x02000 // +/- is (still) legal -#define HAVESIGN 0x04000 // Sign detected -#define NDIGITS 0x08000 // No digits detected -#define PFXOK 0x10000 // "0x" prefix is (still) legal -#define PFBOK 0x20000 // "0b" prefix is (still) legal -#define NZDIGITS 0x40000 // No zero digits detected - -// Conversion types. -#define CT_CHAR 0 // %c conversion -#define CT_CCL 1 // %[...] 
conversion -#define CT_STRING 2 // %s conversion -#define CT_INT 3 // Integer: strtoimax/strtoumax -#define CT_FLOAT 4 // Float: strtod +#include "scanf_common.h" static const unsigned char* __sccl(char*, const unsigned char*); @@ -122,6 +76,7 @@ int __svfscanf(FILE* fp, const char* fmt0, va_list ap) { */ again: c = *fmt++; +reswitch: switch (c) { case '%': literal: @@ -220,6 +175,18 @@ literal: base = 10; break; + case 'w': { + int size = 0; + bool fast = false; + c = *fmt++; + while (is_digit(c)) { + APPEND_DIGIT(size, c); + c = *fmt++; + } + flags |= w_to_flag(size, fast); + goto reswitch; + } + case 'X': case 'x': flags |= PFXOK; /* enable 0x prefixing */ diff --git a/libc/stdio/vfwscanf.cpp b/libc/stdio/vfwscanf.cpp index 5f21acd0b..14b175415 100644 --- a/libc/stdio/vfwscanf.cpp +++ b/libc/stdio/vfwscanf.cpp @@ -31,62 +31,7 @@ * SUCH DAMAGE. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "local.h" - -#include - -#define BUF 513 /* Maximum length of numeric string. */ - -/* - * Flags used during conversion. - */ -#define LONG 0x00001 /* l: long or double */ -#define LONGDBL 0x00002 /* L: long double */ -#define SHORT 0x00004 /* h: short */ -#define SHORTSHORT 0x00008 /* hh: 8 bit integer */ -#define LLONG 0x00010 /* ll: long long (+ deprecated q: quad) */ -#define POINTER 0x00020 /* p: void * (as hex) */ -#define SIZEINT 0x00040 /* z: (signed) size_t */ -#define MAXINT 0x00080 /* j: intmax_t */ -#define PTRINT 0x00100 /* t: ptrdiff_t */ -#define NOSKIP 0x00200 /* [ or c: do not skip blanks */ -#define SUPPRESS 0x00400 /* *: suppress assignment */ -#define UNSIGNED 0x00800 /* %[oupxX] conversions */ - -/* - * The following are used in numeric conversions only: - * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point; - * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral. 
- */ -#define SIGNOK 0x01000 /* +/- is (still) legal */ -#define HAVESIGN 0x02000 /* sign detected */ -#define NDIGITS 0x04000 /* no digits detected */ - -#define DPTOK 0x08000 /* (float) decimal point is still legal */ -#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */ - -#define PFBOK 0x20000 /* 0x prefix is (still) legal */ -#define PFXOK 0x40000 /* 0x prefix is (still) legal */ -#define NZDIGITS 0x80000 /* no zero digits detected */ - -/* - * Conversion types. - */ -#define CT_CHAR 0 /* %c conversion */ -#define CT_CCL 1 /* %[...] conversion */ -#define CT_STRING 2 /* %s conversion */ -#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */ -#define CT_FLOAT 4 /* floating, i.e., strtod */ - +#include "scanf_common.h" // An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would // be a little too expensive, and some kind of compressed version isn't worth the trouble. static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) { @@ -176,6 +121,7 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) */ again: c = *fmt++; + reswitch: switch (c) { case '%': literal: @@ -273,6 +219,18 @@ int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) base = 10; break; + case 'w': { + int size = 0; + bool fast = false; + c = *fmt++; + while (is_digit(c)) { + APPEND_DIGIT(size, c); + c = *fmt++; + } + flags |= w_to_flag(size, fast); + goto reswitch; + } + case 'X': case 'x': flags |= PFXOK; /* enable 0x prefixing */ diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp index b85edfb2a..c20597b37 100644 --- a/tests/stdio_test.cpp +++ b/tests/stdio_test.cpp @@ -3486,3 +3486,141 @@ TEST(STDIO_TEST, swprintf_invalid_wf_width) { GTEST_SKIP() << "no %w in glibc"; #endif } + +TEST(STDIO_TEST, sscanf_w_base) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" + int8_t a; + EXPECT_EQ(1, sscanf("<0b101>", 
"<%w8b>", &a)); + EXPECT_EQ(0b101, a); + int8_t b1; + EXPECT_EQ(1, sscanf("<0xFF>", "<%w8i>", &b1)); + EXPECT_EQ(-1, b1); + int8_t b2; + EXPECT_EQ(1, sscanf("<0x1FF>", "<%w8i>", &b2)); + EXPECT_EQ(-1, b2); + int16_t c1; + EXPECT_EQ(1, sscanf("<0xFFFF>", "<%w16i>", &c1)); + EXPECT_EQ(-1, c1); + uint16_t c2; + EXPECT_EQ(1, sscanf("<64>", "<%w16d>", &c2)); + EXPECT_EQ(64, c2); + int32_t d; + EXPECT_EQ(1, sscanf("<021>", "<%w32o>", &d)); + EXPECT_EQ(021, d); + uint32_t e; + EXPECT_EQ(1, sscanf("<-1>", "<%w32u>", &e)); + EXPECT_EQ(4294967295, e); + int64_t f; + EXPECT_EQ(1, sscanf("<0x3b>", "<%w64x>", &f)); + EXPECT_EQ(0x3b, f); + EXPECT_EQ(1, sscanf("<0x3b>", "<%w64X>", &f)); + EXPECT_EQ(0x3B, f); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} + +TEST(STDIO_TEST, sscanf_w_combination) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat" +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" +#pragma clang diagnostic ignored "-Wformat-extra-args" + uint32_t a; + int64_t b; + char c; + + EXPECT_EQ(3, sscanf("<0b10101010101010101010101010101010 0x3333333344444444 1>", + "<%w32b %w64x %c>", &a, &b, &c)); + EXPECT_EQ(0xaaaaaaaa, a); + EXPECT_EQ(0x3333333344444444, b); + EXPECT_EQ('1', c); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} + +TEST(STDIO_TEST, sscanf_invalid_w_width) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" + int32_t a; + EXPECT_DEATH(sscanf("<100>", "<%w20d>", &a), "%w20 is unsupported"); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} + +TEST(STDIO_TEST, swscanf_w_base) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" + int8_t a; + EXPECT_EQ(1, swscanf(L"<0b101>", L"<%w8b>", &a)); + EXPECT_EQ(0b101, a); + int8_t b1; + EXPECT_EQ(1, 
swscanf(L"<0xFF>", L"<%w8i>", &b1)); + EXPECT_EQ(-1, b1); + int8_t b2; + EXPECT_EQ(1, swscanf(L"<0x1FF>", L"<%w8i>", &b2)); + EXPECT_EQ(-1, b2); + int16_t c1; + EXPECT_EQ(1, swscanf(L"<0xFFFF>", L"<%w16i>", &c1)); + EXPECT_EQ(-1, c1); + uint16_t c2; + EXPECT_EQ(1, swscanf(L"<64>", L"<%w16d>", &c2)); + EXPECT_EQ(64, c2); + int32_t d; + EXPECT_EQ(1, swscanf(L"<021>", L"<%w32o>", &d)); + EXPECT_EQ(021, d); + uint32_t e; + EXPECT_EQ(1, swscanf(L"<-1>", L"<%w32u>", &e)); + EXPECT_EQ(4294967295, e); + int64_t f; + EXPECT_EQ(1, swscanf(L"<0x3b>", L"<%w64x>", &f)); + EXPECT_EQ(0x3b, f); + EXPECT_EQ(1, swscanf(L"<0x3b>", L"<%w64X>", &f)); + EXPECT_EQ(0x3B, f); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} + +TEST(STDIO_TEST, swscanf_w_combination) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat" +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" +#pragma clang diagnostic ignored "-Wformat-extra-args" + uint32_t a; + int64_t b; + char c; + + EXPECT_EQ(3, swscanf(L"<0b10101010101010101010101010101010 0x3333333344444444 1>", + L"<%w32b %w64x %c>", &a, &b, &c)); + EXPECT_EQ(0xaaaaaaaa, a); + EXPECT_EQ(0x3333333344444444, b); + EXPECT_EQ('1', c); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} + +TEST(STDIO_TEST, swscanf_invalid_w_width) { +#if defined(__BIONIC__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-invalid-specifier" + int32_t a; + EXPECT_DEATH(swscanf(L"<100>", L"<%w20d>", &a), "%w20 is unsupported"); +#pragma clang diagnostic pop +#else + GTEST_SKIP() << "no %w in glibc"; +#endif +} \ No newline at end of file