diff --git a/libc/Android.bp b/libc/Android.bp index 845945aed..7b0ac23f3 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -1409,7 +1409,6 @@ cc_library_static { "bionic/mblen.cpp", "bionic/mbrtoc16.cpp", "bionic/mbrtoc32.cpp", - "bionic/mbstate.cpp", "bionic/memmem.cpp", "bionic/mempcpy.cpp", "bionic/mkdir.cpp", diff --git a/libc/bionic/c16rtomb.cpp b/libc/bionic/c16rtomb.cpp index 77512bec9..93749c63d 100644 --- a/libc/bionic/c16rtomb.cpp +++ b/libc/bionic/c16rtomb.cpp @@ -50,18 +50,18 @@ size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps) { mbstate_set_byte(state, 2, (c32 & 0x00ff00) >> 8); return 0; } else if (is_low_surrogate(c16)) { - return reset_and_return_illegal(EINVAL, state); + return mbstate_reset_and_return_illegal(EINVAL, state); } else { return c32rtomb(s, static_cast(c16), state); } } else { if (!is_low_surrogate(c16)) { - return reset_and_return_illegal(EINVAL, state); + return mbstate_reset_and_return_illegal(EINVAL, state); } char32_t c32 = ((mbstate_get_byte(state, 3) << 16) | (mbstate_get_byte(state, 2) << 8) | (c16 & ~0xdc00)) + 0x10000; - return reset_and_return(c32rtomb(s, c32, NULL), state); + return mbstate_reset_and_return(c32rtomb(s, c32, NULL), state); } } diff --git a/libc/bionic/c32rtomb.cpp b/libc/bionic/c32rtomb.cpp index d3231c027..ebe9cd335 100644 --- a/libc/bionic/c32rtomb.cpp +++ b/libc/bionic/c32rtomb.cpp @@ -38,7 +38,7 @@ size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps) { if (s == NULL) { // Equivalent to c32rtomb(buf, U'\0', ps). - return reset_and_return(1, state); + return mbstate_reset_and_return(1, state); } // POSIX states that if char32_t is a null wide character, a null byte shall @@ -47,11 +47,11 @@ size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps) { // stored. if (c32 == U'\0') { *s = '\0'; - reset_and_return(1, state); + return mbstate_reset_and_return(1, state); } if (!mbsinit(state)) { - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if ((c32 & ~0x7f) == 0) { diff --git a/libc/bionic/mbrtoc16.cpp b/libc/bionic/mbrtoc16.cpp index 6878a1158..21805168d 100644 --- a/libc/bionic/mbrtoc16.cpp +++ b/libc/bionic/mbrtoc16.cpp @@ -55,7 +55,7 @@ static size_t finish_surrogate(char16_t* pc16, mbstate_t* state) { char16_t trail = mbstate_get_byte(state, 1) << 8 | mbstate_get_byte(state, 0); *pc16 = trail; - return reset_and_return(mbstate_get_byte(state, 3), state); + return mbstate_reset_and_return(mbstate_get_byte(state, 3), state); } size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps) { @@ -76,13 +76,13 @@ size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps) { if (__MB_IS_ERR(nconv)) { return nconv; } else if (nconv == 0) { - return reset_and_return(nconv, state); + return mbstate_reset_and_return(nconv, state); } else if (c32 > 0x10ffff) { // Input cannot be encoded as UTF-16. - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } else if (c32 < 0x10000) { *pc16 = static_cast(c32); - return reset_and_return(nconv, state); + return mbstate_reset_and_return(nconv, state); } else { return begin_surrogate(c32, pc16, nconv, state); } diff --git a/libc/bionic/mbrtoc32.cpp b/libc/bionic/mbrtoc32.cpp index bd40ecf72..f004b784e 100644 --- a/libc/bionic/mbrtoc32.cpp +++ b/libc/bionic/mbrtoc32.cpp @@ -41,7 +41,7 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { // Full state verification is done when decoding the sequence (after we have // all the bytes). if (mbstate_get_byte(state, 3) != 0) { - return reset_and_return_illegal(EINVAL, state); + return mbstate_reset_and_return_illegal(EINVAL, state); } if (s == NULL) { @@ -98,7 +98,7 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { lower_bound = 0x10000; } else { // Malformed input; input is not UTF-8. See RFC 3629. - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } // Fill in the state. @@ -107,7 +107,7 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { for (i = 0; i < MIN(bytes_wanted, n); i++) { if (!mbsinit(state) && ((*s & 0xc0) != 0x80)) { // Malformed input; bad characters in the middle of a character. - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } mbstate_set_byte(state, bytes_so_far + i, *s++); } @@ -125,14 +125,14 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { if (c32 < lower_bound) { // Malformed input; redundant encoding. - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 == 0xfffe || c32 == 0xffff) { // Malformed input; invalid code points. - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if (pc32 != NULL) { *pc32 = c32; } - return reset_and_return(c32 == U'\0' ? 0 : bytes_wanted, state); + return mbstate_reset_and_return(c32 == U'\0' ? 0 : bytes_wanted, state); } diff --git a/libc/bionic/mbstate.cpp b/libc/bionic/mbstate.cpp deleted file mode 100644 index cb327d839..000000000 --- a/libc/bionic/mbstate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "private/bionic_mbstate.h" - -#include - -__LIBC_HIDDEN__ size_t mbstate_bytes_so_far(const mbstate_t* ps) { - return - (ps->__seq[2] != 0) ? 3 : - (ps->__seq[1] != 0) ? 2 : - (ps->__seq[0] != 0) ? 1 : 0; -} - -__LIBC_HIDDEN__ void mbstate_set_byte(mbstate_t* ps, int i, char byte) { - ps->__seq[i] = static_cast(byte); -} - -__LIBC_HIDDEN__ uint8_t mbstate_get_byte(const mbstate_t* ps, int n) { - return ps->__seq[n]; -} - -__LIBC_HIDDEN__ size_t reset_and_return_illegal(int _errno, mbstate_t* ps) { - errno = _errno; - *(reinterpret_cast(ps->__seq)) = 0; - return __MB_ERR_ILLEGAL_SEQUENCE; -} - -__LIBC_HIDDEN__ size_t reset_and_return(int _return, mbstate_t* ps) { - *(reinterpret_cast(ps->__seq)) = 0; - return _return; -} diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp index 36fc2a253..62023d609 100644 --- a/libc/bionic/wchar.cpp +++ b/libc/bionic/wchar.cpp @@ -74,7 +74,7 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat // character appears as anything but the first byte of a // multibyte sequence. Check now to avoid doing it in the loops. if (nmc > 0 && mbstate_bytes_so_far(state) > 0 && static_cast((*src)[0]) < 0x80) { - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } // Measure only? @@ -83,23 +83,23 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat if (static_cast((*src)[i]) < 0x80) { // Fast path for plain ASCII characters. if ((*src)[i] == '\0') { - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } r = 1; } else { r = mbrtowc(NULL, *src + i, nmc - i, state); if (r == __MB_ERR_ILLEGAL_SEQUENCE) { - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if (r == __MB_ERR_INCOMPLETE_SEQUENCE) { - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if (r == 0) { - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } } } - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } // Actually convert, updating `dst` and `src`. @@ -110,26 +110,26 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat r = 1; if ((*src)[i] == '\0') { *src = nullptr; - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } } else { r = mbrtowc(dst + o, *src + i, nmc - i, state); if (r == __MB_ERR_ILLEGAL_SEQUENCE) { *src += i; - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if (r == __MB_ERR_INCOMPLETE_SEQUENCE) { *src += nmc; - return reset_and_return(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } if (r == 0) { *src = NULL; - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } } } *src += i; - return reset_and_return(o, state); + return mbstate_reset_and_return(o, state); } size_t mbsrtowcs(wchar_t* dst, const char** src, size_t len, mbstate_t* ps) { @@ -149,7 +149,7 @@ size_t wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len, mbstat mbstate_t* state = (ps == NULL) ? &__private_state : ps; if (!mbsinit(state)) { - return reset_and_return_illegal(EILSEQ, state); + return mbstate_reset_and_return_illegal(EILSEQ, state); } char buf[MB_LEN_MAX]; diff --git a/libc/private/bionic_mbstate.h b/libc/private/bionic_mbstate.h index 018b47ce2..292959af5 100644 --- a/libc/private/bionic_mbstate.h +++ b/libc/private/bionic_mbstate.h @@ -43,11 +43,31 @@ __BEGIN_DECLS #define __MB_IS_ERR(rv) (rv == __MB_ERR_ILLEGAL_SEQUENCE || \ rv == __MB_ERR_INCOMPLETE_SEQUENCE) -size_t mbstate_bytes_so_far(const mbstate_t* ps); -void mbstate_set_byte(mbstate_t* ps, int i, char byte); -uint8_t mbstate_get_byte(const mbstate_t* ps, int n); -size_t reset_and_return_illegal(int _errno, mbstate_t* ps); -size_t reset_and_return(int _return, mbstate_t* ps); +static inline __wur size_t mbstate_bytes_so_far(const mbstate_t* ps) { + return + (ps->__seq[2] != 0) ? 3 : + (ps->__seq[1] != 0) ? 2 : + (ps->__seq[0] != 0) ? 1 : 0; +} + +static inline void mbstate_set_byte(mbstate_t* ps, int i, char byte) { + ps->__seq[i] = static_cast(byte); +} + +static inline __wur uint8_t mbstate_get_byte(const mbstate_t* ps, int n) { + return ps->__seq[n]; +} + +static inline __wur size_t mbstate_reset_and_return_illegal(int _errno, mbstate_t* ps) { + errno = _errno; + *(reinterpret_cast(ps->__seq)) = 0; + return __MB_ERR_ILLEGAL_SEQUENCE; +} + +static inline __wur size_t mbstate_reset_and_return(int _return, mbstate_t* ps) { + *(reinterpret_cast(ps->__seq)) = 0; + return _return; +} __END_DECLS diff --git a/tests/uchar_test.cpp b/tests/uchar_test.cpp index c887f8abd..8b2966731 100644 --- a/tests/uchar_test.cpp +++ b/tests/uchar_test.cpp @@ -280,7 +280,10 @@ TEST(uchar, c32rtomb) { char bytes[MB_LEN_MAX]; + memset(bytes, 1, sizeof(bytes)); EXPECT_EQ(1U, c32rtomb(bytes, L'\0', NULL)); + EXPECT_EQ('\0', bytes[0]); + EXPECT_EQ('\x01', bytes[1]); memset(bytes, 0, sizeof(bytes)); EXPECT_EQ(1U, c32rtomb(bytes, L'h', NULL)); @@ -408,4 +411,3 @@ TEST(uchar, mbrtoc32_incomplete) { GTEST_LOG_(INFO) << "uchar.h is unavailable.\n"; #endif } - diff --git a/tests/wchar_test.cpp b/tests/wchar_test.cpp index 830eb70aa..097647fe5 100644 --- a/tests/wchar_test.cpp +++ b/tests/wchar_test.cpp @@ -445,6 +445,18 @@ TEST(wchar, mbsnrtowcs) { ASSERT_EQ(L'e', dst[1]); ASSERT_EQ(L'l', dst[2]); ASSERT_EQ(&s[3], src); + + memset(dst, 0, sizeof(dst)); + const char* incomplete = "\xc2"; // Incomplete UTF-8 sequence. + src = incomplete; + errno = 0; + ASSERT_EQ(static_cast(-1), mbsnrtowcs(dst, &src, SIZE_MAX, 3, nullptr)); + ASSERT_EQ(EILSEQ, errno); + + src = incomplete; + errno = 0; + ASSERT_EQ(static_cast(-1), mbsnrtowcs(nullptr, &src, SIZE_MAX, 3, nullptr)); + ASSERT_EQ(EILSEQ, errno); } TEST(wchar, wcsftime) {