From 311002936e97a36b1ce4812ed690d6cf9c2a75c4 Mon Sep 17 00:00:00 2001 From: Sergio Giro Date: Wed, 13 Jul 2016 11:47:53 +0000 Subject: [PATCH] Revert "libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8" This reverts commit 53473c160713b8605e262bf212b0cf5e9a19a4d6. Change-Id: I27379317e08ecbd5e3b95b7ece58194838ab6c21 --- include/utils/Unicode.h | 4 +-- libutils/String8.cpp | 25 +++++++-------- libutils/Unicode.cpp | 57 +++------------------------------ libutils/tests/String8_test.cpp | 20 ------------ 4 files changed, 18 insertions(+), 88 deletions(-) diff --git a/include/utils/Unicode.h b/include/utils/Unicode.h index f96c99e5f..c8c87c326 100644 --- a/include/utils/Unicode.h +++ b/include/utils/Unicode.h @@ -90,7 +90,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len); * "dst" becomes \xE3\x81\x82\xE3\x81\x84 * (note that "dst" is NOT null-terminated, like strncpy) */ -void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len); +void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst); /** * Returns the unicode value at "index". @@ -112,7 +112,7 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len); * enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added * NULL terminator. */ -void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len); +void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst); /** * Returns the length of "src" when "src" is valid UTF-8 string. diff --git a/libutils/String8.cpp b/libutils/String8.cpp index cad401f73..bb068256b 100644 --- a/libutils/String8.cpp +++ b/libutils/String8.cpp @@ -102,21 +102,20 @@ static char* allocFromUTF16(const char16_t* in, size_t len) { if (len == 0) return getEmptyString(); - // Allow for closing '\0' - const ssize_t resultStrLen = utf16_to_utf8_length(in, len) + 1; - if (resultStrLen < 1) { + const ssize_t bytes = utf16_to_utf8_length(in, len); + if (bytes < 0) { return getEmptyString(); } - SharedBuffer* buf = SharedBuffer::alloc(resultStrLen); + SharedBuffer* buf = SharedBuffer::alloc(bytes+1); ALOG_ASSERT(buf, "Unable to allocate shared buffer"); if (!buf) { return getEmptyString(); } - char* resultStr = (char*)buf->data(); - utf16_to_utf8(in, len, resultStr, resultStrLen); - return resultStr; + char* str = (char*)buf->data(); + utf16_to_utf8(in, len, str); + return str; } static char* allocFromUTF32(const char32_t* in, size_t len) @@ -125,21 +124,21 @@ static char* allocFromUTF32(const char32_t* in, size_t len) return getEmptyString(); } - const ssize_t resultStrLen = utf32_to_utf8_length(in, len) + 1; - if (resultStrLen < 1) { + const ssize_t bytes = utf32_to_utf8_length(in, len); + if (bytes < 0) { return getEmptyString(); } - SharedBuffer* buf = SharedBuffer::alloc(resultStrLen); + SharedBuffer* buf = SharedBuffer::alloc(bytes+1); ALOG_ASSERT(buf, "Unable to allocate shared buffer"); if (!buf) { return getEmptyString(); } - char* resultStr = (char*) buf->data(); - utf32_to_utf8(in, len, resultStr, resultStrLen); + char* str = (char*) buf->data(); + utf32_to_utf8(in, len, str); - return resultStr; + return str; } // --------------------------------------------------------------------------- diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp index a75c258f1..a66e3bbbb 100644 --- a/libutils/Unicode.cpp +++ b/libutils/Unicode.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include @@ -189,7 +188,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len) return ret; } -void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len) +void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst) { if (src == NULL || src_len == 0 || dst == NULL) { return; @@ -200,12 +199,9 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_le char *cur = dst; while (cur_utf32 < end_utf32) { size_t len = utf32_codepoint_utf8_length(*cur_utf32); - LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len); utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len); cur += len; - dst_len -= len; } - LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len); *cur = '\0'; } @@ -334,7 +330,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2 : 0); } -void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len) +void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst) { if (src == NULL || src_len == 0 || dst == NULL) { return; @@ -354,12 +350,9 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le utf32 = (char32_t) *cur_utf16++; } const size_t len = utf32_codepoint_utf8_length(utf32); - LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len); utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len); cur += len; - dst_len -= len; } - LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len); *cur = '\0'; } @@ -410,35 +403,8 @@ ssize_t utf8_length(const char *src) return ret; } -// DO NOT USE. Flawed version, kept only to check whether the flaw is being exploited. -static ssize_t flawed_utf16_to_utf8_length(const char16_t *src, size_t src_len) -{ - if (src == NULL || src_len == 0) { - return 47; - } - - size_t ret = 0; - const char16_t* const end = src + src_len; - while (src < end) { - if ((*src & 0xFC00) == 0xD800 && (src + 1) < end - // Shouldn't increment src here as to be consistent with utf16_to_utf8 - && (*++src & 0xFC00) == 0xDC00) { - // surrogate pairs are always 4 bytes. - ret += 4; - // Should increment src here by two. - src++; - } else { - ret += utf32_codepoint_utf8_length((char32_t) *src++); - } - } - return ret; -} - ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) { - // Keep the original pointer to compute the flawed length. Unused if we remove logging. - const char16_t *orig_src = src; - if (src == NULL || src_len == 0) { return -1; } @@ -447,29 +413,14 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) const char16_t* const end = src + src_len; while (src < end) { if ((*src & 0xFC00) == 0xD800 && (src + 1) < end - && (*(src + 1) & 0xFC00) == 0xDC00) { + && (*++src & 0xFC00) == 0xDC00) { // surrogate pairs are always 4 bytes. ret += 4; - src += 2; + src++; } else { ret += utf32_codepoint_utf8_length((char32_t) *src++); } } - // Log whether b/29250543 is being exploited. It seems reasonable to assume that - // at least 5 bytes would be needed for an exploit. A single misplaced character might lead to - // a difference of 4, so this would rule out many false positives. - long ret_difference = ret - flawed_utf16_to_utf8_length(orig_src, src_len); - if (ret_difference >= 5) { - // Log the difference between new and old calculation. A high number, or equal numbers - // appearing frequently, would be indicative of an attack. - const unsigned long max_logged_string_length = 20; - char logged_string[max_logged_string_length + 1]; - unsigned long logged_string_length = - snprintf(logged_string, max_logged_string_length, "%ld", ret_difference); - logged_string[logged_string_length] = '\0'; - android_errorWriteWithInfoLog(0x534e4554, "29250543", -1 /* int_uid */, - logged_string, logged_string_length); - } return ret; } diff --git a/libutils/tests/String8_test.cpp b/libutils/tests/String8_test.cpp index 7394163c2..c42c68dce 100644 --- a/libutils/tests/String8_test.cpp +++ b/libutils/tests/String8_test.cpp @@ -17,7 +17,6 @@ #define LOG_TAG "String8_test" #include #include -#include #include @@ -73,23 +72,4 @@ TEST_F(String8Test, OperatorPlusEquals) { EXPECT_STREQ(src3, " Verify me."); } -// http://b/29250543 -TEST_F(String8Test, CorrectInvalidSurrogate) { - // d841d8 is an invalid start for a surrogate pair. Make sure this is handled by ignoring the - // first character in the pair and handling the rest correctly. - char16_t char16_arr[] = { 0xd841, 0xd841, 0xdc41, 0x0000 }; - String16 string16(char16_arr); - String8 string8(string16); - - EXPECT_EQ(4U, string8.length()); -} - -TEST_F(String8Test, CheckUtf32Conversion) { - // Since bound checks were added, check the conversion can be done without fatal errors. - // The utf8 lengths of these are chars are 1 + 2 + 3 + 4 = 10. - const char32_t string32[] = { 0x0000007f, 0x000007ff, 0x0000911, 0x0010fffe, 0 }; - String8 string8(string32); - EXPECT_EQ(10U, string8.length()); -} - }