From c1fd492ac5c14a42acfbbd9b47ed178fbf1378d3 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Wed, 11 Nov 2015 18:02:29 +0000 Subject: [PATCH] Revert "Revert "adb/base: fix adb push of Unicode filenames on Win32"" This reverts commit cc8cd59456ca485a51cd6fd388c8bcb1af4a8f9b. With the dependency on libcutils (for gettid for non-bionic) removed, this no longer breaks the build. Change-Id: I645bd6876e2502ddc1535b69af1e645c0df9d178 --- adb/sysdeps_win32.cpp | 137 +++++-------- base/Android.mk | 18 ++ base/file.cpp | 4 + base/include/base/utf8.h | 83 ++++++++ base/logging.cpp | 29 ++- base/utf8.cpp | 170 +++++++++++++++++ base/utf8_test.cpp | 402 +++++++++++++++++++++++++++++++++++++++ fastboot/Android.mk | 2 +- 8 files changed, 757 insertions(+), 88 deletions(-) create mode 100755 base/include/base/utf8.h create mode 100755 base/utf8.cpp create mode 100755 base/utf8_test.cpp diff --git a/adb/sysdeps_win32.cpp b/adb/sysdeps_win32.cpp index beaca16a5..0634da5d5 100644 --- a/adb/sysdeps_win32.cpp +++ b/adb/sysdeps_win32.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include "adb.h" @@ -3466,100 +3467,61 @@ static void _widen_fatal(const char *fmt, ...) { exit(-1); } -// TODO: Consider implementing widen() and narrow() out of std::wstring_convert -// once libcxx is supported on Windows. Or, consider libutils/Unicode.cpp. - -// Convert from UTF-8 to UTF-16. A size of -1 specifies a NULL terminated -// string. Any other size specifies the number of chars to convert, excluding -// any NULL terminator (if you're passing an explicit size, you probably don't -// have a NULL terminated string in the first place). -std::wstring widen(const char* utf8, const int size) { - // Note: Do not call SystemErrorCodeToString() from widen() because - // SystemErrorCodeToString() calls narrow() which may call fatal() which - // calls adb_vfprintf() which calls widen(), potentially causing infinite - // recursion. - const int chars_to_convert = MultiByteToWideChar(CP_UTF8, 0, utf8, size, - NULL, 0); - if (chars_to_convert <= 0) { - // UTF-8 to UTF-16 should be lossless, so we don't expect this to fail. - _widen_fatal("MultiByteToWideChar failed counting: %d, " - "GetLastError: %lu", chars_to_convert, GetLastError()); - } - +// Convert size number of UTF-8 char's to UTF-16. Fatal exit on error. +std::wstring widen(const char* utf8, const size_t size) { std::wstring utf16; - size_t chars_to_allocate = chars_to_convert; - if (size == -1) { - // chars_to_convert includes a NULL terminator, so subtract space - // for that because resize() includes that itself. - --chars_to_allocate; + if (!android::base::UTF8ToWide(utf8, size, &utf16)) { + // If we call fatal() here and fatal() calls widen(), then there may be + // infinite recursion. To avoid this, call _widen_fatal() instead. + _widen_fatal("cannot convert from UTF-8 to UTF-16"); } - utf16.resize(chars_to_allocate); - - // This uses &string[0] to get write-access to the entire string buffer - // which may be assuming that the chars are all contiguous, but it seems - // to work and saves us the hassle of using a temporary - // std::vector. - const int result = MultiByteToWideChar(CP_UTF8, 0, utf8, size, &utf16[0], - chars_to_convert); - if (result != chars_to_convert) { - // UTF-8 to UTF-16 should be lossless, so we don't expect this to fail. - _widen_fatal("MultiByteToWideChar failed conversion: %d, " - "GetLastError: %lu", result, GetLastError()); - } - - // If a size was passed in (size != -1), then the string is NULL terminated - // by a NULL char that was written by std::string::resize(). If size == -1, - // then MultiByteToWideChar() read a NULL terminator from the original - // string and converted it to a NULL UTF-16 char in the output. return utf16; } -// Convert a NULL terminated string from UTF-8 to UTF-16. +// Convert a NULL-terminated string of UTF-8 characters to UTF-16. Fatal exit +// on error. std::wstring widen(const char* utf8) { - // Pass -1 to let widen() determine the string length. - return widen(utf8, -1); -} - -// Convert from UTF-8 to UTF-16. -std::wstring widen(const std::string& utf8) { - return widen(utf8.c_str(), utf8.length()); -} - -// Convert from UTF-16 to UTF-8. -std::string narrow(const std::wstring& utf16) { - return narrow(utf16.c_str()); -} - -// Convert from UTF-16 to UTF-8. -std::string narrow(const wchar_t* utf16) { - // Note: Do not call SystemErrorCodeToString() from narrow() because - // SystemErrorCodeToString() calls narrow() and we don't want potential - // infinite recursion. - const int chars_required = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, NULL, - 0, NULL, NULL); - if (chars_required <= 0) { - // UTF-16 to UTF-8 should be lossless, so we don't expect this to fail. - fatal("WideCharToMultiByte failed counting: %d, GetLastError: %lu", - chars_required, GetLastError()); + std::wstring utf16; + if (!android::base::UTF8ToWide(utf8, &utf16)) { + // If we call fatal() here and fatal() calls widen(), then there may be + // infinite recursion. To avoid this, call _widen_fatal() instead. + _widen_fatal("cannot convert from UTF-8 to UTF-16"); } - std::string utf8; - // Subtract space for the NULL terminator because resize() includes - // that itself. Note that this could potentially throw a std::bad_alloc - // exception. - utf8.resize(chars_required - 1); + return utf16; +} - // This uses &string[0] to get write-access to the entire string buffer - // which may be assuming that the chars are all contiguous, but it seems - // to work and saves us the hassle of using a temporary - // std::vector. - const int result = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, &utf8[0], - chars_required, NULL, NULL); - if (result != chars_required) { - // UTF-16 to UTF-8 should be lossless, so we don't expect this to fail. - fatal("WideCharToMultiByte failed conversion: %d, GetLastError: %lu", - result, GetLastError()); +// Convert a UTF-8 std::string (including any embedded NULL characters) to +// UTF-16. Fatal exit on error. +std::wstring widen(const std::string& utf8) { + std::wstring utf16; + if (!android::base::UTF8ToWide(utf8, &utf16)) { + // If we call fatal() here and fatal() calls widen(), then there may be + // infinite recursion. To avoid this, call _widen_fatal() instead. + _widen_fatal("cannot convert from UTF-8 to UTF-16"); + } + + return utf16; +} + +// Convert a UTF-16 std::wstring (including any embedded NULL characters) to +// UTF-8. Fatal exit on error. +std::string narrow(const std::wstring& utf16) { + std::string utf8; + if (!android::base::WideToUTF8(utf16, &utf8)) { + fatal("cannot convert from UTF-16 to UTF-8"); + } + + return utf8; +} + +// Convert a NULL-terminated string of UTF-16 characters to UTF-8. Fatal exit +// on error. +std::string narrow(const wchar_t* utf16) { + std::string utf8; + if (!android::base::WideToUTF8(utf16, &utf8)) { + fatal("cannot convert from UTF-16 to UTF-8"); } return utf8; @@ -3702,9 +3664,12 @@ int adb_chmod(const char* path, int mode) { // on error. static int _console_write_utf8(const char* buf, size_t size, FILE* stream, HANDLE console) { - // Convert from UTF-8 to UTF-16. + std::wstring output; + + // Try to convert from data that might be UTF-8 to UTF-16, ignoring errors. + // Data might not be UTF-8 if the user cat's random data, runs dmesg, etc. // This could throw std::bad_alloc. - const std::wstring output(widen(buf, size)); + (void)android::base::UTF8ToWide(buf, size, &output); // Note that this does not do \n => \r\n translation because that // doesn't seem necessary for the Windows console. For the Windows diff --git a/base/Android.mk b/base/Android.mk index 613636b61..4e6bd10a2 100644 --- a/base/Android.mk +++ b/base/Android.mk @@ -23,6 +23,9 @@ libbase_src_files := \ strings.cpp \ test_utils.cpp \ +libbase_windows_src_files := \ + utf8.cpp \ + libbase_test_src_files := \ file_test.cpp \ logging_test.cpp \ @@ -31,6 +34,9 @@ libbase_test_src_files := \ strings_test.cpp \ test_main.cpp \ +libbase_test_windows_src_files := \ + utf8_test.cpp \ + libbase_cppflags := \ -Wall \ -Wextra \ @@ -42,6 +48,9 @@ include $(CLEAR_VARS) LOCAL_MODULE := libbase LOCAL_CLANG := true LOCAL_SRC_FILES := $(libbase_src_files) +LOCAL_SRC_FILES_darwin := $(libbase_darwin_src_files) +LOCAL_SRC_FILES_linux := $(libbase_linux_src_files) +LOCAL_SRC_FILES_windows := $(libbase_windows_src_files) LOCAL_C_INCLUDES := $(LOCAL_PATH)/include LOCAL_CPPFLAGS := $(libbase_cppflags) LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include @@ -64,6 +73,9 @@ include $(BUILD_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libbase LOCAL_SRC_FILES := $(libbase_src_files) +LOCAL_SRC_FILES_darwin := $(libbase_darwin_src_files) +LOCAL_SRC_FILES_linux := $(libbase_linux_src_files) +LOCAL_SRC_FILES_windows := $(libbase_windows_src_files) LOCAL_C_INCLUDES := $(LOCAL_PATH)/include LOCAL_CPPFLAGS := $(libbase_cppflags) LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include @@ -88,6 +100,9 @@ include $(CLEAR_VARS) LOCAL_MODULE := libbase_test LOCAL_CLANG := true LOCAL_SRC_FILES := $(libbase_test_src_files) +LOCAL_SRC_FILES_darwin := $(libbase_test_darwin_src_files) +LOCAL_SRC_FILES_linux := $(libbase_test_linux_src_files) +LOCAL_SRC_FILES_windows := $(libbase_test_windows_src_files) LOCAL_C_INCLUDES := $(LOCAL_PATH) LOCAL_CPPFLAGS := $(libbase_cppflags) LOCAL_SHARED_LIBRARIES := libbase @@ -100,6 +115,9 @@ include $(CLEAR_VARS) LOCAL_MODULE := libbase_test LOCAL_MODULE_HOST_OS := darwin linux windows LOCAL_SRC_FILES := $(libbase_test_src_files) +LOCAL_SRC_FILES_darwin := $(libbase_test_darwin_src_files) +LOCAL_SRC_FILES_linux := $(libbase_test_linux_src_files) +LOCAL_SRC_FILES_windows := $(libbase_test_windows_src_files) LOCAL_C_INCLUDES := $(LOCAL_PATH) LOCAL_CPPFLAGS := $(libbase_cppflags) LOCAL_SHARED_LIBRARIES := libbase diff --git a/base/file.cpp b/base/file.cpp index 3468dcfbf..7b5e7b13d 100644 --- a/base/file.cpp +++ b/base/file.cpp @@ -24,6 +24,7 @@ #include #include "base/macros.h" // For TEMP_FAILURE_RETRY on Darwin. +#include "base/utf8.h" #define LOG_TAG "base.file" #include "cutils/log.h" #include "utils/Compat.h" @@ -35,6 +36,9 @@ namespace android { namespace base { +// Versions of standard library APIs that support UTF-8 strings. +using namespace android::base::utf8; + bool ReadFdToString(int fd, std::string* content) { content->clear(); diff --git a/base/include/base/utf8.h b/base/include/base/utf8.h new file mode 100755 index 000000000..3cc168da4 --- /dev/null +++ b/base/include/base/utf8.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BASE_UTF8_H +#define BASE_UTF8_H + +#ifdef _WIN32 +#include +#endif + +namespace android { +namespace base { + +// Only available on Windows because this is only needed on Windows. +#ifdef _WIN32 +// Convert size number of UTF-16 wchar_t's to UTF-8. Returns whether the +// conversion was done successfully. +bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8); + +// Convert a NULL-terminated string of UTF-16 characters to UTF-8. Returns +// whether the conversion was done successfully. +bool WideToUTF8(const wchar_t* utf16, std::string* utf8); + +// Convert a UTF-16 std::wstring (including any embedded NULL characters) to +// UTF-8. Returns whether the conversion was done successfully. +bool WideToUTF8(const std::wstring& utf16, std::string* utf8); + +// Convert size number of UTF-8 char's to UTF-16. Returns whether the conversion +// was done successfully. +bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16); + +// Convert a NULL-terminated string of UTF-8 characters to UTF-16. Returns +// whether the conversion was done successfully. +bool UTF8ToWide(const char* utf8, std::wstring* utf16); + +// Convert a UTF-8 std::string (including any embedded NULL characters) to +// UTF-16. Returns whether the conversion was done successfully. +bool UTF8ToWide(const std::string& utf8, std::wstring* utf16); +#endif + +// The functions in the utf8 namespace take UTF-8 strings. For Windows, these +// are wrappers, for non-Windows these just expose existing APIs. To call these +// functions, use: +// +// // anonymous namespace to avoid conflict with existing open(), unlink(), etc. +// namespace { +// // Import functions into anonymous namespace. +// using namespace android::base::utf8; +// +// void SomeFunction(const char* name) { +// int fd = open(name, ...); // Calls android::base::utf8::open(). +// ... +// unlink(name); // Calls android::base::utf8::unlink(). +// } +// } +namespace utf8 { + +#ifdef _WIN32 +int open(const char* name, int flags, ...); +int unlink(const char* name); +#else +using ::open; +using ::unlink; +#endif + +} // namespace utf8 +} // namespace base +} // namespace android + +#endif // BASE_UTF8_H diff --git a/base/logging.cpp b/base/logging.cpp index 248cd0617..01a046a0f 100644 --- a/base/logging.cpp +++ b/base/logging.cpp @@ -53,6 +53,33 @@ #include #endif +// For gettid. +#if defined(__APPLE__) +#include "AvailabilityMacros.h" // For MAC_OS_X_VERSION_MAX_ALLOWED +#include +#include +#include +#include +#include +#elif defined(__linux__) && !defined(__ANDROID__) +#include +#include +#elif defined(_WIN32) +#include +#endif + +static pid_t GetThreadId() { +#if defined(__BIONIC__) + return gettid(); +#elif defined(__APPLE__) + return syscall(SYS_thread_selfid); +#elif defined(__linux__) + return syscall(__NR_gettid); +#elif defined(_WIN32) + return GetCurrentThreadId(); +#endif +} + namespace { #ifndef _WIN32 using std::mutex; @@ -158,7 +185,7 @@ void StderrLogger(LogId, LogSeverity severity, const char*, const char* file, "Mismatch in size of log_characters and values in LogSeverity"); char severity_char = log_characters[severity]; fprintf(stderr, "%s %c %5d %5d %s:%u] %s\n", ProgramInvocationName(), - severity_char, getpid(), gettid(), file, line, message); + severity_char, getpid(), GetThreadId(), file, line, message); } diff --git a/base/utf8.cpp b/base/utf8.cpp new file mode 100755 index 000000000..62a118f56 --- /dev/null +++ b/base/utf8.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "base/utf8.h" + +#include + +#include + +#include "base/logging.h" + +namespace android { +namespace base { + +bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) { + utf8->clear(); + + if (size == 0) { + return true; + } + + // TODO: Consider using std::wstring_convert once libcxx is supported on + // Windows. + + // Only Vista or later has this flag that causes WideCharToMultiByte() to + // return an error on invalid characters. + const DWORD flags = +#if (WINVER >= 0x0600) + WC_ERR_INVALID_CHARS; +#else + 0; +#endif + + const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size, + NULL, 0, NULL, NULL); + if (chars_required <= 0) { + return false; + } + + // This could potentially throw a std::bad_alloc exception. + utf8->resize(chars_required); + + const int result = WideCharToMultiByte(CP_UTF8, flags, utf16, size, + &(*utf8)[0], chars_required, NULL, + NULL); + if (result != chars_required) { + CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result + << " chars to buffer of " << chars_required << " chars"; + utf8->clear(); + return false; + } + + return true; +} + +bool WideToUTF8(const wchar_t* utf16, std::string* utf8) { + // Compute string length of NULL-terminated string with wcslen(). + return WideToUTF8(utf16, wcslen(utf16), utf8); +} + +bool WideToUTF8(const std::wstring& utf16, std::string* utf8) { + // Use the stored length of the string which allows embedded NULL characters + // to be converted. + return WideToUTF8(utf16.c_str(), utf16.length(), utf8); +} + +// Internal helper function that takes MultiByteToWideChar() flags. +static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size, + std::wstring* utf16, const DWORD flags) { + utf16->clear(); + + if (size == 0) { + return true; + } + + // TODO: Consider using std::wstring_convert once libcxx is supported on + // Windows. + const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size, + NULL, 0); + if (chars_required <= 0) { + return false; + } + + // This could potentially throw a std::bad_alloc exception. + utf16->resize(chars_required); + + const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size, + &(*utf16)[0], chars_required); + if (result != chars_required) { + CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result + << " chars to buffer of " << chars_required << " chars"; + utf16->clear(); + return false; + } + + return true; +} + +bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) { + // If strictly interpreting as UTF-8 succeeds, return success. + if (_UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) { + return true; + } + + // Fallback to non-strict interpretation, allowing invalid characters and + // converting as best as possible, and return false to signify a problem. + (void)_UTF8ToWideWithFlags(utf8, size, utf16, 0); + return false; +} + +bool UTF8ToWide(const char* utf8, std::wstring* utf16) { + // Compute string length of NULL-terminated string with strlen(). + return UTF8ToWide(utf8, strlen(utf8), utf16); +} + +bool UTF8ToWide(const std::string& utf8, std::wstring* utf16) { + // Use the stored length of the string which allows embedded NULL characters + // to be converted. + return UTF8ToWide(utf8.c_str(), utf8.length(), utf16); +} + +// Versions of standard library APIs that support UTF-8 strings. +namespace utf8 { + +int open(const char* name, int flags, ...) { + std::wstring name_utf16; + if (!UTF8ToWide(name, &name_utf16)) { + errno = EINVAL; + return -1; + } + + int mode = 0; + if ((flags & O_CREAT) != 0) { + va_list args; + va_start(args, flags); + mode = va_arg(args, int); + va_end(args); + } + + return _wopen(name_utf16.c_str(), flags, mode); +} + +int unlink(const char* name) { + std::wstring name_utf16; + if (!UTF8ToWide(name, &name_utf16)) { + errno = EINVAL; + return -1; + } + + return _wunlink(name_utf16.c_str()); +} + +} // namespace utf8 +} // namespace base +} // namespace android diff --git a/base/utf8_test.cpp b/base/utf8_test.cpp new file mode 100755 index 000000000..bbb54b1b1 --- /dev/null +++ b/base/utf8_test.cpp @@ -0,0 +1,402 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "base/utf8.h" + +#include + +#include "base/macros.h" + +namespace android { +namespace base { + +TEST(UTFStringConversionsTest, ConvertInvalidUTF8) { + std::wstring wide; + + // Standalone \xa2 is an invalid UTF-8 sequence, so this should return an + // error. Concatenate two C/C++ literal string constants to prevent the + // compiler from giving an error about "\xa2af" containing a "hex escape + // sequence out of range". + EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide)); + + // Even if an invalid character is encountered, UTF8ToWide() should still do + // its best to convert the rest of the string. sysdeps_win32.cpp: + // _console_write_utf8() depends on this behavior. + // + // Thus, we verify that the valid characters are converted, but we ignore the + // specific replacement character that UTF8ToWide() may replace the invalid + // UTF-8 characters with because we want to allow that to change if the + // implementation changes. + EXPECT_EQ(0, wide.find(L"before")); + const wchar_t after_wide[] = L"after"; + EXPECT_EQ(wide.length() - (arraysize(after_wide) - 1), wide.find(after_wide)); +} + +// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/utf_string_conversions_unittest.cc + +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The tests below from utf_string_conversions_unittest.cc check for this +// preprocessor symbol, so define it, as it is appropriate for Windows. +#define WCHAR_T_IS_UTF16 +static_assert(sizeof(wchar_t) == 2, "wchar_t is not 2 bytes"); + +// The tests below from utf_string_conversions_unittest.cc call versions of +// UTF8ToWide() and WideToUTF8() that don't return success/failure, so these are +// stub implementations with that signature. These are just for testing and +// should not be moved to base because they assert/expect no errors which is +// probably not a good idea (or at least it is something that should be left +// up to the caller, not a base library). + +static std::wstring UTF8ToWide(const std::string& utf8) { + std::wstring utf16; + EXPECT_TRUE(UTF8ToWide(utf8, &utf16)); + return utf16; +} + +static std::string WideToUTF8(const std::wstring& utf16) { + std::string utf8; + EXPECT_TRUE(WideToUTF8(utf16, &utf8)); + return utf8; +} + +namespace { + +const wchar_t* const kConvertRoundtripCases[] = { + L"Google Video", + // "网页 图片 资讯更多 »" + L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", + // "Παγκόσμιος Ιστός" + L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" + L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", + // "Поиск страниц на русском" + L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" + L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" + L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", + // "전체서비스" + L"\xc804\xccb4\xc11c\xbe44\xc2a4", + + // Test characters that take more than 16 bits. This will depend on whether + // wchar_t is 16 or 32 bits. +#if defined(WCHAR_T_IS_UTF16) + L"\xd800\xdf00", + // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) + L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", +#elif defined(WCHAR_T_IS_UTF32) + L"\x10300", + // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) + L"\x11d40\x11d41\x11d42\x11d43\x11d44", +#endif +}; + +} // namespace + +TEST(UTFStringConversionsTest, ConvertUTF8AndWide) { + // we round-trip all the wide strings through UTF-8 to make sure everything + // agrees on the conversion. This uses the stream operators to test them + // simultaneously. + for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { + std::ostringstream utf8; + utf8 << WideToUTF8(kConvertRoundtripCases[i]); + std::wostringstream wide; + wide << UTF8ToWide(utf8.str()); + + EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); + } +} + +TEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) { + // An empty std::wstring should be converted to an empty std::string, + // and vice versa. + std::wstring wempty; + std::string empty; + EXPECT_EQ(empty, WideToUTF8(wempty)); + EXPECT_EQ(wempty, UTF8ToWide(empty)); +} + +TEST(UTFStringConversionsTest, ConvertUTF8ToWide) { + struct UTF8ToWideCase { + const char* utf8; + const wchar_t* wide; + bool success; + } convert_cases[] = { + // Regular UTF-8 input. + {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, + // Non-character is passed through. + {"\xef\xbf\xbfHello", L"\xffffHello", true}, + // Truncated UTF-8 sequence. + {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, + // Truncated off the end. + {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, + // Non-shortest-form UTF-8. + {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, + // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. + // Note that for whatever reason, this test fails on Windows XP. + {"\xed\xb0\x80", L"\xfffd", false}, + // Non-BMP characters. The second is a non-character regarded as valid. + // The result will either be in UTF-16 or UTF-32. +#if defined(WCHAR_T_IS_UTF16) + {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, + {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, +#elif defined(WCHAR_T_IS_UTF32) + {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, + {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, +#endif + }; + + for (size_t i = 0; i < arraysize(convert_cases); i++) { + std::wstring converted; + const bool success = UTF8ToWide(convert_cases[i].utf8, + strlen(convert_cases[i].utf8), + &converted); + EXPECT_EQ(convert_cases[i].success, success); + // The original test always compared expected and converted, but don't do + // that because our implementation of UTF8ToWide() does not guarantee to + // produce the same output in error situations. + if (success) { + std::wstring expected(convert_cases[i].wide); + EXPECT_EQ(expected, converted); + } + } + + // Manually test an embedded NULL. + std::wstring converted; + EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted)); + ASSERT_EQ(3U, converted.length()); + EXPECT_EQ(static_cast(0), converted[0]); + EXPECT_EQ('Z', converted[1]); + EXPECT_EQ('\t', converted[2]); + + // Make sure that conversion replaces, not appends. + EXPECT_TRUE(UTF8ToWide("B", 1, &converted)); + ASSERT_EQ(1U, converted.length()); + EXPECT_EQ('B', converted[0]); +} + +#if defined(WCHAR_T_IS_UTF16) +// This test is only valid when wchar_t == UTF-16. +TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { + struct WideToUTF8Case { + const wchar_t* utf16; + const char* utf8; + bool success; + } convert_cases[] = { + // Regular UTF-16 input. + {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, + // Test a non-BMP character. + {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, + // Non-characters are passed through. + {L"\xffffHello", "\xEF\xBF\xBFHello", true}, + {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, + // The first character is a truncated UTF-16 character. + // Note that for whatever reason, this test fails on Windows XP. + {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", +#if (WINVER >= 0x0600) + // Only Vista and later has a new API/flag that correctly returns false. + false +#else + true +#endif + }, + // Truncated at the end. + // Note that for whatever reason, this test fails on Windows XP. + {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", +#if (WINVER >= 0x0600) + // Only Vista and later has a new API/flag that correctly returns false. + false +#else + true +#endif + }, + }; + + for (size_t i = 0; i < arraysize(convert_cases); i++) { + std::string converted; + const bool success = WideToUTF8(convert_cases[i].utf16, + wcslen(convert_cases[i].utf16), + &converted); + EXPECT_EQ(convert_cases[i].success, success); + // The original test always compared expected and converted, but don't do + // that because our implementation of WideToUTF8() does not guarantee to + // produce the same output in error situations. + if (success) { + std::string expected(convert_cases[i].utf8); + EXPECT_EQ(expected, converted); + } + } +} + +#elif defined(WCHAR_T_IS_UTF32) +// This test is only valid when wchar_t == UTF-32. +TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { + struct WideToUTF8Case { + const wchar_t* utf32; + const char* utf8; + bool success; + } convert_cases[] = { + // Regular 16-bit input. + {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, + // Test a non-BMP character. + {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, + // Non-characters are passed through. + {L"\xffffHello", "\xEF\xBF\xBFHello", true}, + {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, + // Invalid Unicode code points. + {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, + // The first character is a truncated UTF-16 character. + {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, + {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, + }; + + for (size_t i = 0; i < arraysize(convert_cases); i++) { + std::string converted; + EXPECT_EQ(convert_cases[i].success, + WideToUTF8(convert_cases[i].utf32, + wcslen(convert_cases[i].utf32), + &converted)); + std::string expected(convert_cases[i].utf8); + EXPECT_EQ(expected, converted); + } +} +#endif // defined(WCHAR_T_IS_UTF32) + +// The test below uses these types and functions, so just do enough to get the +// test running. +typedef wchar_t char16; +typedef std::wstring string16; + +template +static void* WriteInto(T* t, size_t size) { + // std::(w)string::resize() already includes space for a NULL terminator. + t->resize(size - 1); + return &(*t)[0]; +} + +// A stub implementation that calls a helper from above, just to get the test +// below working. This is just for testing and should not be moved to base +// because this ignores errors which is probably not a good idea, plus it takes +// a string16 type which we don't really have. +static std::string UTF16ToUTF8(const string16& utf16) { + return WideToUTF8(utf16); +} + +TEST(UTFStringConversionsTest, ConvertMultiString) { + static char16 multi16[] = { + 'f', 'o', 'o', '\0', + 'b', 'a', 'r', '\0', + 'b', 'a', 'z', '\0', + '\0' + }; + static char multi[] = { + 'f', 'o', 'o', '\0', + 'b', 'a', 'r', '\0', + 'b', 'a', 'z', '\0', + '\0' + }; + string16 multistring16; + memcpy(WriteInto(&multistring16, arraysize(multi16)), multi16, + sizeof(multi16)); + EXPECT_EQ(arraysize(multi16) - 1, multistring16.length()); + std::string expected; + memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); + EXPECT_EQ(arraysize(multi) - 1, expected.length()); + const std::string& converted = UTF16ToUTF8(multistring16); + EXPECT_EQ(arraysize(multi) - 1, converted.length()); + EXPECT_EQ(expected, converted); +} + +// The tests below from sys_string_conversions_unittest.cc call SysWideToUTF8() +// and SysUTF8ToWide(), so these are stub implementations that call the helpers +// above. These are just for testing and should not be moved to base because +// they ignore errors which is probably not a good idea. + +static std::string SysWideToUTF8(const std::wstring& utf16) { + return WideToUTF8(utf16); +} + +static std::wstring SysUTF8ToWide(const std::string& utf8) { + return UTF8ToWide(utf8); +} + +// Below is adapted from https://chromium.googlesource.com/chromium/src/+/master/base/strings/sys_string_conversions_unittest.cc + +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifdef WCHAR_T_IS_UTF32 +static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; +#else +static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; +#endif + +TEST(SysStrings, SysWideToUTF8) { + EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); + EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); + + // >16 bits + EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); + + // Error case. When Windows finds a UTF-16 character going off the end of + // a string, it just converts that literal value to UTF-8, even though this + // is invalid. + // + // This is what XP does, but Vista has different behavior, so we don't bother + // verifying it: + // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", + // SysWideToUTF8(L"\x4f60\xd800zyxw")); + + // Test embedded NULLs. + std::wstring wide_null(L"a"); + wide_null.push_back(0); + wide_null.push_back('b'); + + std::string expected_null("a"); + expected_null.push_back(0); + expected_null.push_back('b'); + + EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); +} + +TEST(SysStrings, SysUTF8ToWide) { + EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); + EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); + // >16 bits + EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); + + // Error case. When Windows finds an invalid UTF-8 character, it just skips + // it. This seems weird because it's inconsistent with the reverse conversion. + // + // This is what XP does, but Vista has different behavior, so we don't bother + // verifying it: + // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); + + // Test embedded NULLs. + std::string utf8_null("a"); + utf8_null.push_back(0); + utf8_null.push_back('b'); + + std::wstring expected_null(L"a"); + expected_null.push_back(0); + expected_null.push_back('b'); + + EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); +} + +} // namespace base +} // namespace android diff --git a/fastboot/Android.mk b/fastboot/Android.mk index 3f201ec1c..7a7366eee 100644 --- a/fastboot/Android.mk +++ b/fastboot/Android.mk @@ -49,7 +49,7 @@ LOCAL_STATIC_LIBRARIES := \ libutils \ liblog \ libz \ - libbase + libbase \ LOCAL_STATIC_LIBRARIES_darwin := libselinux LOCAL_STATIC_LIBRARIES_linux := libselinux