adb: win32: remove widen()/narrow() in favor of UTF8ToWide()/WideToUTF8()

Now that we have a more standardized API (also available in Chromium),
switch to it. Another benefit is real error handling instead of just
killing the process on invalid Unicode.

Make UTF8ToWide()/WideToUTF8() set errno to EILSEQ on bad input. This is
the same error code that wcsrtombs(3) uses.

Update the unittest to check for EILSEQ.

Change-Id: Ie92acf74d37adaea116cf610c1bf8cd433741e16
Signed-off-by: Spencer Low <CompareAndSwap@gmail.com>
This commit is contained in:
Spencer Low 2015-11-12 15:20:15 -08:00
parent 5d75c9d9c2
commit d21dc825bb
9 changed files with 167 additions and 139 deletions

View file

@ -28,7 +28,9 @@ ADB_COMMON_darwin_CFLAGS := \
# Define windows.h and tchar.h Unicode preprocessor symbols so that
# CreateFile(), _tfopen(), etc. map to versions that take wchar_t*, breaking the
# build if you accidentally pass char*. Fix by calling like:
# CreateFileW(widen(utf8).c_str()).
# std::wstring path_wide;
# if (!android::base::UTF8ToWide(path_utf8, &path_wide)) { /* error handling */ }
# CreateFileW(path_wide.c_str());
ADB_COMMON_windows_CFLAGS := \
-DUNICODE=1 -D_UNICODE=1 \

View file

@ -311,7 +311,9 @@ static int get_user_keyfilepath(char *filename, size_t len)
SystemErrorCodeToString(hr).c_str());
return -1;
}
home_str = narrow(path);
if (!android::base::WideToUTF8(path, &home_str)) {
return -1;
}
home = home_str.c_str();
}
format = "%s\\%s";

View file

@ -58,7 +58,12 @@ static std::string GetLogFilePath() {
SystemErrorCodeToString(GetLastError()).c_str());
}
return narrow(temp_path) + log_name;
std::string temp_path_utf8;
if (!android::base::WideToUTF8(temp_path, &temp_path_utf8)) {
fatal_errno("cannot convert temporary file path from UTF-16 to UTF-8");
}
return temp_path_utf8 + log_name;
}
#else
static const char kNullFileName[] = "/dev/null";

View file

@ -77,7 +77,7 @@ void LinePrinter::Print(string to_print, LineType type) {
CONSOLE_SCREEN_BUFFER_INFO csbi;
GetConsoleScreenBufferInfo(console_, &csbi);
// TODO: const std::wstring to_print_wide = widen(to_print);
// TODO: std::wstring to_print_wide; if (!android::base::UTF8ToWide(to_print, &to_print_wide)...
// TODO: wstring ElideMiddle.
to_print = ElideMiddle(to_print, static_cast<size_t>(csbi.dwSize.X));
// We don't want to have the cursor spamming back and forth, so instead of

View file

@ -28,6 +28,9 @@
#include <string>
// Include this before open/unlink are defined as macros below.
#include <base/utf8.h>
/*
* TEMP_FAILURE_RETRY is defined by some, but not all, versions of
* <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's
@ -72,7 +75,7 @@
#include <ws2tcpip.h>
#include <memory> // unique_ptr
#include <string> // Prototypes for narrow() and widen() use std::(w)string.
#include <string>
#include "fdevent.h"
@ -342,18 +345,6 @@ inline void seekdir(DIR*, long) {
char* adb_strerror(int err);
#define strerror adb_strerror
// Convert from UTF-8 to UTF-16, typically used to convert char strings into
// wchar_t strings that can be passed to wchar_t-based OS and C Runtime APIs
// on Windows.
extern std::wstring widen(const std::string& utf8);
extern std::wstring widen(const char* utf8);
// Convert from UTF-16 to UTF-8, typically used to convert strings from OS and
// C Runtime APIs that return wchar_t, to a format for our char-based data
// structures.
extern std::string narrow(const std::wstring& utf16);
extern std::string narrow(const wchar_t* utf16);
// Helper class to convert UTF-16 argv from wmain() to UTF-8 args that can be
// passed to main().
class NarrowArgs {

View file

@ -103,7 +103,13 @@ std::string SystemErrorCodeToString(const DWORD error_code) {
}
// Convert UTF-16 to UTF-8.
std::string msg(narrow(msgbuf));
std::string msg;
if (!android::base::WideToUTF8(msgbuf, &msg)) {
return android::base::StringPrintf(
"Error (%d) converting from UTF-16 to UTF-8 while retrieving error. (%lu)", errno,
error_code);
}
// Messages returned by the system end with line breaks.
msg = android::base::Trim(msg);
// There are many Windows error messages compared to POSIX, so include the
@ -144,7 +150,11 @@ void *load_file(const char *fn, unsigned *_sz)
char *data;
DWORD file_size;
file = CreateFileW( widen(fn).c_str(),
std::wstring fn_wide;
if (!android::base::UTF8ToWide(fn, &fn_wide))
return NULL;
file = CreateFileW( fn_wide.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
NULL,
@ -434,7 +444,11 @@ int adb_open(const char* path, int options)
return -1;
}
f->fh_handle = CreateFileW( widen(path).c_str(), desiredAccess, shareMode,
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
f->fh_handle = CreateFileW( path_wide.c_str(), desiredAccess, shareMode,
NULL, OPEN_EXISTING, 0, NULL );
if ( f->fh_handle == INVALID_HANDLE_VALUE ) {
@ -475,7 +489,11 @@ int adb_creat(const char* path, int mode)
return -1;
}
f->fh_handle = CreateFileW( widen(path).c_str(), GENERIC_WRITE,
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
f->fh_handle = CreateFileW( path_wide.c_str(), GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
NULL );
@ -981,7 +999,7 @@ int network_connect(const std::string& host, int port, int type, int timeout, st
#if (NTDDI_VERSION >= NTDDI_WINXPSP2) || (_WIN32_WINNT >= _WIN32_WINNT_WS03)
// TODO: When the Android SDK tools increases the Windows system
// requirements >= WinXP SP2, switch to GetAddrInfoW(widen(host).c_str()).
// requirements >= WinXP SP2, switch to android::base::UTF8ToWide() + GetAddrInfoW().
#else
// Otherwise, keep using getaddrinfo(), or do runtime API detection
// with GetProcAddress("GetAddrInfoW").
@ -3405,11 +3423,11 @@ int unix_read(int fd, void* buf, size_t len) {
// The Choice
// ----------
//
// The code below chooses option 3, the UTF-8 everywhere strategy. It
// introduces narrow() which converts UTF-16 to UTF-8. This is used by the
// The code below chooses option 3, the UTF-8 everywhere strategy. It uses
// android::base::WideToUTF8() which converts UTF-16 to UTF-8. This is used by the
// NarrowArgs helper class that is used to convert wmain() args into UTF-8
// args that are passed to main() at the beginning of program startup. We also
// introduce widen() which converts from UTF-8 to UTF-16. This is used to
// args that are passed to main() at the beginning of program startup. We also use
// android::base::UTF8ToWide() which converts from UTF-8 to UTF-16. This is used to
// implement wrappers below that call UTF-16 OS and C Runtime APIs.
//
// Unicode console output
@ -3439,101 +3457,17 @@ int unix_read(int fd, void* buf, size_t len) {
// to UTF-16 and then calls WriteConsoleW().
// Function prototype because attributes cannot be placed on func definitions.
// The format attribute names argument 1 as the format string and argument 2
// as the first of the variadic arguments that it consumes.
static void _widen_fatal(const char *fmt, ...)
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 1, 2)));
// A version of fatal() that does not call adb_(v)fprintf(), so it can be
// called from those functions.
//
// Writes "error: ", the formatted message and a trailing newline to stderr,
// then ends the process with exit(-1); control never returns to the caller.
static void _widen_fatal(const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
// If (v)fprintf are macros that point to adb_(v)fprintf, when random adb
// code calls (v)fprintf, it may end up calling adb_(v)fprintf, which then
// calls _widen_fatal(). So then how does _widen_fatal() output an error?
// By directly calling real C Runtime APIs that don't properly output
// Unicode, but will be able to get a comprehensible message out. To do
// this, make sure we don't call (v)fprintf macros by undefining them.
// The macro definitions are saved here and restored below, so the #undef
// only affects the three calls in between.
#pragma push_macro("fprintf")
#pragma push_macro("vfprintf")
#undef fprintf
#undef vfprintf
fprintf(stderr, "error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
#pragma pop_macro("vfprintf")
#pragma pop_macro("fprintf")
va_end(ap);
exit(-1);
}
// Converts the first `size` chars of the UTF-8 buffer `utf8` to UTF-16 and
// returns the result. A failed conversion is fatal: the error is reported via
// _widen_fatal().
std::wstring widen(const char* utf8, const size_t size) {
    std::wstring wide;
    const bool converted = android::base::UTF8ToWide(utf8, size, &wide);
    if (converted) {
        return wide;
    }
    // Report through _widen_fatal() rather than fatal(): fatal() may itself
    // call widen(), which could lead to infinite recursion.
    _widen_fatal("cannot convert from UTF-8 to UTF-16");
    return wide;
}
// Converts a NULL-terminated UTF-8 string to UTF-16 and returns the result.
// A failed conversion is fatal: the error is reported via _widen_fatal().
std::wstring widen(const char* utf8) {
    std::wstring wide;
    const bool converted = android::base::UTF8ToWide(utf8, &wide);
    if (converted) {
        return wide;
    }
    // Report through _widen_fatal() rather than fatal(): fatal() may itself
    // call widen(), which could lead to infinite recursion.
    _widen_fatal("cannot convert from UTF-8 to UTF-16");
    return wide;
}
// Converts a UTF-8 std::string (embedded NULL characters included) to UTF-16
// and returns the result. A failed conversion is fatal: the error is reported
// via _widen_fatal().
std::wstring widen(const std::string& utf8) {
    std::wstring wide;
    const bool converted = android::base::UTF8ToWide(utf8, &wide);
    if (converted) {
        return wide;
    }
    // Report through _widen_fatal() rather than fatal(): fatal() may itself
    // call widen(), which could lead to infinite recursion.
    _widen_fatal("cannot convert from UTF-8 to UTF-16");
    return wide;
}
// Converts a UTF-16 std::wstring (embedded NULL characters included) to UTF-8
// and returns the result. A failed conversion is reported via fatal().
std::string narrow(const std::wstring& utf16) {
    std::string narrowed;
    const bool converted = android::base::WideToUTF8(utf16, &narrowed);
    if (converted) {
        return narrowed;
    }
    fatal("cannot convert from UTF-16 to UTF-8");
    return narrowed;
}
// Converts a NULL-terminated UTF-16 string to UTF-8 and returns the result.
// A failed conversion is reported via fatal().
std::string narrow(const wchar_t* utf16) {
    std::string narrowed;
    const bool converted = android::base::WideToUTF8(utf16, &narrowed);
    if (converted) {
        return narrowed;
    }
    fatal("cannot convert from UTF-16 to UTF-8");
    return narrowed;
}
// Constructor for helper class to convert wmain() UTF-16 args to UTF-8 to
// be passed to main().
NarrowArgs::NarrowArgs(const int argc, wchar_t** const argv) {
narrow_args = new char*[argc + 1];
for (int i = 0; i < argc; ++i) {
narrow_args[i] = strdup(narrow(argv[i]).c_str());
std::string arg_narrow;
if (!android::base::WideToUTF8(argv[i], &arg_narrow)) {
fatal_errno("cannot convert argument from UTF-16 to UTF-8");
}
narrow_args[i] = strdup(arg_narrow.c_str());
}
narrow_args[argc] = nullptr; // terminate
}
@ -3549,20 +3483,24 @@ NarrowArgs::~NarrowArgs() {
}
int unix_open(const char* path, int options, ...) {
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
if ((options & O_CREAT) == 0) {
return _wopen(widen(path).c_str(), options);
return _wopen(path_wide.c_str(), options);
} else {
int mode;
va_list args;
va_start(args, options);
mode = va_arg(args, int);
va_end(args);
return _wopen(widen(path).c_str(), options, mode);
return _wopen(path_wide.c_str(), options, mode);
}
}
// Version of stat() that takes a UTF-8 path.
int adb_stat(const char* f, struct adb_stat* s) {
int adb_stat(const char* path, struct adb_stat* s) {
#pragma push_macro("wstat")
// This definition of wstat seems to be missing from <sys/stat.h>.
#if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
@ -3575,17 +3513,27 @@ int adb_stat(const char* f, struct adb_stat* s) {
// <sys/stat.h> has a function prototype for wstat() that should be available.
#endif
return wstat(widen(f).c_str(), s);
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
return wstat(path_wide.c_str(), s);
#pragma pop_macro("wstat")
}
// Version of opendir() that takes a UTF-8 path.
DIR* adb_opendir(const char* name) {
DIR* adb_opendir(const char* path) {
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return nullptr;
}
// Just cast _WDIR* to DIR*. This doesn't work if the caller reads any of
// the fields, but right now all the callers treat the structure as
// opaque.
return reinterpret_cast<DIR*>(_wopendir(widen(name).c_str()));
return reinterpret_cast<DIR*>(_wopendir(path_wide.c_str()));
}
// Version of readdir() that returns UTF-8 paths.
@ -3595,8 +3543,12 @@ struct dirent* adb_readdir(DIR* dir) {
if (went == nullptr) {
return nullptr;
}
// Convert from UTF-16 to UTF-8.
const std::string name_utf8(narrow(went->d_name));
std::string name_utf8;
if (!android::base::WideToUTF8(went->d_name, &name_utf8)) {
return nullptr;
}
// Cast the _wdirent* to dirent* and overwrite the d_name field (which has
// space for UTF-16 wchar_t's) with UTF-8 char's.
@ -3628,7 +3580,10 @@ int adb_closedir(DIR* dir) {
// Version of unlink() that takes a UTF-8 path.
int adb_unlink(const char* path) {
const std::wstring wpath(widen(path));
std::wstring wpath;
if (!android::base::UTF8ToWide(path, &wpath)) {
return -1;
}
int rc = _wunlink(wpath.c_str());
@ -3644,20 +3599,35 @@ int adb_unlink(const char* path) {
// Version of mkdir() that takes a UTF-8 path.
int adb_mkdir(const std::string& path, int mode) {
return _wmkdir(widen(path.c_str()).c_str());
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
return _wmkdir(path_wide.c_str());
}
// Version of utime() that takes a UTF-8 path.
int adb_utime(const char* path, struct utimbuf* u) {
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
static_assert(sizeof(struct utimbuf) == sizeof(struct _utimbuf),
"utimbuf and _utimbuf should be the same size because they both "
"contain the same types, namely time_t");
return _wutime(widen(path).c_str(), reinterpret_cast<struct _utimbuf*>(u));
return _wutime(path_wide.c_str(), reinterpret_cast<struct _utimbuf*>(u));
}
// Version of chmod() that takes a UTF-8 path.
int adb_chmod(const char* path, int mode) {
return _wchmod(widen(path).c_str(), mode);
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return -1;
}
return _wchmod(path_wide.c_str(), mode);
}
// Internal helper function to write UTF-8 bytes to a console. Returns -1
@ -3819,8 +3789,18 @@ size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream) {
// Version of fopen() that takes a UTF-8 filename and can access a file with
// a Unicode filename.
FILE* adb_fopen(const char* f, const char* m) {
return _wfopen(widen(f).c_str(), widen(m).c_str());
FILE* adb_fopen(const char* path, const char* mode) {
std::wstring path_wide;
if (!android::base::UTF8ToWide(path, &path_wide)) {
return nullptr;
}
std::wstring mode_wide;
if (!android::base::UTF8ToWide(mode, &mode_wide)) {
return nullptr;
}
return _wfopen(path_wide.c_str(), mode_wide.c_str());
}
// Return a lowercase version of the argument. Uses C Runtime tolower() on
@ -3880,15 +3860,27 @@ static void _ensure_env_setup() {
continue;
}
// If we encounter an error converting UTF-16, don't error-out on account of a single env
// var because the program might never even read this particular variable.
std::string name_utf8;
if (!android::base::WideToUTF8(*env, equal - *env, &name_utf8)) {
continue;
}
// Store lowercase name so that we can do case-insensitive searches.
const std::string name_utf8(ToLower(narrow(
std::wstring(*env, equal - *env))));
char* const value_utf8 = strdup(narrow(equal + 1).c_str());
name_utf8 = ToLower(name_utf8);
std::string value_utf8;
if (!android::base::WideToUTF8(equal + 1, &value_utf8)) {
continue;
}
char* const value_dup = strdup(value_utf8.c_str());
// Don't overwrite a previous env var with the same name. In reality,
// the system probably won't let two env vars with the same name exist
// in _wenviron.
g_environ_utf8.insert({name_utf8, value_utf8});
g_environ_utf8.insert({name_utf8, value_dup});
}
}
@ -3914,10 +3906,15 @@ char* adb_getcwd(char* buf, int size) {
return nullptr;
}
const std::string buf_utf8(narrow(wbuf));
std::string buf_utf8;
const bool narrow_result = android::base::WideToUTF8(wbuf, &buf_utf8);
free(wbuf);
wbuf = nullptr;
if (!narrow_result) {
return nullptr;
}
// If size was specified, make sure all the chars will fit.
if (size != 0) {
if (size < static_cast<int>(buf_utf8.length() + 1)) {

View file

@ -19,6 +19,10 @@
#ifdef _WIN32
#include <string>
#else
// Bring in prototypes for standard APIs so that we can import them into the utf8 namespace.
#include <fcntl.h> // open
#include <unistd.h> // unlink
#endif
namespace android {

View file

@ -27,6 +27,18 @@
namespace android {
namespace base {
// Maps the GetLastError() value left by WideCharToMultiByte()/MultiByteToWideChar()
// onto errno: ERROR_NO_UNICODE_TRANSLATION becomes EILSEQ, and every other
// error code becomes EINVAL.
static void SetErrnoFromLastError() {
    errno = (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) ? EILSEQ : EINVAL;
}
bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
utf8->clear();
@ -49,6 +61,7 @@ bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
NULL, 0, NULL, NULL);
if (chars_required <= 0) {
SetErrnoFromLastError();
return false;
}
@ -59,6 +72,7 @@ bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
&(*utf8)[0], chars_required, NULL,
NULL);
if (result != chars_required) {
SetErrnoFromLastError();
CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
<< " chars to buffer of " << chars_required << " chars";
utf8->clear();
@ -80,8 +94,8 @@ bool WideToUTF8(const std::wstring& utf16, std::string* utf8) {
}
// Internal helper function that takes MultiByteToWideChar() flags.
static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
std::wstring* utf16, const DWORD flags) {
static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
const DWORD flags) {
utf16->clear();
if (size == 0) {
@ -93,6 +107,7 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
NULL, 0);
if (chars_required <= 0) {
SetErrnoFromLastError();
return false;
}
@ -102,6 +117,7 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
&(*utf16)[0], chars_required);
if (result != chars_required) {
SetErrnoFromLastError();
CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
<< " chars to buffer of " << chars_required << " chars";
utf16->clear();
@ -113,13 +129,16 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
// If strictly interpreting as UTF-8 succeeds, return success.
if (_UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
return true;
}
const int saved_errno = errno;
// Fall back to non-strict interpretation, allowing invalid characters and
// converting as best as possible, and return false to signify a problem.
(void)_UTF8ToWideWithFlags(utf8, size, utf16, 0);
(void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
errno = saved_errno;
return false;
}
@ -140,7 +159,6 @@ namespace utf8 {
int open(const char* name, int flags, ...) {
std::wstring name_utf16;
if (!UTF8ToWide(name, &name_utf16)) {
errno = EINVAL;
return -1;
}
@ -158,7 +176,6 @@ int open(const char* name, int flags, ...) {
int unlink(const char* name) {
std::wstring name_utf16;
if (!UTF8ToWide(name, &name_utf16)) {
errno = EINVAL;
return -1;
}

View file

@ -26,12 +26,16 @@ namespace base {
TEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
std::wstring wide;
errno = 0;
// Standalone \xa2 is an invalid UTF-8 sequence, so this should return an
// error. Concatenate two C/C++ literal string constants to prevent the
// compiler from giving an error about "\xa2af" containing a "hex escape
// sequence out of range".
EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide));
EXPECT_EQ(EILSEQ, errno);
// Even if an invalid character is encountered, UTF8ToWide() should still do
// its best to convert the rest of the string. sysdeps_win32.cpp:
// _console_write_utf8() depends on this behavior.
@ -161,6 +165,7 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
for (size_t i = 0; i < arraysize(convert_cases); i++) {
std::wstring converted;
errno = 0;
const bool success = UTF8ToWide(convert_cases[i].utf8,
strlen(convert_cases[i].utf8),
&converted);
@ -171,6 +176,8 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
if (success) {
std::wstring expected(convert_cases[i].wide);
EXPECT_EQ(expected, converted);
} else {
EXPECT_EQ(EILSEQ, errno);
}
}
@ -227,6 +234,7 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
for (size_t i = 0; i < arraysize(convert_cases); i++) {
std::string converted;
errno = 0;
const bool success = WideToUTF8(convert_cases[i].utf16,
wcslen(convert_cases[i].utf16),
&converted);
@ -237,6 +245,8 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
if (success) {
std::string expected(convert_cases[i].utf8);
EXPECT_EQ(expected, converted);
} else {
EXPECT_EQ(EILSEQ, errno);
}
}
}