adb: win32: remove widen()/narrow() in favor of UTF8ToWide()/WideToUTF8()
Now that we have a more standardized API (also available in Chromium), switch to it. Another benefit is real error handling instead of just killing the process on invalid Unicode. Make UTF8ToWide()/WideToUTF8() set errno to EILSEQ on bad input. This is the same error code that wcsrtombs(3) uses. Update the unittest to check for EILSEQ. Change-Id: Ie92acf74d37adaea116cf610c1bf8cd433741e16 Signed-off-by: Spencer Low <CompareAndSwap@gmail.com>
This commit is contained in:
parent
5d75c9d9c2
commit
d21dc825bb
9 changed files with 167 additions and 139 deletions
|
@ -28,7 +28,9 @@ ADB_COMMON_darwin_CFLAGS := \
|
|||
# Define windows.h and tchar.h Unicode preprocessor symbols so that
|
||||
# CreateFile(), _tfopen(), etc. map to versions that take wchar_t*, breaking the
|
||||
# build if you accidentally pass char*. Fix by calling like:
|
||||
# CreateFileW(widen(utf8).c_str()).
|
||||
# std::wstring path_wide;
|
||||
# if (!android::base::UTF8ToWide(path_utf8, &path_wide)) { /* error handling */ }
|
||||
# CreateFileW(path_wide.c_str());
|
||||
ADB_COMMON_windows_CFLAGS := \
|
||||
-DUNICODE=1 -D_UNICODE=1 \
|
||||
|
||||
|
|
|
@ -311,7 +311,9 @@ static int get_user_keyfilepath(char *filename, size_t len)
|
|||
SystemErrorCodeToString(hr).c_str());
|
||||
return -1;
|
||||
}
|
||||
home_str = narrow(path);
|
||||
if (!android::base::WideToUTF8(path, &home_str)) {
|
||||
return -1;
|
||||
}
|
||||
home = home_str.c_str();
|
||||
}
|
||||
format = "%s\\%s";
|
||||
|
|
|
@ -58,7 +58,12 @@ static std::string GetLogFilePath() {
|
|||
SystemErrorCodeToString(GetLastError()).c_str());
|
||||
}
|
||||
|
||||
return narrow(temp_path) + log_name;
|
||||
std::string temp_path_utf8;
|
||||
if (!android::base::WideToUTF8(temp_path, &temp_path_utf8)) {
|
||||
fatal_errno("cannot convert temporary file path from UTF-16 to UTF-8");
|
||||
}
|
||||
|
||||
return temp_path_utf8 + log_name;
|
||||
}
|
||||
#else
|
||||
static const char kNullFileName[] = "/dev/null";
|
||||
|
|
|
@ -77,7 +77,7 @@ void LinePrinter::Print(string to_print, LineType type) {
|
|||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
GetConsoleScreenBufferInfo(console_, &csbi);
|
||||
|
||||
// TODO: const std::wstring to_print_wide = widen(to_print);
|
||||
// TODO: std::wstring to_print_wide; if (!android::base::UTF8ToWide(to_print, &to_print_wide)...
|
||||
// TODO: wstring ElideMiddle.
|
||||
to_print = ElideMiddle(to_print, static_cast<size_t>(csbi.dwSize.X));
|
||||
// We don't want to have the cursor spamming back and forth, so instead of
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
// Include this before open/unlink are defined as macros below.
|
||||
#include <base/utf8.h>
|
||||
|
||||
/*
|
||||
* TEMP_FAILURE_RETRY is defined by some, but not all, versions of
|
||||
* <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's
|
||||
|
@ -72,7 +75,7 @@
|
|||
#include <ws2tcpip.h>
|
||||
|
||||
#include <memory> // unique_ptr
|
||||
#include <string> // Prototypes for narrow() and widen() use std::(w)string.
|
||||
#include <string>
|
||||
|
||||
#include "fdevent.h"
|
||||
|
||||
|
@ -342,18 +345,6 @@ inline void seekdir(DIR*, long) {
|
|||
char* adb_strerror(int err);
|
||||
#define strerror adb_strerror
|
||||
|
||||
// Convert from UTF-8 to UTF-16, typically used to convert char strings into
|
||||
// wchar_t strings that can be passed to wchar_t-based OS and C Runtime APIs
|
||||
// on Windows.
|
||||
extern std::wstring widen(const std::string& utf8);
|
||||
extern std::wstring widen(const char* utf8);
|
||||
|
||||
// Convert from UTF-16 to UTF-8, typically used to convert strings from OS and
|
||||
// C Runtime APIs that return wchar_t, to a format for our char-based data
|
||||
// structures.
|
||||
extern std::string narrow(const std::wstring& utf16);
|
||||
extern std::string narrow(const wchar_t* utf16);
|
||||
|
||||
// Helper class to convert UTF-16 argv from wmain() to UTF-8 args that can be
|
||||
// passed to main().
|
||||
class NarrowArgs {
|
||||
|
|
|
@ -103,7 +103,13 @@ std::string SystemErrorCodeToString(const DWORD error_code) {
|
|||
}
|
||||
|
||||
// Convert UTF-16 to UTF-8.
|
||||
std::string msg(narrow(msgbuf));
|
||||
std::string msg;
|
||||
if (!android::base::WideToUTF8(msgbuf, &msg)) {
|
||||
return android::base::StringPrintf(
|
||||
"Error (%d) converting from UTF-16 to UTF-8 while retrieving error. (%lu)", errno,
|
||||
error_code);
|
||||
}
|
||||
|
||||
// Messages returned by the system end with line breaks.
|
||||
msg = android::base::Trim(msg);
|
||||
// There are many Windows error messages compared to POSIX, so include the
|
||||
|
@ -144,7 +150,11 @@ void *load_file(const char *fn, unsigned *_sz)
|
|||
char *data;
|
||||
DWORD file_size;
|
||||
|
||||
file = CreateFileW( widen(fn).c_str(),
|
||||
std::wstring fn_wide;
|
||||
if (!android::base::UTF8ToWide(fn, &fn_wide))
|
||||
return NULL;
|
||||
|
||||
file = CreateFileW( fn_wide.c_str(),
|
||||
GENERIC_READ,
|
||||
FILE_SHARE_READ,
|
||||
NULL,
|
||||
|
@ -434,7 +444,11 @@ int adb_open(const char* path, int options)
|
|||
return -1;
|
||||
}
|
||||
|
||||
f->fh_handle = CreateFileW( widen(path).c_str(), desiredAccess, shareMode,
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
f->fh_handle = CreateFileW( path_wide.c_str(), desiredAccess, shareMode,
|
||||
NULL, OPEN_EXISTING, 0, NULL );
|
||||
|
||||
if ( f->fh_handle == INVALID_HANDLE_VALUE ) {
|
||||
|
@ -475,7 +489,11 @@ int adb_creat(const char* path, int mode)
|
|||
return -1;
|
||||
}
|
||||
|
||||
f->fh_handle = CreateFileW( widen(path).c_str(), GENERIC_WRITE,
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
f->fh_handle = CreateFileW( path_wide.c_str(), GENERIC_WRITE,
|
||||
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
||||
NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
|
||||
NULL );
|
||||
|
@ -981,7 +999,7 @@ int network_connect(const std::string& host, int port, int type, int timeout, st
|
|||
|
||||
#if (NTDDI_VERSION >= NTDDI_WINXPSP2) || (_WIN32_WINNT >= _WIN32_WINNT_WS03)
|
||||
// TODO: When the Android SDK tools increases the Windows system
|
||||
// requirements >= WinXP SP2, switch to GetAddrInfoW(widen(host).c_str()).
|
||||
// requirements >= WinXP SP2, switch to android::base::UTF8ToWide() + GetAddrInfoW().
|
||||
#else
|
||||
// Otherwise, keep using getaddrinfo(), or do runtime API detection
|
||||
// with GetProcAddress("GetAddrInfoW").
|
||||
|
@ -3405,11 +3423,11 @@ int unix_read(int fd, void* buf, size_t len) {
|
|||
// The Choice
|
||||
// ----------
|
||||
//
|
||||
// The code below chooses option 3, the UTF-8 everywhere strategy. It
|
||||
// introduces narrow() which converts UTF-16 to UTF-8. This is used by the
|
||||
// The code below chooses option 3, the UTF-8 everywhere strategy. It uses
|
||||
// android::base::WideToUTF8() which converts UTF-16 to UTF-8. This is used by the
|
||||
// NarrowArgs helper class that is used to convert wmain() args into UTF-8
|
||||
// args that are passed to main() at the beginning of program startup. We also
|
||||
// introduce widen() which converts from UTF-8 to UTF-16. This is used to
|
||||
// args that are passed to main() at the beginning of program startup. We also use
|
||||
// android::base::UTF8ToWide() which converts from UTF-8 to UTF-16. This is used to
|
||||
// implement wrappers below that call UTF-16 OS and C Runtime APIs.
|
||||
//
|
||||
// Unicode console output
|
||||
|
@ -3439,101 +3457,17 @@ int unix_read(int fd, void* buf, size_t len) {
|
|||
// to UTF-16 and then calls WriteConsoleW().
|
||||
|
||||
|
||||
// Function prototype because attributes cannot be placed on func definitions.
|
||||
static void _widen_fatal(const char *fmt, ...)
|
||||
__attribute__((__format__(ADB_FORMAT_ARCHETYPE, 1, 2)));
|
||||
|
||||
// A version of fatal() that does not call adb_(v)fprintf(), so it can be
|
||||
// called from those functions.
|
||||
static void _widen_fatal(const char *fmt, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
// If (v)fprintf are macros that point to adb_(v)fprintf, when random adb
|
||||
// code calls (v)fprintf, it may end up calling adb_(v)fprintf, which then
|
||||
// calls _widen_fatal(). So then how does _widen_fatal() output an error?
|
||||
// By directly calling real C Runtime APIs that don't properly output
|
||||
// Unicode, but will be able to get a comprehendible message out. To do
|
||||
// this, make sure we don't call (v)fprintf macros by undefining them.
|
||||
#pragma push_macro("fprintf")
|
||||
#pragma push_macro("vfprintf")
|
||||
#undef fprintf
|
||||
#undef vfprintf
|
||||
fprintf(stderr, "error: ");
|
||||
vfprintf(stderr, fmt, ap);
|
||||
fprintf(stderr, "\n");
|
||||
#pragma pop_macro("vfprintf")
|
||||
#pragma pop_macro("fprintf")
|
||||
va_end(ap);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// Convert size number of UTF-8 char's to UTF-16. Fatal exit on error.
|
||||
std::wstring widen(const char* utf8, const size_t size) {
|
||||
std::wstring utf16;
|
||||
if (!android::base::UTF8ToWide(utf8, size, &utf16)) {
|
||||
// If we call fatal() here and fatal() calls widen(), then there may be
|
||||
// infinite recursion. To avoid this, call _widen_fatal() instead.
|
||||
_widen_fatal("cannot convert from UTF-8 to UTF-16");
|
||||
}
|
||||
|
||||
return utf16;
|
||||
}
|
||||
|
||||
// Convert a NULL-terminated string of UTF-8 characters to UTF-16. Fatal exit
|
||||
// on error.
|
||||
std::wstring widen(const char* utf8) {
|
||||
std::wstring utf16;
|
||||
if (!android::base::UTF8ToWide(utf8, &utf16)) {
|
||||
// If we call fatal() here and fatal() calls widen(), then there may be
|
||||
// infinite recursion. To avoid this, call _widen_fatal() instead.
|
||||
_widen_fatal("cannot convert from UTF-8 to UTF-16");
|
||||
}
|
||||
|
||||
return utf16;
|
||||
}
|
||||
|
||||
// Convert a UTF-8 std::string (including any embedded NULL characters) to
|
||||
// UTF-16. Fatal exit on error.
|
||||
std::wstring widen(const std::string& utf8) {
|
||||
std::wstring utf16;
|
||||
if (!android::base::UTF8ToWide(utf8, &utf16)) {
|
||||
// If we call fatal() here and fatal() calls widen(), then there may be
|
||||
// infinite recursion. To avoid this, call _widen_fatal() instead.
|
||||
_widen_fatal("cannot convert from UTF-8 to UTF-16");
|
||||
}
|
||||
|
||||
return utf16;
|
||||
}
|
||||
|
||||
// Convert a UTF-16 std::wstring (including any embedded NULL characters) to
|
||||
// UTF-8. Fatal exit on error.
|
||||
std::string narrow(const std::wstring& utf16) {
|
||||
std::string utf8;
|
||||
if (!android::base::WideToUTF8(utf16, &utf8)) {
|
||||
fatal("cannot convert from UTF-16 to UTF-8");
|
||||
}
|
||||
|
||||
return utf8;
|
||||
}
|
||||
|
||||
// Convert a NULL-terminated string of UTF-16 characters to UTF-8. Fatal exit
|
||||
// on error.
|
||||
std::string narrow(const wchar_t* utf16) {
|
||||
std::string utf8;
|
||||
if (!android::base::WideToUTF8(utf16, &utf8)) {
|
||||
fatal("cannot convert from UTF-16 to UTF-8");
|
||||
}
|
||||
|
||||
return utf8;
|
||||
}
|
||||
|
||||
// Constructor for helper class to convert wmain() UTF-16 args to UTF-8 to
|
||||
// be passed to main().
|
||||
NarrowArgs::NarrowArgs(const int argc, wchar_t** const argv) {
|
||||
narrow_args = new char*[argc + 1];
|
||||
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
narrow_args[i] = strdup(narrow(argv[i]).c_str());
|
||||
std::string arg_narrow;
|
||||
if (!android::base::WideToUTF8(argv[i], &arg_narrow)) {
|
||||
fatal_errno("cannot convert argument from UTF-16 to UTF-8");
|
||||
}
|
||||
narrow_args[i] = strdup(arg_narrow.c_str());
|
||||
}
|
||||
narrow_args[argc] = nullptr; // terminate
|
||||
}
|
||||
|
@ -3549,20 +3483,24 @@ NarrowArgs::~NarrowArgs() {
|
|||
}
|
||||
|
||||
int unix_open(const char* path, int options, ...) {
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
if ((options & O_CREAT) == 0) {
|
||||
return _wopen(widen(path).c_str(), options);
|
||||
return _wopen(path_wide.c_str(), options);
|
||||
} else {
|
||||
int mode;
|
||||
va_list args;
|
||||
va_start(args, options);
|
||||
mode = va_arg(args, int);
|
||||
va_end(args);
|
||||
return _wopen(widen(path).c_str(), options, mode);
|
||||
return _wopen(path_wide.c_str(), options, mode);
|
||||
}
|
||||
}
|
||||
|
||||
// Version of stat() that takes a UTF-8 path.
|
||||
int adb_stat(const char* f, struct adb_stat* s) {
|
||||
int adb_stat(const char* path, struct adb_stat* s) {
|
||||
#pragma push_macro("wstat")
|
||||
// This definition of wstat seems to be missing from <sys/stat.h>.
|
||||
#if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
|
||||
|
@ -3575,17 +3513,27 @@ int adb_stat(const char* f, struct adb_stat* s) {
|
|||
// <sys/stat.h> has a function prototype for wstat() that should be available.
|
||||
#endif
|
||||
|
||||
return wstat(widen(f).c_str(), s);
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return wstat(path_wide.c_str(), s);
|
||||
|
||||
#pragma pop_macro("wstat")
|
||||
}
|
||||
|
||||
// Version of opendir() that takes a UTF-8 path.
|
||||
DIR* adb_opendir(const char* name) {
|
||||
DIR* adb_opendir(const char* path) {
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Just cast _WDIR* to DIR*. This doesn't work if the caller reads any of
|
||||
// the fields, but right now all the callers treat the structure as
|
||||
// opaque.
|
||||
return reinterpret_cast<DIR*>(_wopendir(widen(name).c_str()));
|
||||
return reinterpret_cast<DIR*>(_wopendir(path_wide.c_str()));
|
||||
}
|
||||
|
||||
// Version of readdir() that returns UTF-8 paths.
|
||||
|
@ -3595,8 +3543,12 @@ struct dirent* adb_readdir(DIR* dir) {
|
|||
if (went == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convert from UTF-16 to UTF-8.
|
||||
const std::string name_utf8(narrow(went->d_name));
|
||||
std::string name_utf8;
|
||||
if (!android::base::WideToUTF8(went->d_name, &name_utf8)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Cast the _wdirent* to dirent* and overwrite the d_name field (which has
|
||||
// space for UTF-16 wchar_t's) with UTF-8 char's.
|
||||
|
@ -3628,7 +3580,10 @@ int adb_closedir(DIR* dir) {
|
|||
|
||||
// Version of unlink() that takes a UTF-8 path.
|
||||
int adb_unlink(const char* path) {
|
||||
const std::wstring wpath(widen(path));
|
||||
std::wstring wpath;
|
||||
if (!android::base::UTF8ToWide(path, &wpath)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int rc = _wunlink(wpath.c_str());
|
||||
|
||||
|
@ -3644,20 +3599,35 @@ int adb_unlink(const char* path) {
|
|||
|
||||
// Version of mkdir() that takes a UTF-8 path.
|
||||
int adb_mkdir(const std::string& path, int mode) {
|
||||
return _wmkdir(widen(path.c_str()).c_str());
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return _wmkdir(path_wide.c_str());
|
||||
}
|
||||
|
||||
// Version of utime() that takes a UTF-8 path.
|
||||
int adb_utime(const char* path, struct utimbuf* u) {
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
static_assert(sizeof(struct utimbuf) == sizeof(struct _utimbuf),
|
||||
"utimbuf and _utimbuf should be the same size because they both "
|
||||
"contain the same types, namely time_t");
|
||||
return _wutime(widen(path).c_str(), reinterpret_cast<struct _utimbuf*>(u));
|
||||
return _wutime(path_wide.c_str(), reinterpret_cast<struct _utimbuf*>(u));
|
||||
}
|
||||
|
||||
// Version of chmod() that takes a UTF-8 path.
|
||||
int adb_chmod(const char* path, int mode) {
|
||||
return _wchmod(widen(path).c_str(), mode);
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return _wchmod(path_wide.c_str(), mode);
|
||||
}
|
||||
|
||||
// Internal helper function to write UTF-8 bytes to a console. Returns -1
|
||||
|
@ -3819,8 +3789,18 @@ size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream) {
|
|||
|
||||
// Version of fopen() that takes a UTF-8 filename and can access a file with
|
||||
// a Unicode filename.
|
||||
FILE* adb_fopen(const char* f, const char* m) {
|
||||
return _wfopen(widen(f).c_str(), widen(m).c_str());
|
||||
FILE* adb_fopen(const char* path, const char* mode) {
|
||||
std::wstring path_wide;
|
||||
if (!android::base::UTF8ToWide(path, &path_wide)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::wstring mode_wide;
|
||||
if (!android::base::UTF8ToWide(mode, &mode_wide)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return _wfopen(path_wide.c_str(), mode_wide.c_str());
|
||||
}
|
||||
|
||||
// Return a lowercase version of the argument. Uses C Runtime tolower() on
|
||||
|
@ -3880,15 +3860,27 @@ static void _ensure_env_setup() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// If we encounter an error converting UTF-16, don't error-out on account of a single env
|
||||
// var because the program might never even read this particular variable.
|
||||
std::string name_utf8;
|
||||
if (!android::base::WideToUTF8(*env, equal - *env, &name_utf8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Store lowercase name so that we can do case-insensitive searches.
|
||||
const std::string name_utf8(ToLower(narrow(
|
||||
std::wstring(*env, equal - *env))));
|
||||
char* const value_utf8 = strdup(narrow(equal + 1).c_str());
|
||||
name_utf8 = ToLower(name_utf8);
|
||||
|
||||
std::string value_utf8;
|
||||
if (!android::base::WideToUTF8(equal + 1, &value_utf8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
char* const value_dup = strdup(value_utf8.c_str());
|
||||
|
||||
// Don't overwrite a previous env var with the same name. In reality,
|
||||
// the system probably won't let two env vars with the same name exist
|
||||
// in _wenviron.
|
||||
g_environ_utf8.insert({name_utf8, value_utf8});
|
||||
g_environ_utf8.insert({name_utf8, value_dup});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3914,10 +3906,15 @@ char* adb_getcwd(char* buf, int size) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
const std::string buf_utf8(narrow(wbuf));
|
||||
std::string buf_utf8;
|
||||
const bool narrow_result = android::base::WideToUTF8(wbuf, &buf_utf8);
|
||||
free(wbuf);
|
||||
wbuf = nullptr;
|
||||
|
||||
if (!narrow_result) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// If size was specified, make sure all the chars will fit.
|
||||
if (size != 0) {
|
||||
if (size < static_cast<int>(buf_utf8.length() + 1)) {
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
|
||||
#ifdef _WIN32
|
||||
#include <string>
|
||||
#else
|
||||
// Bring in prototypes for standard APIs so that we can import them into the utf8 namespace.
|
||||
#include <fcntl.h> // open
|
||||
#include <unistd.h> // unlink
|
||||
#endif
|
||||
|
||||
namespace android {
|
||||
|
|
|
@ -27,6 +27,18 @@
|
|||
namespace android {
|
||||
namespace base {
|
||||
|
||||
// Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar().
|
||||
static void SetErrnoFromLastError() {
|
||||
switch (GetLastError()) {
|
||||
case ERROR_NO_UNICODE_TRANSLATION:
|
||||
errno = EILSEQ;
|
||||
break;
|
||||
default:
|
||||
errno = EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
|
||||
utf8->clear();
|
||||
|
||||
|
@ -49,6 +61,7 @@ bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
|
|||
const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
|
||||
NULL, 0, NULL, NULL);
|
||||
if (chars_required <= 0) {
|
||||
SetErrnoFromLastError();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -59,6 +72,7 @@ bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
|
|||
&(*utf8)[0], chars_required, NULL,
|
||||
NULL);
|
||||
if (result != chars_required) {
|
||||
SetErrnoFromLastError();
|
||||
CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
|
||||
<< " chars to buffer of " << chars_required << " chars";
|
||||
utf8->clear();
|
||||
|
@ -80,8 +94,8 @@ bool WideToUTF8(const std::wstring& utf16, std::string* utf8) {
|
|||
}
|
||||
|
||||
// Internal helper function that takes MultiByteToWideChar() flags.
|
||||
static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
|
||||
std::wstring* utf16, const DWORD flags) {
|
||||
static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
|
||||
const DWORD flags) {
|
||||
utf16->clear();
|
||||
|
||||
if (size == 0) {
|
||||
|
@ -93,6 +107,7 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
|
|||
const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
|
||||
NULL, 0);
|
||||
if (chars_required <= 0) {
|
||||
SetErrnoFromLastError();
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -102,6 +117,7 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
|
|||
const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
|
||||
&(*utf16)[0], chars_required);
|
||||
if (result != chars_required) {
|
||||
SetErrnoFromLastError();
|
||||
CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
|
||||
<< " chars to buffer of " << chars_required << " chars";
|
||||
utf16->clear();
|
||||
|
@ -113,13 +129,16 @@ static bool _UTF8ToWideWithFlags(const char* utf8, const size_t size,
|
|||
|
||||
bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
|
||||
// If strictly interpreting as UTF-8 succeeds, return success.
|
||||
if (_UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
|
||||
if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const int saved_errno = errno;
|
||||
|
||||
// Fallback to non-strict interpretation, allowing invalid characters and
|
||||
// converting as best as possible, and return false to signify a problem.
|
||||
(void)_UTF8ToWideWithFlags(utf8, size, utf16, 0);
|
||||
(void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
|
||||
errno = saved_errno;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -140,7 +159,6 @@ namespace utf8 {
|
|||
int open(const char* name, int flags, ...) {
|
||||
std::wstring name_utf16;
|
||||
if (!UTF8ToWide(name, &name_utf16)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -158,7 +176,6 @@ int open(const char* name, int flags, ...) {
|
|||
int unlink(const char* name) {
|
||||
std::wstring name_utf16;
|
||||
if (!UTF8ToWide(name, &name_utf16)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -26,12 +26,16 @@ namespace base {
|
|||
TEST(UTFStringConversionsTest, ConvertInvalidUTF8) {
|
||||
std::wstring wide;
|
||||
|
||||
errno = 0;
|
||||
|
||||
// Standalone \xa2 is an invalid UTF-8 sequence, so this should return an
|
||||
// error. Concatenate two C/C++ literal string constants to prevent the
|
||||
// compiler from giving an error about "\xa2af" containing a "hex escape
|
||||
// sequence out of range".
|
||||
EXPECT_FALSE(android::base::UTF8ToWide("before\xa2" "after", &wide));
|
||||
|
||||
EXPECT_EQ(EILSEQ, errno);
|
||||
|
||||
// Even if an invalid character is encountered, UTF8ToWide() should still do
|
||||
// its best to convert the rest of the string. sysdeps_win32.cpp:
|
||||
// _console_write_utf8() depends on this behavior.
|
||||
|
@ -161,6 +165,7 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
|
|||
|
||||
for (size_t i = 0; i < arraysize(convert_cases); i++) {
|
||||
std::wstring converted;
|
||||
errno = 0;
|
||||
const bool success = UTF8ToWide(convert_cases[i].utf8,
|
||||
strlen(convert_cases[i].utf8),
|
||||
&converted);
|
||||
|
@ -171,6 +176,8 @@ TEST(UTFStringConversionsTest, ConvertUTF8ToWide) {
|
|||
if (success) {
|
||||
std::wstring expected(convert_cases[i].wide);
|
||||
EXPECT_EQ(expected, converted);
|
||||
} else {
|
||||
EXPECT_EQ(EILSEQ, errno);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -227,6 +234,7 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
|
|||
|
||||
for (size_t i = 0; i < arraysize(convert_cases); i++) {
|
||||
std::string converted;
|
||||
errno = 0;
|
||||
const bool success = WideToUTF8(convert_cases[i].utf16,
|
||||
wcslen(convert_cases[i].utf16),
|
||||
&converted);
|
||||
|
@ -237,6 +245,8 @@ TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) {
|
|||
if (success) {
|
||||
std::string expected(convert_cases[i].utf8);
|
||||
EXPECT_EQ(expected, converted);
|
||||
} else {
|
||||
EXPECT_EQ(EILSEQ, errno);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue