5a0aa3dee2
Although glibc gets by with an 8-byte mbstate_t, OpenBSD uses 12 bytes (of the 128 bytes it reserves!). We can actually implement UTF-8 encoding/decoding with a 0-byte mbstate_t which means we can make things work on LP32 too, as long as we accept the limitation that the caller needs to present us with a complete sequence before we'll process it. Our behavior is fine when going from characters to bytes; we just update the source wchar_t** to say how far through the input we got. I'll come back and use the 4 bytes we do have to cope with byte sequences split across multiple input buffers. The fact that we don't support UTF-8 sequences longer than 4 bytes plus the fact that the first byte of a UTF-8 sequence encodes the length means we shouldn't need the other fields OpenBSD used (at the cost of some recomputation in cases where a sequence is split across buffers). This patch also makes the minimal changes necessary to setlocale(3) to make us behave like glibc when an app requests UTF-8. (The difference being that our "C" locale is the same as our "C.UTF-8" locale.) Change-Id: Ied327a8c4643744b3611bf6bb005a9b389ba4c2f
155 lines
4.8 KiB
C++
155 lines
4.8 KiB
C++
/*
|
|
* Copyright (C) 2008 The Android Open Source Project
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <locale.h>
|
|
#include <pthread.h>
|
|
#include <stdlib.h>
|
|
|
|
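// We currently support a single locale, the "C" locale (also known as "POSIX").
// The names "" and "C.UTF-8" are accepted as well, but no per-locale data is kept for any of them.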
|
|
|
|
// The object behind a locale_t handle.
struct __locale_t {
  // Intentionally empty: with only one supported locale there is nothing to
  // store, so an instance is just an opaque token.
};
|
|
|
|
// Passed to pthread_once() by localeconv() so that __locale_init() runs a single time.
static pthread_once_t gLocaleOnce = PTHREAD_ONCE_INIT;
|
|
// The lconv handed out by localeconv(); its fields are filled in by __locale_init().
static lconv gLocale;
|
|
|
|
// Thread-specific-data key under which uselocale(3) keeps each thread's locale_t.
static pthread_key_t gUselocaleKey;

// The key is created from a constructor rather than lazily via pthread_once so
// that we know when the resource (a TLS slot) will be taken.
static void __attribute__((constructor)) __bionic_tls_uselocale_key_init() {
  // No destructor is registered for the stored values.
  pthread_key_create(&gUselocaleKey, NULL);
}
|
|
|
|
// Fills in gLocale. Run through pthread_once() from localeconv().
//
// Only decimal_point carries a real value ("."). Every other string field is the
// empty string and every numeric field is CHAR_MAX, which is how localeconv(3)
// reports "not available".
static void __locale_init() {
  char* const empty = const_cast<char*>("");

  // String-valued fields.
  gLocale.decimal_point = const_cast<char*>(".");
  gLocale.thousands_sep = gLocale.grouping = empty;
  gLocale.int_curr_symbol = gLocale.currency_symbol = empty;
  gLocale.mon_decimal_point = gLocale.mon_thousands_sep = gLocale.mon_grouping = empty;
  gLocale.positive_sign = gLocale.negative_sign = empty;

  // Local monetary formatting fields.
  gLocale.int_frac_digits = gLocale.frac_digits = CHAR_MAX;
  gLocale.p_cs_precedes = gLocale.p_sep_by_space = CHAR_MAX;
  gLocale.n_cs_precedes = gLocale.n_sep_by_space = CHAR_MAX;
  gLocale.p_sign_posn = gLocale.n_sign_posn = CHAR_MAX;

  // International monetary formatting fields.
  gLocale.int_p_cs_precedes = gLocale.int_p_sep_by_space = CHAR_MAX;
  gLocale.int_n_cs_precedes = gLocale.int_n_sep_by_space = CHAR_MAX;
  gLocale.int_p_sign_posn = gLocale.int_n_sign_posn = CHAR_MAX;
}
|
|
|
|
// Set by setlocale(3) whenever it is given a supported locale name: true if that name
// contains "UTF-8". Decides whether setlocale(3) reports "C.UTF-8" or "C".
static bool __bionic_current_locale_is_utf8 = false;
|
|
|
|
// Returns true if 'locale' is exactly one of the names we accept:
// "", "C", "C.UTF-8" or "POSIX". 'locale' must not be NULL.
static bool __is_supported_locale(const char* locale) {
  static const char* const kSupportedNames[] = { "", "C", "C.UTF-8", "POSIX" };
  const size_t count = sizeof(kSupportedNames) / sizeof(kSupportedNames[0]);
  for (size_t i = 0; i < count; ++i) {
    if (strcmp(locale, kSupportedNames[i]) == 0) {
      return true;
    }
  }
  return false;
}
|
|
|
|
// Allocates a new locale token with malloc(3).
// Returns whatever malloc(3) returned, so the result is NULL if the allocation failed.
static locale_t __new_locale() {
  void* storage = malloc(sizeof(__locale_t));
  return static_cast<locale_t>(storage);
}
|
|
|
|
lconv* localeconv() {
|
|
pthread_once(&gLocaleOnce, __locale_init);
|
|
return &gLocale;
|
|
}
|
|
|
|
locale_t duplocale(locale_t l) {
|
|
locale_t clone = __new_locale();
|
|
if (clone != NULL && l != LC_GLOBAL_LOCALE) {
|
|
*clone = *l;
|
|
}
|
|
return clone;
|
|
}
|
|
|
|
// Releases a locale object by passing it to free(3).
void freelocale(locale_t l) {
  free(l);
}
|
|
|
|
locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
|
|
// Is 'category_mask' valid?
|
|
if ((category_mask & ~LC_ALL_MASK) != 0) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
if (!__is_supported_locale(locale_name)) {
|
|
errno = ENOENT;
|
|
return NULL;
|
|
}
|
|
|
|
return __new_locale();
|
|
}
|
|
|
|
char* setlocale(int category, const char* locale_name) {
|
|
// Is 'category' valid?
|
|
if (category < LC_CTYPE || category > LC_IDENTIFICATION) {
|
|
errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
// Caller wants to set the locale rather than just query?
|
|
if (locale_name != NULL) {
|
|
if (!__is_supported_locale(locale_name)) {
|
|
// We don't support this locale.
|
|
errno = ENOENT;
|
|
return NULL;
|
|
}
|
|
__bionic_current_locale_is_utf8 = (strstr(locale_name, "UTF-8") != NULL);
|
|
}
|
|
|
|
return const_cast<char*>(__bionic_current_locale_is_utf8 ? "C.UTF-8" : "C");
|
|
}
|
|
|
|
locale_t uselocale(locale_t new_locale) {
|
|
locale_t old_locale = static_cast<locale_t>(pthread_getspecific(gUselocaleKey));
|
|
|
|
// If this is the first call to uselocale(3) on this thread, we return LC_GLOBAL_LOCALE.
|
|
if (old_locale == NULL) {
|
|
old_locale = LC_GLOBAL_LOCALE;
|
|
}
|
|
|
|
if (new_locale != NULL) {
|
|
pthread_setspecific(gUselocaleKey, new_locale);
|
|
}
|
|
|
|
return old_locale;
|
|
}
|