<ctype.h>: stop using _ctype_
.
The code comment that's being removed here defends the old implementation by claiming that it's faster. Annoyingly, we don't know what hardware that was run on.

Running on current-ish hardware (cheetah), I can't really tell the difference except: (a) for hwasan, avoiding the unsafe memory access by _not_ using the array is a huge win, and (b) even for arm32 the logic is (very slightly) faster than the array lookup. So let's get rid of the unsafety (as musl and FreeBSD have already done) and the large hwasan slowdown (10ns vs 2ns). It's possible in-order cores might still care, but it's 2023 and it's time to move on.

This change _does not_ remove `_ctype_` and associated macros from the headers, though we might want to come back and do that. Historically libc++ used these implementation details directly, but that's no longer the case, and it seems unlikely that anyone else is, and today's results suggest they probably shouldn't anyway, and doing so only ever really made sense for something like ISO-Latin-1 anyway. Most ASCII tests are _always_ better off inlined, and Android's never supported non-ASCII for <ctype.h> anyway (use the isw*() functions if you want that, but bear in mind that if you're actually dealing with human languages, you probably want icu4c rather than libc anyway).

Test: treehugger & benchmarks
Change-Id: Ifac25c23ac33e996a3c726317b5c6e602dc72e30
This commit is contained in:
parent
f89da9b996
commit
69270ed6b9
1 changed files with 10 additions and 18 deletions
|
@ -73,15 +73,6 @@ __BEGIN_DECLS
|
|||
/**
 * Internal implementation detail. Do not use.
 *
 * The table-based isalnum() and ispunct() in this header read it as
 * `_ctype_[ch + 1]` and mask the entry with `_CTYPE_*` bits, so indexing is
 * only in range for the inputs those functions document as safe
 * (-1/EOF through 0xff).
 */
|
||||
extern const char* _ctype_;
|
||||
|
||||
/** Returns true if `ch` is in `[A-Za-z0-9]`. */
|
||||
__BIONIC_CTYPE_INLINE int isalnum(int __ch) {
|
||||
// `isalnum(c)` is `isalpha(c) || isdigit(c)`, but there's no obvious way
|
||||
// to simplify that, and the table lookup is just slightly faster...
|
||||
// Note that this is unsafe for inputs less than -1 (EOF) or greater than
|
||||
// 0xff. This is true of other C libraries too.
|
||||
return (_ctype_[__ch + 1] & (_CTYPE_U|_CTYPE_L|_CTYPE_N));
|
||||
}
|
||||
|
||||
/** Returns true if `ch` is in `[A-Za-z]`. */
|
||||
__BIONIC_CTYPE_INLINE int isalpha(int __ch) {
|
||||
return (__ch >= 'A' && __ch <= 'Z') || (__ch >= 'a' && __ch <= 'z');
|
||||
|
@ -117,15 +108,6 @@ __BIONIC_CTYPE_INLINE int isprint(int __ch) {
|
|||
return (__ch >= ' ' && __ch <= '~');
|
||||
}
|
||||
|
||||
/** Returns true if `ch` is punctuation. */
|
||||
__BIONIC_CTYPE_INLINE int ispunct(int __ch) {
|
||||
// `ispunct(c)` is `isgraph(c) && !isalnum(c)`, but there's no obvious way
|
||||
// to simplify that, and the table lookup is just slightly faster...
|
||||
// Note that this is unsafe for inputs less than -1 (EOF) or greater than
|
||||
// 0xff. This is true of other C libraries too.
|
||||
return (_ctype_[__ch + 1] & _CTYPE_P);
|
||||
}
|
||||
|
||||
/** Returns true if `ch` is in `[ \f\n\r\t\v]`. */
|
||||
__BIONIC_CTYPE_INLINE int isspace(int __ch) {
|
||||
return __ch == ' ' || (__ch >= '\t' && __ch <= '\r');
|
||||
|
@ -141,6 +123,16 @@ __BIONIC_CTYPE_INLINE int isxdigit(int __ch) {
|
|||
return (__ch >= '0' && __ch <= '9') || (__ch >= 'a' && __ch <= 'f') || (__ch >= 'A' && __ch <= 'F');
|
||||
}
|
||||
|
||||
/** Returns true if `ch` is in `[A-Za-z0-9]`. */
|
||||
__BIONIC_CTYPE_INLINE int isalnum(int __ch) {
|
||||
return isalpha(__ch) || isdigit(__ch);
|
||||
}
|
||||
|
||||
/** Returns true if `ch` is punctuation. */
|
||||
__BIONIC_CTYPE_INLINE int ispunct(int __ch) {
|
||||
return isgraph(__ch) && !isalnum(__ch);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the corresponding lower-case character if `ch` is upper-case, or undefined otherwise.
|
||||
*
|
||||
|
|
Loading…
Reference in a new issue