Microoptimize the strtol() family.

The main change here is to remember that on arm64/x86-64 we have flags, and
__builtin_<op>_overflow() lets us look at them. There's a clear saving
for arm64, and x86-64 is better too. Interestingly, on x86-64 the
unsigned case doesn't work out as well as the signed case, because both
unsigned __builtin_mul_overflow and unsigned __builtin_add_overflow are
less efficient than the signed multiply and subtract on that
architecture. Even so, the new code still beats the old code.

There's a very tiny microoptimization of the hex path that takes
advantage of the fact that conversion to lowercase is a single
instruction on all our architectures when we already know we're dealing
with a letter.

This also merges the signed and unsigned variants of the code -- not
entirely successfully, but the vast majority of the code benefits.

Before (arm64):
```
----------------------------------------------------------------
Benchmark                      Time             CPU   Iterations
----------------------------------------------------------------
BM_inttypes_strtoimax       44.6 ns         44.3 ns     15807654
BM_inttypes_strtoumax       43.1 ns         42.8 ns     16348848
BM_stdlib_strtol            44.6 ns         44.3 ns     15805384
BM_stdlib_strtol_hex        85.5 ns         85.0 ns      8235487
BM_stdlib_strtoll           44.5 ns         44.2 ns     15833137
BM_stdlib_strtoul           43.1 ns         42.8 ns     16353963
BM_stdlib_strtoul_hex       83.1 ns         82.6 ns      8477732
BM_stdlib_strtoull          43.1 ns         42.8 ns     16353015
```

After (arm64):
```
----------------------------------------------------------------
Benchmark                      Time             CPU   Iterations
----------------------------------------------------------------
BM_inttypes_strtoimax       37.9 ns         37.6 ns     17657577
BM_inttypes_strtoumax       35.9 ns         35.7 ns     19597727
BM_stdlib_strtol            36.9 ns         36.7 ns     19093037
BM_stdlib_strtol_hex        70.7 ns         70.3 ns      9961626
BM_stdlib_strtoll           36.9 ns         36.7 ns     19093032
BM_stdlib_strtoul           35.9 ns         35.7 ns     19617784
BM_stdlib_strtoul_hex       67.7 ns         67.3 ns     10113521
BM_stdlib_strtoull          35.9 ns         35.7 ns     19621828
```

Test: treehugger
Change-Id: Ibf53b29e34d63ac31520c6d27ef80ff281899d61
This commit is contained in:
Elliott Hughes 2023-12-12 14:05:03 -08:00
parent 14b322b251
commit ca89b33108

View file

@@ -35,141 +35,88 @@
#include <wchar.h>
template <typename T, T Min, T Max, typename CharT>
T StrToI(const CharT* nptr, CharT** endptr, int base) {
__attribute__((always_inline)) T StrToI(const CharT* s, CharT** end_ptr, int base) {
// Ensure that base is between 2 and 36 inclusive, or the special value of 0.
if (base < 0 || base == 1 || base > 36) {
if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(s);
errno = EINVAL;
return 0;
}
// Skip white space and pick up leading +/- sign if any.
// If base is 0, allow 0x for hex and 0 for octal, else
// assume decimal; if base is already 16, allow 0x.
const CharT* s = nptr;
const CharT* p = s;
int c;
do {
c = *s++;
} while (isspace(c));
int neg;
if (c == '-') {
neg = 1;
c = *s++;
} else {
neg = 0;
if (c == '+') c = *s++;
while (isspace(c = *p++)) {
}
if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
c = s[1];
s += 2;
bool neg = false;
if (c == '-') {
neg = true;
c = *p++;
} else if (c == '+') {
c = *p++;
}
// If base is 0 or 16, allow "0x" prefix for hex.
if ((base == 0 || base == 16) && c == '0' && (*p == 'x' || *p == 'X') && isxdigit(p[1])) {
c = p[1];
p += 2;
base = 16;
}
if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
c = s[1];
s += 2;
// If base is 0 or 2, allow "0b" prefix for binary.
if ((base == 0 || base == 2) && c == '0' && (*p == 'b' || *p == 'B') && isdigit(p[1])) {
c = p[1];
p += 2;
base = 2;
}
// If base is 0, allow "0" prefix for octal, otherwise base is 10.
if (base == 0) base = (c == '0') ? 8 : 10;
// We always work in the negative space because the most negative value has a
// larger magnitude than the most positive value.
T cutoff = Min / base;
int cutlim = -(Min % base);
constexpr bool is_signed = (Min != 0);
T acc = 0;
// Non-zero if any digits consumed; negative to indicate overflow/underflow.
int any = 0;
T acc = 0;
for (; ; c = *s++) {
for (;; c = *p++) {
if (isdigit(c)) {
c -= '0';
} else if (isalpha(c)) {
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
c = 10 + (_tolower(c) - 'a');
} else {
break;
}
if (c >= base) break;
if (any < 0) continue;
if (acc < cutoff || (acc == cutoff && c > cutlim)) {
any = -1;
acc = Min;
errno = ERANGE;
if (is_signed) {
// We work in the negative space because the most negative value has a
// larger magnitude than the most positive value.
if (__builtin_mul_overflow(acc, base, &acc) || __builtin_sub_overflow(acc, c, &acc)) {
any = -1;
continue;
}
} else {
any = 1;
acc *= base;
acc -= c;
if (__builtin_mul_overflow(acc, base, &acc) || __builtin_add_overflow(acc, c, &acc)) {
any = -1;
continue;
}
}
}
if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
if (!neg) {
if (acc == Min) {
errno = ERANGE;
acc = Max;
} else {
acc = -acc;
}
}
return acc;
}
template <typename T, T Max, typename CharT>
T StrToU(const CharT* nptr, CharT** endptr, int base) {
if (base < 0 || base == 1 || base > 36) {
if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
errno = EINVAL;
return 0;
any = 1;
}
const CharT* s = nptr;
int c;
do {
c = *s++;
} while (isspace(c));
int neg;
if (c == '-') {
neg = 1;
c = *s++;
} else {
neg = 0;
if (c == '+') c = *s++;
}
if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
c = s[1];
s += 2;
base = 16;
}
if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
c = s[1];
s += 2;
base = 2;
}
if (base == 0) base = (c == '0') ? 8 : 10;
if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(any ? p - 1 : s);
T cutoff = Max / static_cast<T>(base);
int cutlim = Max % static_cast<T>(base);
T acc = 0;
int any = 0;
for (; ; c = *s++) {
if (isdigit(c)) {
c -= '0';
} else if (isalpha(c)) {
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
} else {
break;
}
if (c >= base) break;
if (any < 0) continue;
if (acc > cutoff || (acc == cutoff && c > cutlim)) {
any = -1;
acc = Max;
errno = ERANGE;
} else {
any = 1;
acc *= base;
acc += c;
}
// Detected overflow/underflow in the loop?
if (any == -1) {
errno = ERANGE;
return (is_signed && neg) ? Min : Max;
}
if (neg && any > 0) acc = -acc;
if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
return acc;
// Will we overflow by trying to negate the most negative value?
if (any > 0 && is_signed && !neg && acc == Min) {
errno = ERANGE;
return Max;
}
if (is_signed) return neg ? acc : -acc;
return neg ? -acc : acc;
}
int atoi(const char* s) {
@@ -209,25 +156,25 @@ long long wcstoll(const wchar_t* s, wchar_t** end, int base) {
}
unsigned long strtoul(const char* s, char** end, int base) {
return StrToU<unsigned long, ULONG_MAX, char>(s, end, base);
return StrToI<unsigned long, 0, ULONG_MAX, char>(s, end, base);
}
unsigned long wcstoul(const wchar_t* s, wchar_t** end, int base) {
return StrToU<unsigned long, ULONG_MAX, wchar_t>(s, end, base);
return StrToI<unsigned long, 0, ULONG_MAX, wchar_t>(s, end, base);
}
unsigned long long strtoull(const char* s, char** end, int base) {
return StrToU<unsigned long long, ULLONG_MAX, char>(s, end, base);
return StrToI<unsigned long long, 0, ULLONG_MAX, char>(s, end, base);
}
unsigned long long wcstoull(const wchar_t* s, wchar_t** end, int base) {
return StrToU<unsigned long long, ULLONG_MAX, wchar_t>(s, end, base);
return StrToI<unsigned long long, 0, ULLONG_MAX, wchar_t>(s, end, base);
}
uintmax_t strtoumax(const char* s, char** end, int base) {
return StrToU<uintmax_t, UINTMAX_MAX, char>(s, end, base);
return StrToI<uintmax_t, 0, UINTMAX_MAX, char>(s, end, base);
}
uintmax_t wcstoumax(const wchar_t* s, wchar_t** end, int base) {
return StrToU<uintmax_t, UINTMAX_MAX, wchar_t>(s, end, base);
return StrToI<uintmax_t, 0, UINTMAX_MAX, wchar_t>(s, end, base);
}