Microoptimize the strtol() family.
The main change here is to remember that we arm64/x86-64 have flags, and __builtin_<op>_overflow() lets us look at them. There's a clear saving for arm64, and x86-64 is better too, though interestingly there the unsigned case doesn't work out as well as the signed case because both unsigned __builtin_mul_overflow and unsigned __builtin_add_overflow are less efficient than the signed multiply and subtract on that architecture, but the new code still beats the old code even so. There's a very tiny microoptimization of the hex path that takes advantage of the fact that conversion to lowercase is a single instruction on all our architectures when we already know we're dealing with a letter. This also merges the signed and unsigned variants of the code. Not entirely successfully, but the vast majority of the code benefits. Before (arm64): ``` ---------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------- BM_inttypes_strtoimax 44.6 ns 44.3 ns 15807654 BM_inttypes_strtoumax 43.1 ns 42.8 ns 16348848 BM_stdlib_strtol 44.6 ns 44.3 ns 15805384 BM_stdlib_strtol_hex 85.5 ns 85.0 ns 8235487 BM_stdlib_strtoll 44.5 ns 44.2 ns 15833137 BM_stdlib_strtoul 43.1 ns 42.8 ns 16353963 BM_stdlib_strtoul_hex 83.1 ns 82.6 ns 8477732 BM_stdlib_strtoull 43.1 ns 42.8 ns 16353015 ``` After (arm64): ``` ---------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------- BM_inttypes_strtoimax 37.9 ns 37.6 ns 17657577 BM_inttypes_strtoumax 35.9 ns 35.7 ns 19597727 BM_stdlib_strtol 36.9 ns 36.7 ns 19093037 BM_stdlib_strtol_hex 70.7 ns 70.3 ns 9961626 BM_stdlib_strtoll 36.9 ns 36.7 ns 19093032 BM_stdlib_strtoul 35.9 ns 35.7 ns 19617784 BM_stdlib_strtoul_hex 67.7 ns 67.3 ns 10113521 BM_stdlib_strtoull 35.9 ns 35.7 ns 19621828 ``` Test: treehugger Change-Id: Ibf53b29e34d63ac31520c6d27ef80ff281899d61
This commit is contained in:
parent
14b322b251
commit
ca89b33108
1 changed files with 57 additions and 110 deletions
|
@ -35,141 +35,88 @@
|
|||
#include <wchar.h>
|
||||
|
||||
template <typename T, T Min, T Max, typename CharT>
|
||||
T StrToI(const CharT* nptr, CharT** endptr, int base) {
|
||||
__attribute__((always_inline)) T StrToI(const CharT* s, CharT** end_ptr, int base) {
|
||||
// Ensure that base is between 2 and 36 inclusive, or the special value of 0.
|
||||
if (base < 0 || base == 1 || base > 36) {
|
||||
if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
|
||||
if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(s);
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Skip white space and pick up leading +/- sign if any.
|
||||
// If base is 0, allow 0x for hex and 0 for octal, else
|
||||
// assume decimal; if base is already 16, allow 0x.
|
||||
const CharT* s = nptr;
|
||||
const CharT* p = s;
|
||||
int c;
|
||||
do {
|
||||
c = *s++;
|
||||
} while (isspace(c));
|
||||
int neg;
|
||||
if (c == '-') {
|
||||
neg = 1;
|
||||
c = *s++;
|
||||
} else {
|
||||
neg = 0;
|
||||
if (c == '+') c = *s++;
|
||||
while (isspace(c = *p++)) {
|
||||
}
|
||||
if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
|
||||
c = s[1];
|
||||
s += 2;
|
||||
bool neg = false;
|
||||
if (c == '-') {
|
||||
neg = true;
|
||||
c = *p++;
|
||||
} else if (c == '+') {
|
||||
c = *p++;
|
||||
}
|
||||
|
||||
// If base is 0 or 16, allow "0x" prefix for hex.
|
||||
if ((base == 0 || base == 16) && c == '0' && (*p == 'x' || *p == 'X') && isxdigit(p[1])) {
|
||||
c = p[1];
|
||||
p += 2;
|
||||
base = 16;
|
||||
}
|
||||
if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
|
||||
c = s[1];
|
||||
s += 2;
|
||||
// If base is 0 or 2, allow "0b" prefix for binary.
|
||||
if ((base == 0 || base == 2) && c == '0' && (*p == 'b' || *p == 'B') && isdigit(p[1])) {
|
||||
c = p[1];
|
||||
p += 2;
|
||||
base = 2;
|
||||
}
|
||||
// If base is 0, allow "0" prefix for octal, otherwise base is 10.
|
||||
if (base == 0) base = (c == '0') ? 8 : 10;
|
||||
|
||||
// We always work in the negative space because the most negative value has a
|
||||
// larger magnitude than the most positive value.
|
||||
T cutoff = Min / base;
|
||||
int cutlim = -(Min % base);
|
||||
constexpr bool is_signed = (Min != 0);
|
||||
T acc = 0;
|
||||
// Non-zero if any digits consumed; negative to indicate overflow/underflow.
|
||||
int any = 0;
|
||||
T acc = 0;
|
||||
for (; ; c = *s++) {
|
||||
for (;; c = *p++) {
|
||||
if (isdigit(c)) {
|
||||
c -= '0';
|
||||
} else if (isalpha(c)) {
|
||||
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
|
||||
c = 10 + (_tolower(c) - 'a');
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (c >= base) break;
|
||||
if (any < 0) continue;
|
||||
if (acc < cutoff || (acc == cutoff && c > cutlim)) {
|
||||
any = -1;
|
||||
acc = Min;
|
||||
errno = ERANGE;
|
||||
if (is_signed) {
|
||||
// We work in the negative space because the most negative value has a
|
||||
// larger magnitude than the most positive value.
|
||||
if (__builtin_mul_overflow(acc, base, &acc) || __builtin_sub_overflow(acc, c, &acc)) {
|
||||
any = -1;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
any = 1;
|
||||
acc *= base;
|
||||
acc -= c;
|
||||
if (__builtin_mul_overflow(acc, base, &acc) || __builtin_add_overflow(acc, c, &acc)) {
|
||||
any = -1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
|
||||
if (!neg) {
|
||||
if (acc == Min) {
|
||||
errno = ERANGE;
|
||||
acc = Max;
|
||||
} else {
|
||||
acc = -acc;
|
||||
}
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
template <typename T, T Max, typename CharT>
|
||||
T StrToU(const CharT* nptr, CharT** endptr, int base) {
|
||||
if (base < 0 || base == 1 || base > 36) {
|
||||
if (endptr != nullptr) *endptr = const_cast<CharT*>(nptr);
|
||||
errno = EINVAL;
|
||||
return 0;
|
||||
any = 1;
|
||||
}
|
||||
|
||||
const CharT* s = nptr;
|
||||
int c;
|
||||
do {
|
||||
c = *s++;
|
||||
} while (isspace(c));
|
||||
int neg;
|
||||
if (c == '-') {
|
||||
neg = 1;
|
||||
c = *s++;
|
||||
} else {
|
||||
neg = 0;
|
||||
if (c == '+') c = *s++;
|
||||
}
|
||||
if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X') && isxdigit(s[1])) {
|
||||
c = s[1];
|
||||
s += 2;
|
||||
base = 16;
|
||||
}
|
||||
if ((base == 0 || base == 2) && c == '0' && (*s == 'b' || *s == 'B') && isdigit(s[1])) {
|
||||
c = s[1];
|
||||
s += 2;
|
||||
base = 2;
|
||||
}
|
||||
if (base == 0) base = (c == '0') ? 8 : 10;
|
||||
if (end_ptr != nullptr) *end_ptr = const_cast<CharT*>(any ? p - 1 : s);
|
||||
|
||||
T cutoff = Max / static_cast<T>(base);
|
||||
int cutlim = Max % static_cast<T>(base);
|
||||
T acc = 0;
|
||||
int any = 0;
|
||||
for (; ; c = *s++) {
|
||||
if (isdigit(c)) {
|
||||
c -= '0';
|
||||
} else if (isalpha(c)) {
|
||||
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (c >= base) break;
|
||||
if (any < 0) continue;
|
||||
if (acc > cutoff || (acc == cutoff && c > cutlim)) {
|
||||
any = -1;
|
||||
acc = Max;
|
||||
errno = ERANGE;
|
||||
} else {
|
||||
any = 1;
|
||||
acc *= base;
|
||||
acc += c;
|
||||
}
|
||||
// Detected overflow/underflow in the loop?
|
||||
if (any == -1) {
|
||||
errno = ERANGE;
|
||||
return (is_signed && neg) ? Min : Max;
|
||||
}
|
||||
if (neg && any > 0) acc = -acc;
|
||||
if (endptr != nullptr) *endptr = const_cast<CharT*>(any ? s - 1 : nptr);
|
||||
return acc;
|
||||
|
||||
// Will we overflow by trying to negate the most negative value?
|
||||
if (any > 0 && is_signed && !neg && acc == Min) {
|
||||
errno = ERANGE;
|
||||
return Max;
|
||||
}
|
||||
|
||||
if (is_signed) return neg ? acc : -acc;
|
||||
return neg ? -acc : acc;
|
||||
}
|
||||
|
||||
int atoi(const char* s) {
|
||||
|
@ -209,25 +156,25 @@ long long wcstoll(const wchar_t* s, wchar_t** end, int base) {
|
|||
}
|
||||
|
||||
unsigned long strtoul(const char* s, char** end, int base) {
|
||||
return StrToU<unsigned long, ULONG_MAX, char>(s, end, base);
|
||||
return StrToI<unsigned long, 0, ULONG_MAX, char>(s, end, base);
|
||||
}
|
||||
|
||||
unsigned long wcstoul(const wchar_t* s, wchar_t** end, int base) {
|
||||
return StrToU<unsigned long, ULONG_MAX, wchar_t>(s, end, base);
|
||||
return StrToI<unsigned long, 0, ULONG_MAX, wchar_t>(s, end, base);
|
||||
}
|
||||
|
||||
unsigned long long strtoull(const char* s, char** end, int base) {
|
||||
return StrToU<unsigned long long, ULLONG_MAX, char>(s, end, base);
|
||||
return StrToI<unsigned long long, 0, ULLONG_MAX, char>(s, end, base);
|
||||
}
|
||||
|
||||
unsigned long long wcstoull(const wchar_t* s, wchar_t** end, int base) {
|
||||
return StrToU<unsigned long long, ULLONG_MAX, wchar_t>(s, end, base);
|
||||
return StrToI<unsigned long long, 0, ULLONG_MAX, wchar_t>(s, end, base);
|
||||
}
|
||||
|
||||
uintmax_t strtoumax(const char* s, char** end, int base) {
|
||||
return StrToU<uintmax_t, UINTMAX_MAX, char>(s, end, base);
|
||||
return StrToI<uintmax_t, 0, UINTMAX_MAX, char>(s, end, base);
|
||||
}
|
||||
|
||||
uintmax_t wcstoumax(const wchar_t* s, wchar_t** end, int base) {
|
||||
return StrToU<uintmax_t, UINTMAX_MAX, wchar_t>(s, end, base);
|
||||
return StrToI<uintmax_t, 0, UINTMAX_MAX, wchar_t>(s, end, base);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue