platform_bionic/libc/bionic/vdso.cpp

140 lines
4.5 KiB
C++
Raw Normal View History

/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "private/bionic_globals.h"
#include "private/bionic_vdso.h"
#include <limits.h>
#include <link.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include "private/KernelArgumentBlock.h"
// Converts the raw result of a vDSO call into the libc return convention.
//
// A result of 0 is passed through unchanged. Any other result is negated and
// stored in errno, and -1 is returned to the caller.
static inline int vdso_return(int result) {
  if (__predict_true(result == 0)) {
    return 0;
  }

  const int error_code = -result;
  errno = error_code;
  return -1;
}
// clock_gettime() entry point.
//
// If the VDSO_CLOCK_GETTIME slot of __libc_globals->vdso holds a non-null
// function pointer, that function is called and its result is translated by
// vdso_return(). Otherwise the call is forwarded to __clock_gettime().
int clock_gettime(int clock_id, timespec* tp) {
  using clock_gettime_fn = decltype(&clock_gettime);

  auto fast_path =
      reinterpret_cast<clock_gettime_fn>(__libc_globals->vdso[VDSO_CLOCK_GETTIME].fn);
  if (__predict_true(fast_path)) {
    const int raw_result = fast_path(clock_id, tp);
    return vdso_return(raw_result);
  }

  return __clock_gettime(clock_id, tp);
}
bionic: add vdso clock_getres clock_getres() should not be a hot call, nevertheless it is ~6-7 times faster for supported clock ids if it uses __vdso_clock_getres if available. There is a 3% performance penalty for unsupported clock ids via __vdso_clock_getres with respect to a direct syscall. [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_getres 126 ns 126 ns 5577874 BM_time_clock_getres_syscall 127 ns 127 ns 5505016 BM_time_clock_getres_REALTIME 126 ns 126 ns 5574682 BM_time_clock_getres_BOOTTIME 126 ns 126 ns 5575237 BM_time_clock_getres_TAI 126 ns 126 ns 5576810 BM_time_clock_getres_unsupported 128 ns 128 ns 5480189 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_getres 199 ns 199 ns 3508708 BM_time_clock_getres_syscall 220 ns 220 ns 3184676 BM_time_clock_getres_REALTIME 199 ns 199 ns 3509697 BM_time_clock_getres_BOOTTIME 199 ns 199 ns 3513551 BM_time_clock_getres_TAI 200 ns 199 ns 3512412 BM_time_clock_getres_unsupported 196 ns 196 ns 3575609 x86_64 (glibc): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 252 ns 252 ns 2370263 BM_time_clock_getres_syscall 215 ns 215 ns 3287497 BM_time_clock_getres_REALTIME 214 ns 214 ns 3294228 BM_time_clock_getres_BOOTTIME 213 ns 213 ns 3277519 BM_time_clock_getres_TAI 213 ns 213 ns 3294991 BM_time_clock_getres_unsupported 206 ns 206 ns 3450654 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (Virtual Timers) Benchmark Time(ns) CPU(ns) Iterations 
------------------------------------------------------------------ BM_time_clock_getres 16 345 2000000 BM_time_clock_getres_syscall 16 339 2121212 BM_time_clock_getres_REALTIME 17 350 2058824 BM_time_clock_getres_BOOTTIME 17 345 2000000 BM_time_clock_getres_TAI 16 350 2000000 BM_time_clock_getres_unsupported 13 284 2500000 AFTER: hikey960 vdso (aarch64): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 18 ns 18 ns 37880389 BM_time_clock_getres_syscall 127 ns 127 ns 5520029 BM_time_clock_getres_REALTIME 18 ns 18 ns 37879962 BM_time_clock_getres_BOOTTIME 19 ns 18 ns 37878361 BM_time_clock_getres_TAI 131 ns 131 ns 5368484 BM_time_clock_getres_unsupported 97 ns 97 ns 7182864 hikey960 vdso32 (aarch32): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 36 ns 36 ns 19205240 BM_time_clock_getres_syscall 212 ns 212 ns 3297100 BM_time_clock_getres_REALTIME 36 ns 36 ns 19219109 BM_time_clock_getres_BOOTTIME 36 ns 36 ns 19222490 BM_time_clock_getres_TAI 206 ns 206 ns 3402868 BM_time_clock_getres_unsupported 159 ns 159 ns 4409492 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (Physical Timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_getres 2 48 14000000 BM_time_clock_getres_syscall 14 335 2058824 BM_time_clock_getres_REALTIME 2 49 14583333 BM_time_clock_getres_BOOTTIME 2 48 14000000 BM_time_clock_getres_TAI 14 350 2058824 BM_time_clock_getres_unsupported 8 203 3500000 Test: taskset F \ /data/benchmarktest{64}/bionic-benchmarks/bionic-benchmarks \ --bionic_xml=vdso.xml --benchmark_filter=BM_time_clock_getres* Bug: 63737556 Change-Id: I80c0a5106625d76720287f715fcf145d2aad1705
2017-11-07 17:19:20 +01:00
// clock_getres() entry point.
//
// Looks up the VDSO_CLOCK_GETRES slot of __libc_globals->vdso. A non-null
// entry is invoked and its result is mapped through vdso_return(); a null
// entry means the call goes to __clock_getres() instead.
int clock_getres(int clock_id, timespec* tp) {
  const auto& slot = __libc_globals->vdso[VDSO_CLOCK_GETRES];
  auto resolver = reinterpret_cast<decltype(&clock_getres)>(slot.fn);

  if (__predict_true(resolver)) {
    return vdso_return(resolver(clock_id, tp));
  }

  return __clock_getres(clock_id, tp);
}
int gettimeofday(timeval* tv, struct timezone* tz) {
auto vdso_gettimeofday = reinterpret_cast<decltype(&gettimeofday)>(
__libc_globals->vdso[VDSO_GETTIMEOFDAY].fn);
if (__predict_true(vdso_gettimeofday)) {
return vdso_return(vdso_gettimeofday(tv, tz));
}
return __gettimeofday(tv, tz);
}
bionic: add vdso time() time() can be a hot call, and it currently uses __vdso_gettimeofday, which is already pretty fast (~3 times faster than the syscall), but with a __vdso_time call it is ~3 times even faster, in part because __vdso_time does not require interlocking with updates, and the read for just the seconds is atomic. __vdso_time is always available, whereas __vdso_gettimeofday is gated on access to the physical timers. arm improvement is compelling (x10), x86 improvement is even more pronounced (x100). [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14969643 BM_time_gettimeofday_syscall 163 ns 163 ns 4283542 BM_time_time 59 ns 59 ns 11815385 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9608922 BM_time_gettimeofday_syscall 200 ns 199 ns 3527957 BM_time_time 123 ns 123 ns 5651095 x86_64 (glibc): -------------------------------------------------------------------- Benchmark Time CPU Iterations 
-------------------------------------------------------------------- BM_time_clock_gettime 21 ns 21 ns 28873070 BM_time_clock_gettime_syscall 224 ns 224 ns 3095370 BM_time_clock_gettime_REALTIME 17 ns 17 ns 42083086 BM_time_clock_gettime_BOOTTIME 239 ns 239 ns 2924015 BM_time_clock_gettime_TAI 236 ns 236 ns 2961423 BM_time_clock_gettime_unsupported 221 ns 221 ns 3357696 BM_time_gettimeofday 22 ns 22 ns 27975154 BM_time_gettimeofday_syscall 238 ns 238 ns 2882032 BM_time_time 2 ns 2 ns 340354885 BM_time_time_syscall 207 ns 207 ns 3383073 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (virtual timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 21 497 1166667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (physical timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 9 203 3500000 AFTER: hikey960 vdso (aarch64): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 
ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14975314 BM_time_gettimeofday_syscall 164 ns 164 ns 4278797 BM_time_time 16 ns 16 ns 42932165 hikey960 vdso32 (aarch32): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9596230 BM_time_gettimeofday_syscall 199 ns 199 ns 3575428 BM_time_time 35 ns 35 ns 19798801 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 2 50 11666667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 2 50 
10000000 Test: bionic-unit-tests --gtest_filter=time.time taskset F bionic-benchmarks --bionic_xml=vdso.xml \ --benchmark_filter='BM_time_(time*|clock_gettime*|gettimeofday*)' Bug: 63737556 Change-Id: I81b088a12ca41a6c4733d46c5477527777138efa
2017-12-04 22:51:29 +01:00
time_t time(time_t* t) {
auto vdso_time = reinterpret_cast<decltype(&time)>(__libc_globals->vdso[VDSO_TIME].fn);
bionic: add vdso time() time() can be a hot call, and it currently uses __vdso_gettimeofday, which is already pretty fast (~3 times faster than the syscall), but with a __vdso_time call it is ~3 times even faster, in part because __vdso_time does not require interlocking with updates, and the read for just the seconds is atomic. __vdso_time is always available, whereas __vdso_gettimeofday is gated on access to the physical timers. arm improvement is compelling (x10), x86 improvement is even more pronounced (x100). [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14969643 BM_time_gettimeofday_syscall 163 ns 163 ns 4283542 BM_time_time 59 ns 59 ns 11815385 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9608922 BM_time_gettimeofday_syscall 200 ns 199 ns 3527957 BM_time_time 123 ns 123 ns 5651095 x86_64 (glibc): -------------------------------------------------------------------- Benchmark Time CPU Iterations 
-------------------------------------------------------------------- BM_time_clock_gettime 21 ns 21 ns 28873070 BM_time_clock_gettime_syscall 224 ns 224 ns 3095370 BM_time_clock_gettime_REALTIME 17 ns 17 ns 42083086 BM_time_clock_gettime_BOOTTIME 239 ns 239 ns 2924015 BM_time_clock_gettime_TAI 236 ns 236 ns 2961423 BM_time_clock_gettime_unsupported 221 ns 221 ns 3357696 BM_time_gettimeofday 22 ns 22 ns 27975154 BM_time_gettimeofday_syscall 238 ns 238 ns 2882032 BM_time_time 2 ns 2 ns 340354885 BM_time_time_syscall 207 ns 207 ns 3383073 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (virtual timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 21 497 1166667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (physical timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 9 203 3500000 AFTER: hikey960 vdso (aarch64): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 
ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14975314 BM_time_gettimeofday_syscall 164 ns 164 ns 4278797 BM_time_time 16 ns 16 ns 42932165 hikey960 vdso32 (aarch32): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9596230 BM_time_gettimeofday_syscall 199 ns 199 ns 3575428 BM_time_time 35 ns 35 ns 19798801 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 2 50 11666667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 2 50 
10000000 Test: bionic-unit-tests --gtest_filter=time.time taskset F bionic-benchmarks --bionic_xml=vdso.xml \ --benchmark_filter='BM_time_(time*|clock_gettime*|gettimeofday*)' Bug: 63737556 Change-Id: I81b088a12ca41a6c4733d46c5477527777138efa
2017-12-04 22:51:29 +01:00
if (__predict_true(vdso_time)) {
return vdso_time(t);
}
// We can't fallback to the time(2) system call because it doesn't exist for most architectures.
timeval tv;
if (gettimeofday(&tv, nullptr) == -1) return -1;
if (t) *t = tv.tv_sec;
return tv.tv_sec;
bionic: add vdso time() time() can be a hot call, and it currently uses __vdso_gettimeofday, which is already pretty fast (~3 times faster than the syscall), but with a __vdso_time call it is ~3 times even faster, in part because __vdso_time does not require interlocking with updates, and the read for just the seconds is atomic. __vdso_time is always available, whereas __vdso_gettimeofday is gated on access to the physical timers. arm improvement is compelling (x10), x86 improvement is even more pronounced (x100). [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14969643 BM_time_gettimeofday_syscall 163 ns 163 ns 4283542 BM_time_time 59 ns 59 ns 11815385 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9608922 BM_time_gettimeofday_syscall 200 ns 199 ns 3527957 BM_time_time 123 ns 123 ns 5651095 x86_64 (glibc): -------------------------------------------------------------------- Benchmark Time CPU Iterations 
-------------------------------------------------------------------- BM_time_clock_gettime 21 ns 21 ns 28873070 BM_time_clock_gettime_syscall 224 ns 224 ns 3095370 BM_time_clock_gettime_REALTIME 17 ns 17 ns 42083086 BM_time_clock_gettime_BOOTTIME 239 ns 239 ns 2924015 BM_time_clock_gettime_TAI 236 ns 236 ns 2961423 BM_time_clock_gettime_unsupported 221 ns 221 ns 3357696 BM_time_gettimeofday 22 ns 22 ns 27975154 BM_time_gettimeofday_syscall 238 ns 238 ns 2882032 BM_time_time 2 ns 2 ns 340354885 BM_time_time_syscall 207 ns 207 ns 3383073 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (virtual timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 21 497 1166667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (physical timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 9 203 3500000 AFTER: hikey960 vdso (aarch64): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 
ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14975314 BM_time_gettimeofday_syscall 164 ns 164 ns 4278797 BM_time_time 16 ns 16 ns 42932165 hikey960 vdso32 (aarch32): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9596230 BM_time_gettimeofday_syscall 199 ns 199 ns 3575428 BM_time_time 35 ns 35 ns 19798801 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 2 50 11666667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 2 50 
10000000 Test: bionic-unit-tests --gtest_filter=time.time taskset F bionic-benchmarks --bionic_xml=vdso.xml \ --benchmark_filter='BM_time_(time*|clock_gettime*|gettimeofday*)' Bug: 63737556 Change-Id: I81b088a12ca41a6c4733d46c5477527777138efa
2017-12-04 22:51:29 +01:00
}
void __libc_init_vdso(libc_globals* globals, KernelArgumentBlock& args) {
auto&& vdso = globals->vdso;
vdso[VDSO_CLOCK_GETTIME] = { VDSO_CLOCK_GETTIME_SYMBOL, nullptr };
bionic: add vdso clock_getres clock_getres() should not be a hot call, nevertheless it is ~6-7 times faster for supported clock ids if it uses __vdso_clock_getres if available. There is a 3% performance penalty for unsupported clock ids via __vdso_clock_getres with respect to a direct syscall. [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_getres 126 ns 126 ns 5577874 BM_time_clock_getres_syscall 127 ns 127 ns 5505016 BM_time_clock_getres_REALTIME 126 ns 126 ns 5574682 BM_time_clock_getres_BOOTTIME 126 ns 126 ns 5575237 BM_time_clock_getres_TAI 126 ns 126 ns 5576810 BM_time_clock_getres_unsupported 128 ns 128 ns 5480189 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_getres 199 ns 199 ns 3508708 BM_time_clock_getres_syscall 220 ns 220 ns 3184676 BM_time_clock_getres_REALTIME 199 ns 199 ns 3509697 BM_time_clock_getres_BOOTTIME 199 ns 199 ns 3513551 BM_time_clock_getres_TAI 200 ns 199 ns 3512412 BM_time_clock_getres_unsupported 196 ns 196 ns 3575609 x86_64 (glibc): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 252 ns 252 ns 2370263 BM_time_clock_getres_syscall 215 ns 215 ns 3287497 BM_time_clock_getres_REALTIME 214 ns 214 ns 3294228 BM_time_clock_getres_BOOTTIME 213 ns 213 ns 3277519 BM_time_clock_getres_TAI 213 ns 213 ns 3294991 BM_time_clock_getres_unsupported 206 ns 206 ns 3450654 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (Virtual Timers) Benchmark Time(ns) CPU(ns) Iterations 
------------------------------------------------------------------ BM_time_clock_getres 16 345 2000000 BM_time_clock_getres_syscall 16 339 2121212 BM_time_clock_getres_REALTIME 17 350 2058824 BM_time_clock_getres_BOOTTIME 17 345 2000000 BM_time_clock_getres_TAI 16 350 2000000 BM_time_clock_getres_unsupported 13 284 2500000 AFTER: hikey960 vdso (aarch64): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 18 ns 18 ns 37880389 BM_time_clock_getres_syscall 127 ns 127 ns 5520029 BM_time_clock_getres_REALTIME 18 ns 18 ns 37879962 BM_time_clock_getres_BOOTTIME 19 ns 18 ns 37878361 BM_time_clock_getres_TAI 131 ns 131 ns 5368484 BM_time_clock_getres_unsupported 97 ns 97 ns 7182864 hikey960 vdso32 (aarch32): --------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------- BM_time_clock_getres 36 ns 36 ns 19205240 BM_time_clock_getres_syscall 212 ns 212 ns 3297100 BM_time_clock_getres_REALTIME 36 ns 36 ns 19219109 BM_time_clock_getres_BOOTTIME 36 ns 36 ns 19222490 BM_time_clock_getres_TAI 206 ns 206 ns 3402868 BM_time_clock_getres_unsupported 159 ns 159 ns 4409492 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (Physical Timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_getres 2 48 14000000 BM_time_clock_getres_syscall 14 335 2058824 BM_time_clock_getres_REALTIME 2 49 14583333 BM_time_clock_getres_BOOTTIME 2 48 14000000 BM_time_clock_getres_TAI 14 350 2058824 BM_time_clock_getres_unsupported 8 203 3500000 Test: taskset F \ /data/benchmarktest{64}/bionic-benchmarks/bionic-benchmarks \ --bionic_xml=vdso.xml --benchmark_filter=BM_time_clock_getres* Bug: 63737556 Change-Id: I80c0a5106625d76720287f715fcf145d2aad1705
2017-11-07 17:19:20 +01:00
vdso[VDSO_CLOCK_GETRES] = { VDSO_CLOCK_GETRES_SYMBOL, nullptr };
vdso[VDSO_GETTIMEOFDAY] = { VDSO_GETTIMEOFDAY_SYMBOL, nullptr };
bionic: add vdso time() time() can be a hot call, and it currently uses __vdso_gettimeofday, which is already pretty fast (~3 times faster than the syscall), but with a __vdso_time call it is ~3 times even faster, in part because __vdso_time does not require interlocking with updates, and the read for just the seconds is atomic. __vdso_time is always available, whereas __vdso_gettimeofday is gated on access to the physical timers. arm improvement is compelling (x10), x86 improvement is even more pronounced (x100). [TL;DR] w/vdso32 kernel patches, locked cores to MAX, little cores only. BEFORE: hikey960 vdso (aarch64): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14969643 BM_time_gettimeofday_syscall 163 ns 163 ns 4283542 BM_time_time 59 ns 59 ns 11815385 hikey960 vdso32 (aarch32): ---------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9608922 BM_time_gettimeofday_syscall 200 ns 199 ns 3527957 BM_time_time 123 ns 123 ns 5651095 x86_64 (glibc): -------------------------------------------------------------------- Benchmark Time CPU Iterations 
-------------------------------------------------------------------- BM_time_clock_gettime 21 ns 21 ns 28873070 BM_time_clock_gettime_syscall 224 ns 224 ns 3095370 BM_time_clock_gettime_REALTIME 17 ns 17 ns 42083086 BM_time_clock_gettime_BOOTTIME 239 ns 239 ns 2924015 BM_time_clock_gettime_TAI 236 ns 236 ns 2961423 BM_time_clock_gettime_unsupported 221 ns 221 ns 3357696 BM_time_gettimeofday 22 ns 22 ns 27975154 BM_time_gettimeofday_syscall 238 ns 238 ns 2882032 BM_time_time 2 ns 2 ns 340354885 BM_time_time_syscall 207 ns 207 ns 3383073 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): (virtual timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 21 497 1166667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): (physical timers) Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 9 203 3500000 AFTER: hikey960 vdso (aarch64): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 48 ns 48 ns 15414753 BM_time_clock_gettime_syscall 175 ns 175 ns 4062031 BM_time_clock_gettime_REALTIME 44 ns 44 
ns 15897875 BM_time_clock_gettime_BOOTTIME 47 ns 47 ns 14307903 BM_time_clock_gettime_TAI 210 ns 210 ns 3341372 BM_time_clock_gettime_unsupported 100 ns 100 ns 7030649 BM_time_gettimeofday 47 ns 47 ns 14975314 BM_time_gettimeofday_syscall 164 ns 164 ns 4278797 BM_time_time 16 ns 16 ns 42932165 hikey960 vdso32 (aarch32): -------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------- BM_time_clock_gettime 90 ns 90 ns 7572898 BM_time_clock_gettime_syscall 251 ns 251 ns 2763442 BM_time_clock_gettime_REALTIME 81 ns 80 ns 8699536 BM_time_clock_gettime_BOOTTIME 97 ns 97 ns 7256667 BM_time_clock_gettime_TAI 272 ns 272 ns 2570419 BM_time_clock_gettime_unsupported 160 ns 160 ns 4379819 BM_time_gettimeofday 73 ns 73 ns 9596230 BM_time_gettimeofday_syscall 199 ns 199 ns 3575428 BM_time_time 35 ns 35 ns 19798801 imx7d_pico IOT nyc (w/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 20 477 1489362 BM_time_clock_gettime_syscall 20 487 1458333 BM_time_clock_gettime_REALTIME 19 464 1400000 BM_time_clock_gettime_BOOTTIME 29 700 1000000 BM_time_clock_gettime_TAI 29 690 1000000 BM_time_clock_gettime_unsupported 9 227 3043478 BM_time_gettimeofday 18 444 1555556 BM_time_gettimeofday_syscall 19 456 1555556 BM_time_time 2 50 11666667 imx7d_pico IOT nyc (wo/arm,cpu-registers-not-fw-configured) (armv7a): Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------------------------ BM_time_clock_gettime 6 144 4666667 BM_time_clock_gettime_syscall 20 486 1400000 BM_time_clock_gettime_REALTIME 6 136 5000000 BM_time_clock_gettime_BOOTTIME 6 153 4375000 BM_time_clock_gettime_TAI 31 760 1000000 BM_time_clock_gettime_unsupported 10 233 3043478 BM_time_gettimeofday 6 140 5000000 BM_time_gettimeofday_syscall 19 450 1555556 BM_time_time 2 50 
10000000 Test: bionic-unit-tests --gtest_filter=time.time taskset F bionic-benchmarks --bionic_xml=vdso.xml \ --benchmark_filter='BM_time_(time*|clock_gettime*|gettimeofday*)' Bug: 63737556 Change-Id: I81b088a12ca41a6c4733d46c5477527777138efa
2017-12-04 22:51:29 +01:00
vdso[VDSO_TIME] = { VDSO_TIME_SYMBOL, nullptr };
// Do we have a vdso?
uintptr_t vdso_ehdr_addr = args.getauxval(AT_SYSINFO_EHDR);
ElfW(Ehdr)* vdso_ehdr = reinterpret_cast<ElfW(Ehdr)*>(vdso_ehdr_addr);
if (vdso_ehdr == nullptr) {
return;
}
// How many symbols does it have?
size_t symbol_count = 0;
ElfW(Shdr)* vdso_shdr = reinterpret_cast<ElfW(Shdr)*>(vdso_ehdr_addr + vdso_ehdr->e_shoff);
for (size_t i = 0; i < vdso_ehdr->e_shnum; ++i) {
if (vdso_shdr[i].sh_type == SHT_DYNSYM) {
symbol_count = vdso_shdr[i].sh_size / sizeof(ElfW(Sym));
}
}
if (symbol_count == 0) {
return;
}
// Where's the dynamic table?
ElfW(Addr) vdso_addr = 0;
ElfW(Dyn)* vdso_dyn = nullptr;
ElfW(Phdr)* vdso_phdr = reinterpret_cast<ElfW(Phdr)*>(vdso_ehdr_addr + vdso_ehdr->e_phoff);
for (size_t i = 0; i < vdso_ehdr->e_phnum; ++i) {
if (vdso_phdr[i].p_type == PT_DYNAMIC) {
vdso_dyn = reinterpret_cast<ElfW(Dyn)*>(vdso_ehdr_addr + vdso_phdr[i].p_offset);
} else if (vdso_phdr[i].p_type == PT_LOAD) {
vdso_addr = vdso_ehdr_addr + vdso_phdr[i].p_offset - vdso_phdr[i].p_vaddr;
}
}
if (vdso_addr == 0 || vdso_dyn == nullptr) {
return;
}
// Where are the string and symbol tables?
const char* strtab = nullptr;
ElfW(Sym)* symtab = nullptr;
for (ElfW(Dyn)* d = vdso_dyn; d->d_tag != DT_NULL; ++d) {
if (d->d_tag == DT_STRTAB) {
strtab = reinterpret_cast<const char*>(vdso_addr + d->d_un.d_ptr);
} else if (d->d_tag == DT_SYMTAB) {
symtab = reinterpret_cast<ElfW(Sym)*>(vdso_addr + d->d_un.d_ptr);
}
}
if (strtab == nullptr || symtab == nullptr) {
return;
}
// Are there any symbols we want?
for (size_t i = 0; i < symbol_count; ++i) {
for (size_t j = 0; j < VDSO_END; ++j) {
if (strcmp(vdso[j].name, strtab + symtab[i].st_name) == 0) {
vdso[j].fn = reinterpret_cast<void*>(vdso_addr + symtab[i].st_value);
}
}
}
}