platform_bionic/benchmarks/atomic_benchmark.cpp
Christopher Ferris 858e33698d Generate all the benchmarks to run.
Instead of requiring a manually maintained list of all the benchmarks,
add a programmatic way to generate all of the benchmarks.

This generation runs the benchmarks in alphabetical order.

Add a new macro BIONIC_BENCHMARK_WITH_ARG that specifies the default argument
to pass to the benchmark. Change the benchmarks that require default arguments.

Add a small example xml file, and remove the full.xml/host.xml files.

Update readme.

Test: Ran new unit tests, verified all tests are added.
Change-Id: I8036daeae7635393222a7a92d18f34119adba745
2017-11-30 09:09:41 -08:00

150 lines
4.4 KiB
C++

/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .
#include <atomic>
#include <mutex>
#include <benchmark/benchmark.h>
#include "util.h"
// We time atomic operations separated by a volatile (not atomic!) increment. This ensures
// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
// like. That in turn ensures that the CPU has outstanding memory operations when the fence
// is executed.
// In most respects, we compute best case values. Since there is only one thread, there are no
// coherence misses.
// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.
// Plain (non-atomic) volatile counter that each benchmark loop increments once per iteration,
// so that every timed atomic operation is separated by an ordinary memory access.
volatile unsigned int counter = 0;

// The single atomic location that the benchmarks load from, store to, or increment.
std::atomic<int> test_loc{0};

// Volatile destination that benchmarks write their accumulated result to after the loop.
volatile unsigned int sink = 0;

// Lock taken by the critical-section comparison benchmark.
std::mutex mtx;
// Baseline benchmark: each iteration performs only the volatile increment of `counter`, with no
// atomic operation, giving the loop overhead against which the other benchmarks in this file
// can be compared.
// Declared static, like every other benchmark in this file, so the symbol has internal linkage.
static void BM_atomic_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_empty);
// Times a memory_order_relaxed load of test_loc followed by the volatile counter increment.
// The loaded values are summed and the sum is written to `sink` once the loop finishes, so the
// result of every load is used.
static void BM_atomic_load_relaxed(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int observed = test_loc.load(std::memory_order_relaxed);
    total += observed;
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_load_relaxed);
// Times a memory_order_acquire load of test_loc followed by the volatile counter increment.
// The loaded values are summed and the sum is written to `sink` once the loop finishes, so the
// result of every load is used.
static void BM_atomic_load_acquire(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int observed = test_loc.load(std::memory_order_acquire);
    total += observed;
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_load_acquire);
// Times a memory_order_release store to test_loc followed by the volatile counter increment.
// A different value is stored on every iteration.
static void BM_atomic_store_release(benchmark::State& state) {
  // Keep the running value unsigned. `counter` keeps growing across every benchmark in this
  // file and the loop may run a very large number of iterations, so incrementing a signed int
  // seeded from it could overflow, which is undefined behavior. Unsigned arithmetic wraps.
  unsigned value = counter;
  while (state.KeepRunning()) {
    // The cast only reinterprets the wrapped value; the store itself is what is being timed.
    test_loc.store(static_cast<int>(++value), std::memory_order_release);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_store_release);
// Times a memory_order_seq_cst store to test_loc followed by the volatile counter increment.
// A different value is stored on every iteration.
static void BM_atomic_store_seq_cst(benchmark::State& state) {
  // Keep the running value unsigned. `counter` keeps growing across every benchmark in this
  // file and the loop may run a very large number of iterations, so incrementing a signed int
  // seeded from it could overflow, which is undefined behavior. Unsigned arithmetic wraps.
  unsigned value = counter;
  while (state.KeepRunning()) {
    // The cast only reinterprets the wrapped value; the store itself is what is being timed.
    test_loc.store(static_cast<int>(++value), std::memory_order_seq_cst);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_atomic_store_seq_cst);
// Times a memory_order_relaxed fetch_add(1) on test_loc followed by the volatile counter
// increment. The previous values returned by fetch_add are summed and the sum is written to
// `sink` once the loop finishes.
static void BM_atomic_fetch_add_relaxed(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int previous = test_loc.fetch_add(1, std::memory_order_relaxed);
    total += previous;
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_relaxed);
// Times a memory_order_seq_cst fetch_add(1) on test_loc followed by the volatile counter
// increment. The previous values returned by fetch_add are summed and the sum is written to
// `sink` once the loop finishes.
static void BM_atomic_fetch_add_seq_cst(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int previous = test_loc.fetch_add(1, std::memory_order_seq_cst);
    total += previous;
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_seq_cst);
// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.
// Times a relaxed load of test_loc, then a memory_order_acquire thread fence, then the volatile
// counter increment. The loaded values are summed and the sum is written to `sink` once the
// loop finishes.
static void BM_atomic_acquire_fence(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // Statement order matters here: load, fence, then the plain volatile increment.
    total += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_acquire);
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_acquire_fence);
// Times a relaxed load of test_loc, then a memory_order_seq_cst thread fence, then the volatile
// counter increment. The loaded values are summed and the sum is written to `sink` once the
// loop finishes.
static void BM_atomic_seq_cst_fence(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // Statement order matters here: load, fence, then the plain volatile increment.
    total += test_loc.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_seq_cst_fence);
// For comparison, also throw in a critical section version:
// Mutex-based counterpart to the fetch_add benchmarks: each iteration locks `mtx`, increments
// the volatile `counter`, adds its new value to a running sum, and unlocks. The sum is written
// to `sink` once the loop finishes.
static void BM_atomic_fetch_add_cs(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // The guard is destroyed at the end of each iteration, releasing the lock before the next
    // KeepRunning() check.
    std::lock_guard<std::mutex> guard(mtx);
    total += ++counter;
  }
  sink = total;
}
BIONIC_BENCHMARK(BM_atomic_fetch_add_cs);