// platform_bionic/benchmarks/atomic_benchmark.cpp

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Our goal is to measure the cost of various C++ atomic operations.
// Android doesn't really control those. But since some of these operations can be quite
// expensive, this may be useful input for development of higher level code.
// Expected mappings from C++ atomics to hardware primitives can be found at
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .

#include <atomic>
#include <mutex>

#include <benchmark/benchmark.h>
#include "util.h"

// We time atomic operations separated by a volatile (not atomic!) increment.  This ensures
// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the
// like.  That in turn ensures that the CPU has outstanding memory operations when the fence
// is executed.

// In most respects, we compute best case values. Since there is only one thread, there are no
// coherence misses.

// We assume that the compiler is not smart enough to optimize away fences in a single-threaded
// program. If that changes, we'll need to add a second thread.

// Non-atomic volatile counter; every benchmark loop in this file increments it.
volatile unsigned int counter;

// The single atomic location that the load/store/fetch_add benchmarks operate on.
std::atomic<int> test_loc{0};

// Volatile destination for each benchmark's accumulated result (presumably so the
// accumulation cannot be discarded as dead code).
volatile unsigned int sink;

// Lock taken by the critical-section benchmark.
std::mutex mtx;

// Baseline benchmark: the loop body contains only the volatile increment of `counter`
// that the other benchmarks in this file also perform, with no atomic operation.
void BM_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    // Volatile read-modify-write; the only work done per iteration.
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_empty);

// Times a relaxed load of test_loc, followed by the volatile increment of `counter`.
static void BM_load_relaxed(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int observed = test_loc.load(std::memory_order_relaxed);
    total += static_cast<unsigned>(observed);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_load_relaxed);

// Times an acquire load of test_loc, followed by the volatile increment of `counter`.
static void BM_load_acquire(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    const int observed = test_loc.load(std::memory_order_acquire);
    total += static_cast<unsigned>(observed);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_load_acquire);

// Times a release store to test_loc, followed by the volatile increment of `counter`.
static void BM_store_release(benchmark::State& state) {
  // Seed the stored value from the volatile counter (presumably so the stored values
  // are not compile-time constants). The running value is kept unsigned: `counter`
  // accumulates across benchmarks, and incrementing a plain int seeded from it could
  // overflow, which is undefined behavior. Unsigned arithmetic simply wraps.
  unsigned next = counter;
  while (state.KeepRunning()) {
    // The unsigned -> int conversion is modular on two's-complement targets, so the
    // stored sequence matches the signed version wherever that version was defined.
    test_loc.store(static_cast<int>(++next), std::memory_order_release);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_store_release);

// Times a sequentially consistent store to test_loc, followed by the volatile
// increment of `counter`.
static void BM_store_seq_cst(benchmark::State& state) {
  // Seed the stored value from the volatile counter (presumably so the stored values
  // are not compile-time constants). The running value is kept unsigned: `counter`
  // accumulates across benchmarks, and incrementing a plain int seeded from it could
  // overflow, which is undefined behavior. Unsigned arithmetic simply wraps.
  unsigned next = counter;
  while (state.KeepRunning()) {
    // The unsigned -> int conversion is modular on two's-complement targets, so the
    // stored sequence matches the signed version wherever that version was defined.
    test_loc.store(static_cast<int>(++next), std::memory_order_seq_cst);
    ++counter;
  }
}
BIONIC_BENCHMARK(BM_store_seq_cst);

// Times a relaxed fetch_add(1) on test_loc, followed by the volatile increment of
// `counter`.
static void BM_fetch_add_relaxed(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // fetch_add returns the value held before the addition.
    const int previous = test_loc.fetch_add(1, std::memory_order_relaxed);
    total += static_cast<unsigned>(previous);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_fetch_add_relaxed);

// Times a sequentially consistent fetch_add(1) on test_loc, followed by the volatile
// increment of `counter`.
static void BM_fetch_add_seq_cst(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // fetch_add returns the value held before the addition.
    const int previous = test_loc.fetch_add(1, std::memory_order_seq_cst);
    total += static_cast<unsigned>(previous);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_fetch_add_seq_cst);

// The fence benchmarks include a relaxed load to make it much harder to optimize away
// the fence.

// Times a relaxed load of test_loc followed by an acquire fence, then the volatile
// increment of `counter`.
static void BM_acquire_fence(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // Statement order is deliberate: load, then fence, then the volatile increment.
    total += static_cast<unsigned>(test_loc.load(std::memory_order_relaxed));
    std::atomic_thread_fence(std::memory_order_acquire);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_acquire_fence);

// Times a relaxed load of test_loc followed by a sequentially consistent fence, then
// the volatile increment of `counter`.
static void BM_seq_cst_fence(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // Statement order is deliberate: load, then fence, then the volatile increment.
    total += static_cast<unsigned>(test_loc.load(std::memory_order_relaxed));
    std::atomic_thread_fence(std::memory_order_seq_cst);
    ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_seq_cst_fence);

// For comparison, also throw in a critical section version:

// Times incrementing the volatile `counter` while holding `mtx`, as a lock-based
// counterpart to the atomic fetch_add benchmarks.
static void BM_fetch_add_cs(benchmark::State& state) {
  unsigned total = 0;
  while (state.KeepRunning()) {
    // The guard lives for exactly one loop iteration: the mutex is acquired here and
    // released when the loop body's scope ends, before KeepRunning() is called again.
    std::lock_guard<std::mutex> guard(mtx);
    total += ++counter;
  }
  // Publish the accumulated value through the volatile sink.
  sink = total;
}
BIONIC_BENCHMARK(BM_fetch_add_cs);
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00			`/*`
			`* Copyright (C) 2017 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`// Our goal is to measure the cost of various C++ atomic operations.`
			`// Android doesn't really control those. But since some of these operations can be quite`
			`// expensive, this may be useful input for development of higher level code.`
			`// Expected mappings from C++ atomics to hardware primitives can be found at`
			`// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html .`

			`#include <atomic>`
			`#include <mutex>`

Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`#include <benchmark/benchmark.h>`
			`#include "util.h"`

Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00			`// We time atomic operations separated by a volatile (not atomic!) increment. This ensures`
			`// that the compiler emits memory instructions (e.g. load or store) prior to any fence or the`
			`// like. That in turn ensures that the CPU has outstanding memory operations when the fence`
			`// is executed.`

			`// In most respects, we compute best case values. Since there is only one thread, there are no`
			`// coherence misses.`

			`// We assume that the compiler is not smart enough to optimize away fences in a single-threaded`
			`// program. If that changes, we'll need to add a second thread.`

			`volatile unsigned counter;`

			`std::atomic<int> test_loc(0);`

			`volatile unsigned sink;`

			`std::mutex mtx;`

			`void BM_empty(benchmark::State& state) {`
			`while (state.KeepRunning()) {`
			`++counter;`
			`}`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_empty);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_load_relaxed(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.load(std::memory_order_relaxed);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_load_relaxed);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_load_acquire(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.load(std::memory_order_acquire);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_load_acquire);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_store_release(benchmark::State& state) {`
			`int i = counter;`
			`while (state.KeepRunning()) {`
			`test_loc.store(++i, std::memory_order_release);`
			`++counter;`
			`}`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_store_release);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_store_seq_cst(benchmark::State& state) {`
			`int i = counter;`
			`while (state.KeepRunning()) {`
			`test_loc.store(++i, std::memory_order_seq_cst);`
			`++counter;`
			`}`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_store_seq_cst);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_fetch_add_relaxed(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.fetch_add(1, std::memory_order_relaxed);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_fetch_add_relaxed);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_fetch_add_seq_cst(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.fetch_add(1, std::memory_order_seq_cst);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_fetch_add_seq_cst);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`// The fence benchmarks include a relaxed load to make it much harder to optimize away`
			`// the fence.`

			`static void BM_acquire_fence(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.load(std::memory_order_relaxed);`
			`std::atomic_thread_fence(std::memory_order_acquire);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_acquire_fence);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`static void BM_seq_cst_fence(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`result += test_loc.load(std::memory_order_relaxed);`
			`std::atomic_thread_fence(std::memory_order_seq_cst);`
			`++counter;`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_seq_cst_fence);`
Add "benchmark" to time atomic operations The intent here is to confirm that the compiler behaves as expected, and to guide optimization efforts, particularly where there is a choice between primitives. Test: Built and ran benchmark on Angler repeatedly. Manually confirmed that the compiler behaves roughly as expected. Change-Id: I059b245d1ba8296e9b28602559b53eafafe0a30f 2017-01-24 02:30:44 +01:00
			`// For comparison, also throw in a critical section version:`

			`static void BM_fetch_add_cs(benchmark::State& state) {`
			`unsigned result = 0;`
			`while (state.KeepRunning()) {`
			`{`
			`std::lock_guard<std::mutex> _(mtx);`
			`result += ++counter;`
			`}`
			`}`
			`sink = result;`
			`}`
Implement interface for bionic benchmarks. Test: Unit tests. Change-Id: Ic61932f61ddd572e2f045b601f9da6e090cdc45d 2017-07-25 05:01:13 +02:00			`BIONIC_BENCHMARK(BM_fetch_add_cs);`