Merge "Add multithreads throughput benchmark" into main am: 9ac6169db4
am: 2e6a1923f3
am: 33bc3b7809
am: fd7118ed66
am: 86c29d11cf
am: 59de084b0b
Original change: https://android-review.googlesource.com/c/platform/bionic/+/2645428 Change-Id: Iff90cb7c04393787444f0301f0868d771b96c7bf Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
commit
6e1d673765
1 changed files with 106 additions and 0 deletions
|
@ -29,6 +29,10 @@
|
|||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include <benchmark/benchmark.h>
|
||||
|
@ -68,6 +72,89 @@ static void RunMalloptPurge(benchmark::State& state, int purge_value) {
|
|||
mallopt(M_DECAY_TIME, 0);
|
||||
}
|
||||
|
||||
// Measures allocator throughput while `num_threads` threads concurrently malloc() and free()
// blocks of `size` bytes.
//
//   state:       benchmark state that drives the iterations and receives the bytes-processed
//                total.
//   size:        size in bytes of every allocation. Must be non-zero.
//   num_threads: number of worker threads started per iteration. Must not exceed kMaxThreads.
//
// Invalid arguments are reported with state.SkipWithError() rather than dividing by zero or
// running with an unsupported thread count.
static void RunThreadsThroughput(benchmark::State& state, size_t size, size_t num_threads) {
  constexpr size_t kMaxBytes = 1 << 24;
  constexpr size_t kMaxThreads = 8;
  constexpr size_t kMinRounds = 4;

  if (size == 0) {
    // kMaxBytes / size below would divide by zero.
    state.SkipWithError("RunThreadsThroughput: allocation size must be non-zero");
    return;
  }
  if (num_threads > kMaxThreads) {
    // The per-thread counts are derived by shifting with the thread id, so the id range is
    // deliberately bounded.
    state.SkipWithError("RunThreadsThroughput: too many threads requested");
    return;
  }

  const size_t MaxAllocCounts = kMaxBytes / size;
  std::mutex m;
  bool ready = false;
  std::condition_variable cv;
  // Reserved once up front so that starting the workers never reallocates the container.
  std::vector<std::thread> threads;
  threads.reserve(num_threads);

  // The goal is to create malloc/free interleaving patterns across threads.
  // The bytes processed by each thread will be the same. The difference is the
  // patterns. Here's an example:
  //
  // A: Allocation
  // D: Deallocation
  //
  //   T1    T2    T3
  //   A     A     A
  //   A     A     D
  //   A     D     A
  //   A     D     D
  //   D     A     A
  //   D     A     D
  //   D     D     A
  //   D     D     D
  //
  // To do this, `AllocCounts` and `AllocRounds` will be adjusted according to the
  // thread id.
  auto thread_task = [&](size_t id) {
    {
      std::unique_lock lock(m);
      // Wait until all threads are created.
      cv.wait(lock, [&] { return ready; });
    }

    // Each increment of the id halves the blocks per round and doubles the number of rounds.
    const size_t AllocCounts = (MaxAllocCounts >> id);
    const size_t AllocRounds = (kMinRounds << id);
    // Left uninitialized on purpose: every slot is written before it is read, and this
    // allocation happens while the benchmark timer is running.
    void** MemPool = new void*[AllocCounts];

    for (size_t i = 0; i < AllocRounds; ++i) {
      for (size_t j = 0; j < AllocCounts; ++j) {
        MemPool[j] = malloc(size);
      }

      // Use a fixed seed to reduce the noise between benchmark rounds. The blocks are then
      // freed in shuffled order rather than in allocation order.
      const unsigned seed = 33529;
      std::shuffle(MemPool, &MemPool[AllocCounts], std::default_random_engine(seed));

      for (size_t j = 0; j < AllocCounts; ++j) free(MemPool[j]);
    }

    delete[] MemPool;
  };

  for (auto _ : state) {
    state.PauseTiming();
    // Don't need to acquire the lock because no thread is created.
    ready = false;

    for (size_t i = 0; i < num_threads; ++i) threads.emplace_back(thread_task, i);

    state.ResumeTiming();

    {
      std::unique_lock lock(m);
      ready = true;
    }

    // Release every worker at once; they were all blocked on `cv` while timing was paused.
    cv.notify_all();

    for (std::thread& worker : threads) worker.join();
    // Drop the joined threads so the next iteration starts from an empty container.
    threads.clear();
  }

  // Every thread handles (MaxAllocCounts >> id) * (kMinRounds << id) blocks of `size` bytes.
  // That equals kMaxBytes * kMinRounds when the divisions are exact; otherwise truncation makes
  // this total a slight overestimate.
  const size_t ThreadsBytesProcessed = kMaxBytes * kMinRounds * num_threads;
  state.SetBytesProcessed(ThreadsBytesProcessed * static_cast<size_t>(state.iterations()));
}
|
||||
|
||||
// Benchmark entry point that runs RunMalloptPurge with M_PURGE as the purge value.
static void BM_mallopt_purge(benchmark::State& state) {
  const int purge_option = M_PURGE;
  RunMalloptPurge(state, purge_option);
}
|
||||
|
@ -78,4 +165,23 @@ static void BM_mallopt_purge_all(benchmark::State& state) {
|
|||
}
|
||||
BIONIC_BENCHMARK(BM_mallopt_purge_all);
|
||||
|
||||
// Each generated benchmark tests only a single size class at a time, so that the impact of
// contention can be observed more often.
//
// BM_MALLOC_THREADS_THROUGHPUT(BYTES, THREADS) defines the function
// BM_malloc_threads_throughput_<BYTES>_<THREADS>, which forwards to
// RunThreadsThroughput(state, BYTES, THREADS), and passes that function to BIONIC_BENCHMARK.
#define BM_MALLOC_THREADS_THROUGHPUT(BYTES, THREADS)                                       \
  static void BM_malloc_threads_throughput_##BYTES##_##THREADS(benchmark::State& state) { \
    RunThreadsThroughput(state, BYTES, THREADS);                                          \
  }                                                                                       \
  BIONIC_BENCHMARK(BM_malloc_threads_throughput_##BYTES##_##THREADS);
|
||||
|
||||
// Scudo has three block categories; we choose one size from each category.
|
||||
// Instantiate the throughput benchmark for 64-, 512- and 8192-byte allocations, each with
// 2, 4 and 8 threads (first argument: allocation size, second argument: thread count).
BM_MALLOC_THREADS_THROUGHPUT(64, 2);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(64, 4);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(64, 8);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(512, 2);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(512, 4);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(512, 8);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(8192, 2);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(8192, 4);
|
||||
BM_MALLOC_THREADS_THROUGHPUT(8192, 8);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue