Support memory alignment tests for string_benchmark.cpp

Test: Manually verify that benchmark times are similar and add a unit test.
Change-Id: Idaaeb4c8c3629f797ffd8d0c86c8d7b7b9653181
This commit is contained in:
Anders Lewis 2017-06-23 15:53:59 -07:00
parent 42edca036e
commit f4447b9105
5 changed files with 317 additions and 70 deletions

View file

@ -35,6 +35,17 @@ cc_defaults {
"time_benchmark.cpp", "time_benchmark.cpp",
"unistd_benchmark.cpp", "unistd_benchmark.cpp",
], ],
static_libs: ["libBionicBenchmarksUtils"],
}
// Shared compiler flags for the benchmark support modules that list this in
// their `defaults` (libBionicBenchmarksUtils and bionic-benchmarks-tests):
// extra warnings are enabled and -Werror turns every warning into an error.
cc_defaults {
name: "bionic-benchmarks-extras-defaults",
cflags: [
"-Wall",
"-Wextra",
"-Werror",
"-Wunused",
],
} }
// Build benchmarks for the device (with bionic's .so). Run with: // Build benchmarks for the device (with bionic's .so). Run with:
@ -63,3 +74,19 @@ cc_benchmark_host {
}, },
}, },
} }
// Static helper library built from util.cpp. It is linked into the benchmark
// defaults and the unit test via `static_libs`, and host_supported makes it
// available to host builds as well.
cc_library_static {
name: "libBionicBenchmarksUtils",
defaults: ["bionic-benchmarks-extras-defaults"],
srcs: ["util.cpp"],
host_supported: true,
}
// Unit tests for the benchmark helper functions (tests/benchmark_test.cpp),
// linked statically against libBionicBenchmarksUtils.
cc_test {
name: "bionic-benchmarks-tests",
defaults: ["bionic-benchmarks-extras-defaults"],
srcs: [
"tests/benchmark_test.cpp",
],
static_libs: ["libBionicBenchmarksUtils"],
}

View file

@ -18,188 +18,246 @@
#include <string.h> #include <string.h>
#include <benchmark/benchmark.h> #include <benchmark/benchmark.h>
#include "util.h"
constexpr auto KB = 1024; constexpr auto KB = 1024;
#define AT_COMMON_SIZES \ // NOTE: these constants are temporary replacements for AT_COMMON_SIZES until
Arg(8)->Arg(64)->Arg(512)->Arg(1*KB)->Arg(8*KB)->Arg(16*KB)->Arg(32*KB)->Arg(64*KB) // the new interface for Bionic benchmarks is implemented.
// TODO: test unaligned operation too? (currently everything will be 8-byte aligned by malloc.) // Set both to 0 to test normal alignment.
#define AT_SRC_ALIGN 0
#define AT_DST_ALIGN 0
#define AT_ALIGNED_TWOBUF \
Args({(8), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(64), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
Args({(512), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(1*KB), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
Args({(8*KB), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(16*KB), AT_SRC_ALIGN, AT_DST_ALIGN})-> \
Args({(32*KB), AT_SRC_ALIGN, AT_DST_ALIGN})->Args({(64*KB), AT_SRC_ALIGN, AT_DST_ALIGN})
#define AT_ALIGNED_ONEBUF \
Args({(8), AT_SRC_ALIGN})->Args({(64), AT_SRC_ALIGN})->Args({(512), AT_SRC_ALIGN})-> \
Args({(1*KB), AT_SRC_ALIGN})->Args({(8*KB), AT_SRC_ALIGN})->Args({(16*KB), AT_SRC_ALIGN})-> \
Args({(32*KB), AT_SRC_ALIGN})->Args({(64*KB), AT_SRC_ALIGN})
// memcmp() benchmark: compares two nbytes-long buffers that are both filled with 'x'.
// Each line below shows the old revision followed by the new one. The old revision
// allocates with new[]; the new revision gets its buffers from GetAlignedPtrFilled(),
// taking the source/destination alignments from state.range(1) and state.range(2).
static void BM_string_memcmp(benchmark::State& state) { static void BM_string_memcmp(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
char* src = new char[nbytes]; char* dst = new char[nbytes]; const size_t src_alignment = state.range(1);
memset(src, 'x', nbytes); const size_t dst_alignment = state.range(2);
memset(dst, 'x', nbytes);
std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'x');
volatile int c __attribute__((unused)) = 0; volatile int c __attribute__((unused)) = 0;
while (state.KeepRunning()) { while (state.KeepRunning()) {
c += memcmp(dst, src, nbytes); c += memcmp(dst_aligned, src_aligned, nbytes);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
delete[] src;
delete[] dst;
} }
BENCHMARK(BM_string_memcmp)->AT_COMMON_SIZES; BENCHMARK(BM_string_memcmp)->AT_ALIGNED_TWOBUF;
// memcpy() benchmark: copies nbytes from an 'x'-filled source into a separate
// destination buffer. Each line shows the old revision followed by the new one; the
// new revision obtains the source from GetAlignedPtrFilled() and the destination from
// GetAlignedPtr(), with alignments taken from state.range(1) and state.range(2).
static void BM_string_memcpy(benchmark::State& state) { static void BM_string_memcpy(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
char* src = new char[nbytes]; char* dst = new char[nbytes]; const size_t src_alignment = state.range(1);
memset(src, 'x', nbytes); const size_t dst_alignment = state.range(2);
std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes);
while (state.KeepRunning()) { while (state.KeepRunning()) {
memcpy(dst, src, nbytes); memcpy(dst_aligned, src_aligned, nbytes);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
delete[] src;
delete[] dst;
} }
BENCHMARK(BM_string_memcpy)->AT_COMMON_SIZES; BENCHMARK(BM_string_memcpy)->AT_ALIGNED_TWOBUF;
// memmove() benchmark with two separate vectors, so source and destination never
// overlap. Each line shows the old revision followed by the new one; the new revision
// fills the source with 'x' and the destination with 'y' via GetAlignedPtrFilled(),
// using the alignments in state.range(1) and state.range(2).
static void BM_string_memmove_non_overlapping(benchmark::State& state) { static void BM_string_memmove_non_overlapping(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> src(nbytes, 'x'); const size_t src_alignment = state.range(1);
std::vector<char> dst(nbytes, 'x'); const size_t dst_alignment = state.range(2);
std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'y');
while (state.KeepRunning()) { while (state.KeepRunning()) {
memmove(dst.data(), src.data(), nbytes); memmove(dst_aligned, src_aligned, nbytes);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_memmove_non_overlapping)->AT_COMMON_SIZES; BENCHMARK(BM_string_memmove_non_overlapping)->AT_ALIGNED_TWOBUF;
// memmove() benchmark inside a single buffer of nbytes + 1 usable bytes: the source
// starts one byte after the destination, so the two ranges overlap in all but one byte.
// Each line shows the old revision followed by the new one; the new revision takes
// the buffer alignment from state.range(1).
static void BM_string_memmove_overlap_dst_before_src(benchmark::State& state) { static void BM_string_memmove_overlap_dst_before_src(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> buf(nbytes + 1, 'x'); const size_t alignment = state.range(1);
std::vector<char> buf(3 * alignment + nbytes + 1, 'x');
char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
while (state.KeepRunning()) { while (state.KeepRunning()) {
memmove(buf.data(), buf.data() + 1, nbytes); // Worst-case overlap. memmove(buf_aligned, buf_aligned + 1, nbytes); // Worst-case overlap.
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_memmove_overlap_dst_before_src)->AT_COMMON_SIZES; BENCHMARK(BM_string_memmove_overlap_dst_before_src)->AT_ALIGNED_ONEBUF;
// memmove() benchmark inside a single buffer of nbytes + 1 usable bytes: the
// destination starts one byte after the source, so the two ranges overlap in all but
// one byte. Each line shows the old revision followed by the new one; the new
// revision takes the buffer alignment from state.range(1).
static void BM_string_memmove_overlap_src_before_dst(benchmark::State& state) { static void BM_string_memmove_overlap_src_before_dst(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> buf(nbytes + 1, 'x'); const size_t alignment = state.range(1);
std::vector<char> buf;
char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
while (state.KeepRunning()) { while (state.KeepRunning()) {
memmove(buf.data() + 1, buf.data(), nbytes); // Worst-case overlap. memmove(buf_aligned + 1, buf_aligned, nbytes); // Worst-case overlap.
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_memmove_overlap_src_before_dst)->AT_COMMON_SIZES; BENCHMARK(BM_string_memmove_overlap_src_before_dst)->AT_ALIGNED_ONEBUF;
// memset() benchmark: zeroes nbytes of a buffer on every iteration. Each line shows
// the old revision followed by the new one; the new revision gets the buffer from
// GetAlignedPtr() (requesting nbytes + 1 bytes) with the alignment in state.range(1).
static void BM_string_memset(benchmark::State& state) { static void BM_string_memset(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
char* dst = new char[nbytes]; const size_t alignment = state.range(1);
std::vector<char> buf;
char* buf_aligned = GetAlignedPtr(&buf, alignment, nbytes + 1);
while (state.KeepRunning()) { while (state.KeepRunning()) {
memset(dst, 0, nbytes); memset(buf_aligned, 0, nbytes);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
delete[] dst;
} }
BENCHMARK(BM_string_memset)->AT_COMMON_SIZES; BENCHMARK(BM_string_memset)->AT_ALIGNED_ONEBUF;
// strlen() benchmark: the buffer is filled with 'x' and NUL-terminated at index
// nbytes - 1, so each call scans nbytes - 1 characters. Each line shows the old
// revision followed by the new one; the new revision takes the buffer alignment from
// state.range(1).
static void BM_string_strlen(benchmark::State& state) { static void BM_string_strlen(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
char* s = new char[nbytes]; const size_t alignment = state.range(1);
memset(s, 'x', nbytes);
s[nbytes - 1] = 0; std::vector<char> buf;
char* buf_aligned = GetAlignedPtrFilled(&buf, alignment, nbytes + 1, 'x');
buf_aligned[nbytes - 1] = '\0';
volatile int c __attribute__((unused)) = 0; volatile int c __attribute__((unused)) = 0;
while (state.KeepRunning()) { while (state.KeepRunning()) {
c += strlen(s); c += strlen(buf_aligned);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
delete[] s;
} }
BENCHMARK(BM_string_strlen)->AT_COMMON_SIZES; BENCHMARK(BM_string_strlen)->AT_ALIGNED_ONEBUF;
// strcat() benchmark where the destination is reset to the two-character string "yy"
// after every call, while the source holds nbytes - 1 'x' characters: the call has
// only two characters to skip before copying the whole source. Each line shows the old
// revision followed by the new one; the new revision takes the source/destination
// alignments from state.range(1) and state.range(2).
static void BM_string_strcat_copy_only(benchmark::State& state) { static void BM_string_strcat_copy_only(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> src(nbytes, 'x'); const size_t src_alignment = state.range(1);
std::vector<char> dst(nbytes + 2); const size_t dst_alignment = state.range(2);
src[nbytes - 1] = '\0';
dst[0] = 'y'; std::vector<char> src;
dst[1] = 'y'; std::vector<char> dst;
dst[2] = '\0'; char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes + 2);
src_aligned[nbytes - 1] = '\0';
dst_aligned[0] = 'y';
dst_aligned[1] = 'y';
dst_aligned[2] = '\0';
while (state.KeepRunning()) { while (state.KeepRunning()) {
strcat(dst.data(), src.data()); strcat(dst_aligned, src_aligned);
dst[2] = '\0'; dst_aligned[2] = '\0';
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_strcat_copy_only)->AT_COMMON_SIZES; BENCHMARK(BM_string_strcat_copy_only)->AT_ALIGNED_TWOBUF;
// strcat() benchmark where the source is the two-character string "xx" and the
// destination holds nbytes - 1 'y' characters (re-terminated at index nbytes - 1 after
// every call): the call has a long destination to skip and only two characters to
// copy. Each line shows the old revision followed by the new one; the new revision
// takes the source/destination alignments from state.range(1) and state.range(2).
static void BM_string_strcat_seek_only(benchmark::State& state) { static void BM_string_strcat_seek_only(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> src(3, 'x'); const size_t src_alignment = state.range(1);
std::vector<char> dst(nbytes + 2, 'y'); const size_t dst_alignment = state.range(2);
src[2] = '\0';
dst[nbytes - 1] = '\0'; std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, 3, 'x');
char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes + 2, 'y');
src_aligned[2] = '\0';
dst_aligned[nbytes - 1] = '\0';
while (state.KeepRunning()) { while (state.KeepRunning()) {
strcat(dst.data(), src.data()); strcat(dst_aligned, src_aligned);
dst[nbytes - 1] = '\0'; dst_aligned[nbytes - 1] = '\0';
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_strcat_seek_only)->AT_COMMON_SIZES; BENCHMARK(BM_string_strcat_seek_only)->AT_ALIGNED_TWOBUF;
// strcat() benchmark where source and destination are both strings of
// nbytes / 2 - 1 characters; the destination is re-terminated at index nbytes / 2 - 1
// after every call. Each line shows the old revision followed by the new one: the old
// revision sizes the destination vector at nbytes / 2, the new revision requests
// nbytes bytes for it and takes the alignments from state.range(1) and state.range(2).
static void BM_string_strcat_half_copy_half_seek(benchmark::State& state) { static void BM_string_strcat_half_copy_half_seek(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> src(nbytes / 2, 'x'); const size_t src_alignment = state.range(1);
std::vector<char> dst(nbytes / 2, 'y'); const size_t dst_alignment = state.range(2);
src[nbytes / 2 - 1] = '\0';
dst[nbytes / 2 - 1] = '\0'; std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes / 2, 'x');
char* dst_aligned = GetAlignedPtrFilled(&dst, dst_alignment, nbytes, 'y');
src_aligned[nbytes / 2 - 1] = '\0';
dst_aligned[nbytes / 2 - 1] = '\0';
while (state.KeepRunning()) { while (state.KeepRunning()) {
strcat(dst.data(), src.data()); strcat(dst_aligned, src_aligned);
dst[nbytes / 2 - 1] = '\0'; dst_aligned[nbytes / 2 - 1] = '\0';
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_strcat_half_copy_half_seek)->AT_COMMON_SIZES; BENCHMARK(BM_string_strcat_half_copy_half_seek)->AT_ALIGNED_TWOBUF;
// strcpy() benchmark: the source is filled with 'x' and NUL-terminated at index
// nbytes - 1; it is copied into an nbytes-long destination. Each line shows the old
// revision followed by the new one; the new revision takes the source/destination
// alignments from state.range(1) and state.range(2).
static void BM_string_strcpy(benchmark::State& state) { static void BM_string_strcpy(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> src(nbytes, 'x'); const size_t src_alignment = state.range(1);
std::vector<char> dst(nbytes); const size_t dst_alignment = state.range(2);
src[nbytes - 1] = '\0';
std::vector<char> src;
std::vector<char> dst;
char* src_aligned = GetAlignedPtrFilled(&src, src_alignment, nbytes, 'x');
char* dst_aligned = GetAlignedPtr(&dst, dst_alignment, nbytes);
src_aligned[nbytes - 1] = '\0';
while (state.KeepRunning()) { while (state.KeepRunning()) {
strcpy(dst.data(), src.data()); strcpy(dst_aligned, src_aligned);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_strcpy)->AT_COMMON_SIZES; BENCHMARK(BM_string_strcpy)->AT_ALIGNED_TWOBUF;
// strcmp() benchmark: both strings consist of nbytes - 1 'x' characters, so they are
// identical and the comparison has to run to the terminating NUL. Each line shows the
// old revision followed by the new one; the new revision takes the alignments of the
// two strings from state.range(1) and state.range(2).
static void BM_string_strcmp(benchmark::State& state) { static void BM_string_strcmp(benchmark::State& state) {
const size_t nbytes = state.range(0); const size_t nbytes = state.range(0);
std::vector<char> s1(nbytes, 'x'); const size_t s1_alignment = state.range(1);
std::vector<char> s2(nbytes, 'x'); const size_t s2_alignment = state.range(2);
s1[nbytes - 1] = '\0';
s2[nbytes - 1] = '\0'; std::vector<char> s1;
std::vector<char> s2;
char* s1_aligned = GetAlignedPtrFilled(&s1, s1_alignment, nbytes, 'x');
char* s2_aligned = GetAlignedPtrFilled(&s2, s2_alignment, nbytes, 'x');
s1_aligned[nbytes - 1] = '\0';
s2_aligned[nbytes - 1] = '\0';
volatile int c __attribute__((unused)); volatile int c __attribute__((unused));
while (state.KeepRunning()) { while (state.KeepRunning()) {
c = strcmp(s1.data(), s2.data()); c = strcmp(s1_aligned, s2_aligned);
} }
state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes)); state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
} }
BENCHMARK(BM_string_strcmp)->AT_COMMON_SIZES; BENCHMARK(BM_string_strcmp)->AT_ALIGNED_TWOBUF;

View file

@ -0,0 +1,39 @@
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <util.h>
// Checks GetAlignedMemory() for every power-of-two alignment from 1 to 32 combined
// with every or_mask below that alignment: the low bits of the returned address must
// equal or_mask, and the bit whose value is `alignment` must be set.
TEST(benchmark, memory_align) {
  std::vector<char> storage(100);
  for (size_t alignment = 1; alignment <= 32; alignment <<= 1) {
    size_t or_mask = 0;
    while (or_mask < alignment) {
      char* const result = GetAlignedMemory(storage.data(), alignment, or_mask);
      const uintptr_t address = reinterpret_cast<uintptr_t>(result);
      ASSERT_EQ(address % alignment, or_mask);
      ASSERT_EQ(address & alignment, alignment);
      ++or_mask;
    }
  }
}
// Checks GetAlignedPtr() for every power-of-two alignment from 1 to 2048, reusing one
// vector: the returned address must have the `alignment` bit set and every lower bit
// clear.
TEST(benchmark, ptr_align) {
  std::vector<char> storage;
  for (size_t alignment = 1; alignment <= 2048; alignment <<= 1) {
    char* const result = GetAlignedPtr(&storage, alignment, 100);
    const uintptr_t address = reinterpret_cast<uintptr_t>(result);
    ASSERT_EQ(address & alignment, alignment);
    ASSERT_EQ(address & (alignment - 1), 0u);
  }
}

92
benchmarks/util.cpp Normal file
View file

@ -0,0 +1,92 @@
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "util.h"
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <cstdlib>
#include <vector>
// Derives from orig_ptr an address that is aligned to exactly `alignment` bytes.
//
// With alignment == 0 the pointer is returned unchanged. Otherwise the address is moved
// forward to the next multiple of `alignment` (always advancing by at least one byte),
// then the bit with value `alignment` and the bits of `or_mask` are OR'ed in. The result
// therefore is not aligned to 2 * alignment, and it lies at most
// 2 * alignment + or_mask bytes past orig_ptr.
//
// Aborts the process if `alignment` has more than one bit set or if
// `or_mask` is greater than `alignment`.
char *GetAlignedMemory(char *orig_ptr, size_t alignment, size_t or_mask) {
  const bool single_bit_or_zero = (alignment & (alignment - 1)) == 0;
  if (!single_bit_or_zero) {
    fprintf(stderr, "warning: alignment passed into GetAlignedMemory is not a power of two.\n");
    std::abort();
  }
  if (or_mask > alignment) {
    fprintf(stderr, "warning: or_mask passed into GetAlignedMemory is too high.\n");
    std::abort();
  }

  // No alignment requested: hand the pointer back untouched.
  if (alignment == 0) {
    return orig_ptr;
  }

  const uintptr_t start = reinterpret_cast<uintptr_t>(orig_ptr);
  const uintptr_t misalignment = start & (alignment - 1);
  // Step to the next boundary; an already aligned address still moves a full step.
  const uintptr_t boundary = start + (alignment - misalignment);
  // Forcing the `alignment` bit on keeps the result from being aligned to anything
  // larger than requested.
  return reinterpret_cast<char*>(boundary | alignment | or_mask);
}
// Resizes *buf to nbytes + 3 * alignment bytes and returns the pointer that
// GetAlignedMemory() computes from the start of its storage with an or_mask of 0.
// The returned pointer refers to memory owned by *buf.
char *GetAlignedPtr(std::vector<char>* buf, size_t alignment, size_t nbytes) {
  const size_t padded_size = nbytes + 3 * alignment;
  buf->resize(padded_size);
  char* const storage = buf->data();
  return GetAlignedMemory(storage, alignment, 0);
}
// Obtains an aligned pointer into *buf through GetAlignedPtr() and sets the first
// nbytes bytes at that pointer to fill_byte before returning it.
char *GetAlignedPtrFilled(std::vector<char>* buf, size_t alignment, size_t nbytes, char fill_byte) {
  char* const aligned_start = GetAlignedPtr(buf, alignment, nbytes);
  memset(aligned_start, fill_byte, nbytes);
  return aligned_start;
}
// Restricts the CPU affinity (as set through sched_setaffinity with pid 0) to a
// single CPU.
//
// cpu_to_lock >= 0 selects that CPU, which must be part of the current affinity mask.
// A negative cpu_to_lock selects the highest-numbered CPU in the current mask.
//
// Returns true on success. Returns false, after printing a diagnostic, when the
// affinity mask cannot be read or written, when the requested CPU is not in the mask,
// or when the mask contains no CPU at all.
bool LockToCPU(int cpu_to_lock) {
  cpu_set_t available;
  CPU_ZERO(&available);
  if (sched_getaffinity(0, sizeof(available), &available) != 0) {
    perror("sched_getaffinity failed");
    return false;
  }

  if (cpu_to_lock >= 0) {
    // An explicit CPU was requested; it has to be one we are allowed to run on.
    if (!CPU_ISSET(cpu_to_lock, &available)) {
      printf("Cpu %d does not exist.\n", cpu_to_lock);
      return false;
    }
  } else {
    // Scan from the top so the first hit is the last active core.
    for (int cpu = CPU_SETSIZE - 1; cpu >= 0; --cpu) {
      if (CPU_ISSET(cpu, &available)) {
        cpu_to_lock = cpu;
        break;
      }
    }
    if (cpu_to_lock < 0) {
      printf("Cannot find any valid cpu to lock.\n");
      return false;
    }
  }

  // Build a mask containing only the chosen CPU and install it.
  cpu_set_t target;
  CPU_ZERO(&target);
  CPU_SET(cpu_to_lock, &target);
  if (sched_setaffinity(0, sizeof(target), &target) != 0) {
    perror("sched_setaffinity failed");
    return false;
  }
  return true;
}

31
benchmarks/util.h Normal file
View file

@ -0,0 +1,31 @@
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _BIONIC_BENCHMARKS_UTIL_H_
#define _BIONIC_BENCHMARKS_UTIL_H_
#include <vector>
// Returns an address derived from orig_ptr that is a multiple of `alignment` but not of
// 2 * alignment, with the bits of `or_mask` OR'ed in. The result lies at most
// 2 * alignment + or_mask bytes past orig_ptr. An alignment of 0 returns orig_ptr
// unchanged. Aborts if `alignment` is not a power of two or `or_mask` exceeds it.
char *GetAlignedMemory(char *orig_ptr, size_t alignment, size_t or_mask);
// Resizes *buf to nbytes + 3 * alignment bytes and returns a pointer into it that
// GetAlignedMemory() computes for `alignment` with an or_mask of 0.
char *GetAlignedPtr(std::vector<char>* buf, size_t alignment, size_t nbytes);
// Like GetAlignedPtr(), and additionally sets the first nbytes bytes at the returned
// pointer to fill_byte.
char *GetAlignedPtrFilled(std::vector<char>* buf, size_t alignment, size_t nbytes, char fill_byte);
// Restricts the CPU affinity to the single CPU cpu_to_lock; a negative value picks the
// highest-numbered CPU in the current affinity mask. Returns false on failure.
bool LockToCPU(int cpu_to_lock);
#endif // _BIONIC_BENCHMARKS_UTIL_H_