* commit 'da2317edb36acdaa6a7bc49cef8deba2a42bfb15': Use mmap to create the pthread_internal_t
This commit is contained in:
commit
a15bd72cf0
8 changed files with 171 additions and 88 deletions
|
@ -47,6 +47,21 @@ static void BM_pthread_getspecific(int iters) {
|
|||
}
|
||||
BENCHMARK(BM_pthread_getspecific);
|
||||
|
||||
// Benchmark for pthread_setspecific().
//
// The key is created before StartBenchmarkTiming() and deleted after
// StopBenchmarkTiming(), so only the `iters` calls that store NULL under the
// key sit between the two timing calls.
static void BM_pthread_setspecific(int iters) {
  StopBenchmarkTiming();
  pthread_key_t tls_key;
  pthread_key_create(&tls_key, NULL);
  StartBenchmarkTiming();

  for (int remaining = iters; remaining > 0; --remaining) {
    pthread_setspecific(tls_key, NULL);
  }

  StopBenchmarkTiming();
  pthread_key_delete(tls_key);
}
|
||||
BENCHMARK(BM_pthread_setspecific);
|
||||
|
||||
static void DummyPthreadOnceInitFunction() {
|
||||
}
|
||||
|
||||
|
@ -137,3 +152,80 @@ static void BM_pthread_rw_lock_write(int iters) {
|
|||
pthread_rwlock_destroy(&lock);
|
||||
}
|
||||
BENCHMARK(BM_pthread_rw_lock_write);
|
||||
|
||||
// Thread entry point that ignores its argument and returns NULL right away.
static void* IdleThread(void* /* unused */) {
  return NULL;
}
|
||||
|
||||
static void BM_pthread_create(int iters) {
|
||||
StopBenchmarkTiming();
|
||||
pthread_t thread;
|
||||
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
StartBenchmarkTiming();
|
||||
pthread_create(&thread, NULL, IdleThread, NULL);
|
||||
StopBenchmarkTiming();
|
||||
pthread_join(thread, NULL);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_pthread_create);
|
||||
|
||||
// Thread entry point for BM_pthread_create_and_run: calls StopBenchmarkTiming()
// as its first action, then returns NULL. The argument is ignored.
static void* RunThread(void* /* unused */) {
  StopBenchmarkTiming();
  return NULL;
}
|
||||
|
||||
static void BM_pthread_create_and_run(int iters) {
|
||||
StopBenchmarkTiming();
|
||||
pthread_t thread;
|
||||
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
StartBenchmarkTiming();
|
||||
pthread_create(&thread, NULL, RunThread, NULL);
|
||||
pthread_join(thread, NULL);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_pthread_create_and_run);
|
||||
|
||||
// Thread entry point for BM_pthread_exit_and_join: calls StartBenchmarkTiming()
// and then terminates the thread through pthread_exit(NULL). The argument is
// ignored.
static void* ExitThread(void* /* unused */) {
  StartBenchmarkTiming();
  pthread_exit(NULL);
}
|
||||
|
||||
static void BM_pthread_exit_and_join(int iters) {
|
||||
StopBenchmarkTiming();
|
||||
pthread_t thread;
|
||||
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
pthread_create(&thread, NULL, ExitThread, NULL);
|
||||
pthread_join(thread, NULL);
|
||||
StopBenchmarkTiming();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_pthread_exit_and_join);
|
||||
|
||||
static void BM_pthread_key_create(int iters) {
|
||||
StopBenchmarkTiming();
|
||||
pthread_key_t key;
|
||||
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
StartBenchmarkTiming();
|
||||
pthread_key_create(&key, NULL);
|
||||
StopBenchmarkTiming();
|
||||
pthread_key_delete(key);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_pthread_key_create);
|
||||
|
||||
static void BM_pthread_key_delete(int iters) {
|
||||
StopBenchmarkTiming();
|
||||
pthread_key_t key;
|
||||
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
pthread_key_create(&key, NULL);
|
||||
StartBenchmarkTiming();
|
||||
pthread_key_delete(key);
|
||||
StopBenchmarkTiming();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_pthread_key_delete);
|
||||
|
|
|
@ -74,9 +74,7 @@ uintptr_t __stack_chk_guard = 0;
|
|||
void __libc_init_tls(KernelArgumentBlock& args) {
|
||||
__libc_auxv = args.auxv;
|
||||
|
||||
static void* tls[BIONIC_TLS_SLOTS];
|
||||
static pthread_internal_t main_thread;
|
||||
main_thread.tls = tls;
|
||||
|
||||
// Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
|
||||
// As a side-effect, this tells us our pid (which is the same as the main thread's tid).
|
||||
|
@ -96,7 +94,7 @@ void __libc_init_tls(KernelArgumentBlock& args) {
|
|||
__init_thread(&main_thread, false);
|
||||
__init_tls(&main_thread);
|
||||
__set_tls(main_thread.tls);
|
||||
tls[TLS_SLOT_BIONIC_PREINIT] = &args;
|
||||
main_thread.tls[TLS_SLOT_BIONIC_PREINIT] = &args;
|
||||
|
||||
__init_alternate_signal_stack(&main_thread);
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "pthread_internal.h"
|
||||
|
||||
#include "private/bionic_macros.h"
|
||||
#include "private/bionic_prctl.h"
|
||||
#include "private/bionic_ssp.h"
|
||||
#include "private/bionic_tls.h"
|
||||
#include "private/libc_logging.h"
|
||||
|
@ -72,6 +73,10 @@ void __init_alternate_signal_stack(pthread_internal_t* thread) {
|
|||
ss.ss_flags = 0;
|
||||
sigaltstack(&ss, NULL);
|
||||
thread->alternate_signal_stack = ss.ss_sp;
|
||||
|
||||
// We can only use const static allocated string for mapped region name, as Android kernel
|
||||
// uses the string pointer directly when dumping /proc/pid/maps.
|
||||
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -101,31 +106,64 @@ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list) {
|
|||
return error;
|
||||
}
|
||||
|
||||
static void* __create_thread_stack(pthread_internal_t* thread) {
|
||||
static void* __create_thread_stack(const pthread_attr_t& attr) {
|
||||
// Create a new private anonymous map.
|
||||
int prot = PROT_READ | PROT_WRITE;
|
||||
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
|
||||
void* stack = mmap(NULL, thread->attr.stack_size, prot, flags, -1, 0);
|
||||
void* stack = mmap(NULL, attr.stack_size, prot, flags, -1, 0);
|
||||
if (stack == MAP_FAILED) {
|
||||
__libc_format_log(ANDROID_LOG_WARN,
|
||||
"libc",
|
||||
"pthread_create failed: couldn't allocate %zd-byte stack: %s",
|
||||
thread->attr.stack_size, strerror(errno));
|
||||
attr.stack_size, strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Set the guard region at the end of the stack to PROT_NONE.
|
||||
if (mprotect(stack, thread->attr.guard_size, PROT_NONE) == -1) {
|
||||
if (mprotect(stack, attr.guard_size, PROT_NONE) == -1) {
|
||||
__libc_format_log(ANDROID_LOG_WARN, "libc",
|
||||
"pthread_create failed: couldn't mprotect PROT_NONE %zd-byte stack guard region: %s",
|
||||
thread->attr.guard_size, strerror(errno));
|
||||
munmap(stack, thread->attr.stack_size);
|
||||
attr.guard_size, strerror(errno));
|
||||
munmap(stack, attr.stack_size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return stack;
|
||||
}
|
||||
|
||||
static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
|
||||
if (attr->stack_base == NULL) {
|
||||
// The caller didn't provide a stack, so allocate one.
|
||||
// Make sure the stack size and guard size are multiples of PAGE_SIZE.
|
||||
attr->stack_size = BIONIC_ALIGN(attr->stack_size, PAGE_SIZE);
|
||||
attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
|
||||
attr->stack_base = __create_thread_stack(*attr);
|
||||
if (attr->stack_base == NULL) {
|
||||
return EAGAIN;
|
||||
}
|
||||
} else {
|
||||
// The caller did provide a stack, so remember we're not supposed to free it.
|
||||
attr->flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
|
||||
}
|
||||
|
||||
// Thread stack is used for two sections:
|
||||
// pthread_internal_t.
|
||||
// regular stack, from top to down.
|
||||
uint8_t* stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + attr->stack_size;
|
||||
stack_top -= sizeof(pthread_internal_t);
|
||||
pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
|
||||
|
||||
// No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned,
|
||||
// and user allocated stack is guaranteed by pthread_attr_setstack.
|
||||
|
||||
thread->attr = *attr;
|
||||
__init_tls(thread);
|
||||
|
||||
*threadp = thread;
|
||||
*child_stack = stack_top;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __pthread_start(void* arg) {
|
||||
pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
|
||||
|
||||
|
@ -158,43 +196,21 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
|
|||
// Inform the rest of the C library that at least one thread was created.
|
||||
__isthreaded = 1;
|
||||
|
||||
pthread_internal_t* thread = __create_thread_struct();
|
||||
if (thread == NULL) {
|
||||
return EAGAIN;
|
||||
}
|
||||
|
||||
pthread_attr_t thread_attr;
|
||||
if (attr == NULL) {
|
||||
pthread_attr_init(&thread->attr);
|
||||
pthread_attr_init(&thread_attr);
|
||||
} else {
|
||||
thread->attr = *attr;
|
||||
thread_attr = *attr;
|
||||
attr = NULL; // Prevent misuse below.
|
||||
}
|
||||
|
||||
// Make sure the stack size and guard size are multiples of PAGE_SIZE.
|
||||
thread->attr.stack_size = BIONIC_ALIGN(thread->attr.stack_size, PAGE_SIZE);
|
||||
thread->attr.guard_size = BIONIC_ALIGN(thread->attr.guard_size, PAGE_SIZE);
|
||||
|
||||
if (thread->attr.stack_base == NULL) {
|
||||
// The caller didn't provide a stack, so allocate one.
|
||||
thread->attr.stack_base = __create_thread_stack(thread);
|
||||
if (thread->attr.stack_base == NULL) {
|
||||
__free_thread_struct(thread);
|
||||
return EAGAIN;
|
||||
}
|
||||
} else {
|
||||
// The caller did provide a stack, so remember we're not supposed to free it.
|
||||
thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK;
|
||||
pthread_internal_t* thread = NULL;
|
||||
void* child_stack = NULL;
|
||||
int result = __allocate_thread(&thread_attr, &thread, &child_stack);
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Make room for the TLS area.
|
||||
// The child stack is the same address, just growing in the opposite direction.
|
||||
// At offsets >= 0, we have the TLS slots.
|
||||
// At offsets < 0, we have the child stack.
|
||||
thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) +
|
||||
thread->attr.stack_size - BIONIC_ALIGN(BIONIC_TLS_SLOTS * sizeof(void*), 16));
|
||||
void* child_stack = thread->tls;
|
||||
__init_tls(thread);
|
||||
|
||||
// Create a mutex for the thread in TLS to wait on once it starts so we can keep
|
||||
// it from doing anything until after we notify the debugger about it
|
||||
//
|
||||
|
@ -211,7 +227,7 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
|
|||
|
||||
int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
|
||||
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
|
||||
void* tls = thread->tls;
|
||||
void* tls = reinterpret_cast<void*>(thread->tls);
|
||||
#if defined(__i386__)
|
||||
// On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
|
||||
// a pointer to the TLS itself.
|
||||
|
@ -229,7 +245,6 @@ int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
|
|||
if (!thread->user_allocated_stack()) {
|
||||
munmap(thread->attr.stack_base, thread->attr.stack_size);
|
||||
}
|
||||
__free_thread_struct(thread);
|
||||
__libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
|
||||
return clone_errno;
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ int pthread_detach(pthread_t t) {
|
|||
|
||||
if (thread->tid == 0) {
|
||||
// Already exited; clean up.
|
||||
_pthread_internal_remove_locked(thread.get());
|
||||
_pthread_internal_remove_locked(thread.get(), true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -90,7 +90,7 @@ void pthread_exit(void* return_value) {
|
|||
// Keep track of what we need to know about the stack before we lose the pthread_internal_t.
|
||||
void* stack_base = thread->attr.stack_base;
|
||||
size_t stack_size = thread->attr.stack_size;
|
||||
bool user_allocated_stack = thread->user_allocated_stack();
|
||||
bool free_stack = false;
|
||||
|
||||
pthread_mutex_lock(&g_thread_list_lock);
|
||||
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0) {
|
||||
|
@ -98,24 +98,18 @@ void pthread_exit(void* return_value) {
|
|||
// First make sure that the kernel does not try to clear the tid field
|
||||
// because we'll have freed the memory before the thread actually exits.
|
||||
__set_tid_address(NULL);
|
||||
_pthread_internal_remove_locked(thread);
|
||||
} else {
|
||||
// Make sure that the pthread_internal_t doesn't have stale pointers to a stack that
|
||||
// will be unmapped after the exit call below.
|
||||
if (!user_allocated_stack) {
|
||||
thread->attr.stack_base = NULL;
|
||||
thread->attr.stack_size = 0;
|
||||
thread->tls = NULL;
|
||||
|
||||
// pthread_internal_t is freed below with stack, not here.
|
||||
_pthread_internal_remove_locked(thread, false);
|
||||
if (!thread->user_allocated_stack()) {
|
||||
free_stack = true;
|
||||
}
|
||||
// pthread_join is responsible for destroying the pthread_internal_t for non-detached threads.
|
||||
// The kernel will futex_wake on the pthread_internal_t::tid field to wake pthread_join.
|
||||
}
|
||||
pthread_mutex_unlock(&g_thread_list_lock);
|
||||
|
||||
if (user_allocated_stack) {
|
||||
// Cleaning up this thread's stack is the creator's responsibility, not ours.
|
||||
__exit(0);
|
||||
} else {
|
||||
// Detached threads exit with stack teardown, and everything deallocated here.
|
||||
// Threads that can be joined exit but leave their stacks for the pthread_join caller to clean up.
|
||||
if (free_stack) {
|
||||
// We need to munmap the stack we're running on before calling exit.
|
||||
// That's not something we can do in C.
|
||||
|
||||
|
@ -126,5 +120,7 @@ void pthread_exit(void* return_value) {
|
|||
sigprocmask(SIG_SETMASK, &mask, NULL);
|
||||
|
||||
_exit_with_stack_teardown(stack_base, stack_size);
|
||||
} else {
|
||||
__exit(0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
#include <pthread.h>
|
||||
|
||||
#include "private/bionic_tls.h"
|
||||
|
||||
/* Has the thread been detached by a pthread_join or pthread_detach call? */
|
||||
#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001
|
||||
|
||||
|
@ -72,8 +74,6 @@ struct pthread_internal_t {
|
|||
return (attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK) != 0;
|
||||
}
|
||||
|
||||
void** tls;
|
||||
|
||||
pthread_attr_t attr;
|
||||
|
||||
__pthread_cleanup_t* cleanup_stack;
|
||||
|
@ -86,16 +86,16 @@ struct pthread_internal_t {
|
|||
|
||||
pthread_mutex_t startup_handshake_mutex;
|
||||
|
||||
void* tls[BIONIC_TLS_SLOTS];
|
||||
|
||||
/*
|
||||
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this
|
||||
* per-thread buffer by simply using malloc(3) and free(3).
|
||||
*/
|
||||
#define __BIONIC_DLERROR_BUFFER_SIZE 512
|
||||
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
|
||||
};
|
||||
} __attribute__((aligned(16))); // Align it as thread stack top below it should be aligned.
|
||||
|
||||
__LIBC_HIDDEN__ pthread_internal_t* __create_thread_struct();
|
||||
__LIBC_HIDDEN__ void __free_thread_struct(pthread_internal_t*);
|
||||
__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread, bool add_to_thread_list);
|
||||
__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
|
||||
__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
|
||||
|
@ -105,7 +105,7 @@ __LIBC_HIDDEN__ void _pthread_internal_add(pthread_internal_t* thread);
|
|||
extern "C" __LIBC64_HIDDEN__ pthread_internal_t* __get_thread(void);
|
||||
|
||||
__LIBC_HIDDEN__ void pthread_key_clean_all(void);
|
||||
__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread);
|
||||
__LIBC_HIDDEN__ void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread);
|
||||
|
||||
/*
|
||||
* Traditionally we gave threads a 1MiB stack. When we started
|
||||
|
|
|
@ -41,26 +41,7 @@
|
|||
pthread_internal_t* g_thread_list = NULL;
|
||||
pthread_mutex_t g_thread_list_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
// Maps a private, anonymous, read/write region of sizeof(pthread_internal_t) bytes and
// returns it as a pthread_internal_t*. If mmap fails, logs a warning that includes
// strerror(errno) and returns NULL.
pthread_internal_t* __create_thread_struct() {
  const int prot = PROT_READ | PROT_WRITE;
  const int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;

  void* mapping = mmap(NULL, sizeof(pthread_internal_t), prot, map_flags, -1, 0);
  if (mapping != MAP_FAILED) {
    return reinterpret_cast<pthread_internal_t*>(mapping);
  }

  __libc_format_log(ANDROID_LOG_WARN, "libc",
                    "__create_thread_struct() failed: %s", strerror(errno));
  return NULL;
}
|
||||
|
||||
// Unmaps the sizeof(pthread_internal_t) bytes starting at `thread`. A munmap failure is not
// propagated to the caller; it is only reported as a warning that includes strerror(errno).
void __free_thread_struct(pthread_internal_t* thread) {
  if (munmap(thread, sizeof(pthread_internal_t)) == 0) {
    return;
  }

  __libc_format_log(ANDROID_LOG_WARN, "libc",
                    "__free_thread_struct() failed: %s", strerror(errno));
}
|
||||
|
||||
void _pthread_internal_remove_locked(pthread_internal_t* thread) {
|
||||
void _pthread_internal_remove_locked(pthread_internal_t* thread, bool free_thread) {
|
||||
if (thread->next != NULL) {
|
||||
thread->next->prev = thread->prev;
|
||||
}
|
||||
|
@ -70,10 +51,11 @@ void _pthread_internal_remove_locked(pthread_internal_t* thread) {
|
|||
g_thread_list = thread->next;
|
||||
}
|
||||
|
||||
// The main thread is not heap-allocated. See __libc_init_tls for the declaration,
|
||||
// and __libc_init_common for the point where it's added to the thread list.
|
||||
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_MAIN_THREAD) == 0) {
|
||||
__free_thread_struct(thread);
|
||||
// For threads using user allocated stack (including the main thread), the pthread_internal_t
|
||||
// can't be freed since it is on the stack.
|
||||
if (free_thread && !(thread->attr.flags & PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK)) {
|
||||
// Use one munmap to free the whole thread stack, including pthread_internal_t.
|
||||
munmap(thread->attr.stack_base, thread->attr.stack_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -74,6 +74,6 @@ int pthread_join(pthread_t t, void** return_value) {
|
|||
*return_value = thread->return_value;
|
||||
}
|
||||
|
||||
_pthread_internal_remove_locked(thread.get());
|
||||
_pthread_internal_remove_locked(thread.get(), true);
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue