platform_bionic/libc/bionic/pthread_internal.h

242 lines
8.8 KiB
C
Raw Normal View History

/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#pragma once
#include <pthread.h>
#include <stdatomic.h>
#if __has_feature(hwaddress_sanitizer)
#include <sanitizer/hwasan_interface.h>
#else
#define __hwasan_thread_enter()
#define __hwasan_thread_exit()
#endif
#include "private/bionic_elf_tls.h"
#include "private/bionic_lock.h"
#include "private/bionic_tls.h"
Implement pthread_attr_getinheritsched/pthread_attr_setinheritsched. Historically, Android defaulted to EXPLICIT but with a special case because SCHED_NORMAL/priority 0 was awkward. Because the code couldn't actually tell whether SCHED_NORMAL/priority 0 was a genuine attempt to explicitly set those attributes (because the parent thread is SCHED_FIFO, say) or just because the pthread_attr_t was left at its defaults. Now we support INHERIT, we could call sched_getscheduler to see whether we actually need to call sched_setscheduler, but since the major cost is the fixed syscall overhead, we may as well just conservatively call sched_setscheduler and let the kernel decide whether it's a no-op. (Especially because we'd then have to add both sched_getscheduler and sched_setscheduler to any seccomp filter.) Platform code (or app code that only needs to support >= P) can actually add a call to pthread_attr_setinheritsched to say that they just want to inherit (if they know that none of their threads actually mess with scheduler attributes at all), which will save them a sched_setscheduler call except in the doubly-special case of SCHED_RESET_ON_FORK (which we do handle). An alternative would be "make pthread_attr_setschedparams and pthread_attr_setschedprio set EXPLICIT and change the platform default to INHERIT", but even though I can only think of weird pathological examples where anyone would notice that change, that behavior -- of pthread_attr_setschedparams/pthread_attr_setschedprio overriding an earlier call to pthread_attr_setinheritsched -- isn't allowed by POSIX (whereas defaulting to EXPLICIT is). If we have a lot of trouble with this change in the app compatibility testing phase, though, we'll want to reconsider this decision! -*- This change also removes a comment about setting the scheduler attributes in main_thread because we'd have to actually keep them up to date, and it's not clear that doing so would be worth the trouble. Also make async_safe_format_log preserve errno so we don't have to be so careful around it. Bug: http://b/67471710 Test: ran tests Change-Id: Idd026c4ce78a536656adcb57aa2e7b2c616eeddf
2017-10-18 00:34:41 +02:00
// Has the thread been detached by a pthread_join or pthread_detach call?
#define PTHREAD_ATTR_FLAG_DETACHED 0x00000001
Implement pthread_attr_getinheritsched/pthread_attr_setinheritsched. Historically, Android defaulted to EXPLICIT but with a special case because SCHED_NORMAL/priority 0 was awkward. Because the code couldn't actually tell whether SCHED_NORMAL/priority 0 was a genuine attempt to explicitly set those attributes (because the parent thread is SCHED_FIFO, say) or just because the pthread_attr_t was left at its defaults. Now we support INHERIT, we could call sched_getscheduler to see whether we actually need to call sched_setscheduler, but since the major cost is the fixed syscall overhead, we may as well just conservatively call sched_setscheduler and let the kernel decide whether it's a no-op. (Especially because we'd then have to add both sched_getscheduler and sched_setscheduler to any seccomp filter.) Platform code (or app code that only needs to support >= P) can actually add a call to pthread_attr_setinheritsched to say that they just want to inherit (if they know that none of their threads actually mess with scheduler attributes at all), which will save them a sched_setscheduler call except in the doubly-special case of SCHED_RESET_ON_FORK (which we do handle). An alternative would be "make pthread_attr_setschedparams and pthread_attr_setschedprio set EXPLICIT and change the platform default to INHERIT", but even though I can only think of weird pathological examples where anyone would notice that change, that behavior -- of pthread_attr_setschedparams/pthread_attr_setschedprio overriding an earlier call to pthread_attr_setinheritsched -- isn't allowed by POSIX (whereas defaulting to EXPLICIT is). If we have a lot of trouble with this change in the app compatibility testing phase, though, we'll want to reconsider this decision! -*- This change also removes a comment about setting the scheduler attributes in main_thread because we'd have to actually keep them up to date, and it's not clear that doing so would be worth the trouble. Also make async_safe_format_log preserve errno so we don't have to be so careful around it. Bug: http://b/67471710 Test: ran tests Change-Id: Idd026c4ce78a536656adcb57aa2e7b2c616eeddf
2017-10-18 00:34:41 +02:00
// Has the thread been joined by another thread?
#define PTHREAD_ATTR_FLAG_JOINED 0x00000002
// Used for pthread_attr_setinheritsched. We need two flags for this apparent
// boolean because our historical behavior matches neither of the POSIX choices.
Implement pthread_attr_getinheritsched/pthread_attr_setinheritsched. Historically, Android defaulted to EXPLICIT but with a special case because SCHED_NORMAL/priority 0 was awkward. Because the code couldn't actually tell whether SCHED_NORMAL/priority 0 was a genuine attempt to explicitly set those attributes (because the parent thread is SCHED_FIFO, say) or just because the pthread_attr_t was left at its defaults. Now we support INHERIT, we could call sched_getscheduler to see whether we actually need to call sched_setscheduler, but since the major cost is the fixed syscall overhead, we may as well just conservatively call sched_setscheduler and let the kernel decide whether it's a no-op. (Especially because we'd then have to add both sched_getscheduler and sched_setscheduler to any seccomp filter.) Platform code (or app code that only needs to support >= P) can actually add a call to pthread_attr_setinheritsched to say that they just want to inherit (if they know that none of their threads actually mess with scheduler attributes at all), which will save them a sched_setscheduler call except in the doubly-special case of SCHED_RESET_ON_FORK (which we do handle). An alternative would be "make pthread_attr_setschedparams and pthread_attr_setschedprio set EXPLICIT and change the platform default to INHERIT", but even though I can only think of weird pathological examples where anyone would notice that change, that behavior -- of pthread_attr_setschedparams/pthread_attr_setschedprio overriding an earlier call to pthread_attr_setinheritsched -- isn't allowed by POSIX (whereas defaulting to EXPLICIT is). If we have a lot of trouble with this change in the app compatibility testing phase, though, we'll want to reconsider this decision! -*- This change also removes a comment about setting the scheduler attributes in main_thread because we'd have to actually keep them up to date, and it's not clear that doing so would be worth the trouble. Also make async_safe_format_log preserve errno so we don't have to be so careful around it. Bug: http://b/67471710 Test: ran tests Change-Id: Idd026c4ce78a536656adcb57aa2e7b2c616eeddf
2017-10-18 00:34:41 +02:00
#define PTHREAD_ATTR_FLAG_INHERIT 0x00000004
#define PTHREAD_ATTR_FLAG_EXPLICIT 0x00000008
Implement pthread_attr_getinheritsched/pthread_attr_setinheritsched. Historically, Android defaulted to EXPLICIT but with a special case because SCHED_NORMAL/priority 0 was awkward. Because the code couldn't actually tell whether SCHED_NORMAL/priority 0 was a genuine attempt to explicitly set those attributes (because the parent thread is SCHED_FIFO, say) or just because the pthread_attr_t was left at its defaults. Now we support INHERIT, we could call sched_getscheduler to see whether we actually need to call sched_setscheduler, but since the major cost is the fixed syscall overhead, we may as well just conservatively call sched_setscheduler and let the kernel decide whether it's a no-op. (Especially because we'd then have to add both sched_getscheduler and sched_setscheduler to any seccomp filter.) Platform code (or app code that only needs to support >= P) can actually add a call to pthread_attr_setinheritsched to say that they just want to inherit (if they know that none of their threads actually mess with scheduler attributes at all), which will save them a sched_setscheduler call except in the doubly-special case of SCHED_RESET_ON_FORK (which we do handle). An alternative would be "make pthread_attr_setschedparams and pthread_attr_setschedprio set EXPLICIT and change the platform default to INHERIT", but even though I can only think of weird pathological examples where anyone would notice that change, that behavior -- of pthread_attr_setschedparams/pthread_attr_setschedprio overriding an earlier call to pthread_attr_setinheritsched -- isn't allowed by POSIX (whereas defaulting to EXPLICIT is). If we have a lot of trouble with this change in the app compatibility testing phase, though, we'll want to reconsider this decision! -*- This change also removes a comment about setting the scheduler attributes in main_thread because we'd have to actually keep them up to date, and it's not clear that doing so would be worth the trouble. Also make async_safe_format_log preserve errno so we don't have to be so careful around it. Bug: http://b/67471710 Test: ran tests Change-Id: Idd026c4ce78a536656adcb57aa2e7b2c616eeddf
2017-10-18 00:34:41 +02:00
enum ThreadJoinState {
THREAD_NOT_JOINED,
THREAD_EXITED_NOT_JOINED,
THREAD_JOINED,
THREAD_DETACHED
};
class thread_local_dtor;
class pthread_internal_t {
Remove the global thread list. Another release, another attempt to fix this bug. This change affects pthread_detach, pthread_getcpuclockid, pthread_getschedparam/pthread_setschedparam, pthread_join, and pthread_kill: instead of returning ESRCH when passed an invalid pthread_t, they'll now SEGV. Note that this doesn't change behavior as much as you might think: the old lookup only held the global thread list lock for the duration of the lookup, so there was still a race between that and the dereference in the caller, given that callers actually need the tid to pass to some syscall or other, and sometimes update fields in the pthread_internal_t struct too. We can't check thread->tid against 0 to see whether a pthread_t is still valid because a dead thread gets its thread struct unmapped along with its stack, so the dereference isn't safe. Taking the affected functions one by one: * pthread_getcpuclockid and pthread_getschedparam/pthread_setschedparam should be fine. Unsafe calls to those seem highly unlikely. * Unsafe pthread_detach callers probably want to switch to pthread_attr_setdetachstate instead, or using pthread_detach(pthread_self()) from the new thread's start routine rather than doing the detach in the parent. * pthread_join calls should be safe anyway, because a joinable thread won't actually exit and unmap until it's joined. If you're joining an unjoinable thread, the fix is to stop marking it detached. If you're joining an already-joined thread, you need to rethink your design. * Unsafe pthread_kill calls aren't portably fixable. (And are obviously inherently non-portable as-is.) The best alternative on Android is to use pthread_gettid_np at some point that you know the thread to be alive, and then call kill/tgkill directly. That's still not completely safe because if you're too late, the tid may have been reused, but then your code is inherently unsafe anyway. If we find too much code is still broken, we can come back and disable the global thread list lookups for anything targeting >= O and then have another go at really removing this in P... Bug: http://b/19636317 Test: N6P boots, bionic tests pass Change-Id: Ia92641212f509344b99ee2a9bfab5383147fcba6
2017-01-04 23:12:54 +01:00
public:
class pthread_internal_t* next;
class pthread_internal_t* prev;
pid_t tid;
private:
pid_t cached_pid_;
public:
pid_t invalidate_cached_pid() {
pid_t old_value;
get_cached_pid(&old_value);
set_cached_pid(0);
return old_value;
}
void set_cached_pid(pid_t value) {
cached_pid_ = value;
}
bool get_cached_pid(pid_t* cached_pid) {
*cached_pid = cached_pid_;
return (*cached_pid != 0);
}
pthread_attr_t attr;
_Atomic(ThreadJoinState) join_state;
__pthread_cleanup_t* cleanup_stack;
void* (*start_routine)(void*);
void* start_routine_arg;
void* return_value;
sigset64_t start_mask;
void* alternate_signal_stack;
// The start address of the shadow call stack's guard region (arm64 only).
// This address is only used to deallocate the shadow call stack on thread
// exit; the address of the stack itself is stored only in the x18 register.
// Because the protection offered by SCS relies on the secrecy of the stack
// address, storing the address here weakens the protection, but only
// slightly, because it is relatively easy for an attacker to discover the
// address of the guard region anyway (e.g. it can be discovered by reference
// to other allocations), but not the stack itself, which is <0.1% of the size
// of the guard region.
//
// There are at least two other options for discovering the start address of
// the guard region on thread exit, but they are not as simple as storing in
// TLS.
// 1) Derive it from the value of the x18 register. This is only possible in
// processes that do not contain legacy code that might clobber x18,
// therefore each process must declare early during process startup whether
// it might load legacy code.
// 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
// discover its address by reading /proc/self/maps. One issue with this is
// that reading /proc/self/maps can race with allocations, so we may need
// code to handle retries.
void* shadow_call_stack_guard_region;
// A pointer to the top of the stack. This lets android_unsafe_frame_pointer_chase determine the
// top of the stack quickly, which would otherwise require special logic for the main thread.
uintptr_t stack_top;
Lock startup_handshake_lock;
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
void* mmap_base;
size_t mmap_size;
// The location of the VMA to label as the thread's stack_and_tls.
void* mmap_base_unguarded;
size_t mmap_size_unguarded;
char vma_name_buffer[32];
thread_local_dtor* thread_local_dtors;
/*
* The dynamic linker implements dlerror(3), which makes it hard for us to implement this
* per-thread buffer by simply using malloc(3) and free(3).
*/
char* current_dlerror;
#define __BIONIC_DLERROR_BUFFER_SIZE 512
char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
bionic_tls* bionic_tls;
int errno_value;
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
};
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
struct ThreadMapping {
char* mmap_base;
size_t mmap_size;
char* mmap_base_unguarded;
size_t mmap_size_unguarded;
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
char* static_tls;
char* stack_base;
char* stack_top;
};
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
__LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
__LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
__LIBC_HIDDEN__ void __init_tcb_dtv(bionic_tcb* tcb);
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
__LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
__LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
__LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
__LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
__LIBC_HIDDEN__ ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size);
__LIBC_HIDDEN__ void __set_stack_and_tls_vma_name(bool is_main_thread);
__LIBC_HIDDEN__ pthread_t __pthread_internal_add(pthread_internal_t* thread);
__LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id, const char* caller);
__LIBC_HIDDEN__ pid_t __pthread_internal_gettid(pthread_t pthread_id, const char* caller);
__LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
static inline __always_inline bionic_tcb* __get_bionic_tcb() {
return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
}
// Make __get_thread() inlined for performance reason. See http://b/19825434.
static inline __always_inline pthread_internal_t* __get_thread() {
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
return static_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
}
static inline __always_inline bionic_tls& __get_bionic_tls() {
Reorganize static TLS memory for ELF TLS For ELF TLS "local-exec" accesses, the static linker assumes that an executable's TLS segment is located at a statically-known offset from the thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86). Because these layouts are incompatible, Bionic generally needs to allocate its TLS slots differently between different architectures. To allow per-architecture TLS slots: - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are generally negative, while new x86 slots are generally positive. - Define a bionic_tcb struct that provides two things: - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field - an inline accessor function: void*& tls_slot(size_t tpindex); For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots), because the runtime linker doesn't know how large the static TLS area is until after it has loaded all of the initial solibs. To accommodate Golang, it's necessary to allocate the pthread keys at a fixed, small, positive offset from the thread pointer. This CL moves the pthread keys into bionic_tls, then allocates a single mapping per thread that looks like so: - stack guard - stack [omitted for main thread and with pthread_attr_setstack] - static TLS: - bionic_tcb [exec TLS will either precede or succeed the TCB] - bionic_tls [prefixed by the pthread keys] - [solib TLS segments will be placed here] - guard page As before, if the new mapping includes a stack, the pthread_internal_t is allocated on it. At startup, Bionic allocates a temporary bionic_tcb object on the stack, then allocates a temporary bionic_tls object using mmap. This mmap is delayed because the linker can't currently call async_safe_fatal() before relocating itself. Later, Bionic allocates a stack-less thread mapping for the main thread, and copies slots from the temporary TCB to the new TCB. (See *::copy_from_bootstrap methods.) Bug: http://b/78026329 Test: bionic unit tests Test: verify that a Golang app still works Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3 (cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
2019-01-03 11:51:30 +01:00
return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
}
static inline __always_inline TlsDtv* __get_tcb_dtv(bionic_tcb* tcb) {
uintptr_t dtv_slot = reinterpret_cast<uintptr_t>(tcb->tls_slot(TLS_SLOT_DTV));
return reinterpret_cast<TlsDtv*>(dtv_slot - offsetof(TlsDtv, generation));
}
static inline void __set_tcb_dtv(bionic_tcb* tcb, TlsDtv* val) {
tcb->tls_slot(TLS_SLOT_DTV) = &val->generation;
}
extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
__LIBC_HIDDEN__ void pthread_key_clean_all(void);
// Address space is precious on LP32, so use the minimum unit: one page.
// On LP64, we could use more but there's no obvious advantage to doing
// so, and the various media processes use RLIMIT_AS as a way to limit
// the amount of allocation they'll do.
#define PTHREAD_GUARD_SIZE PAGE_SIZE
// SIGSTKSZ (8KiB) is not big enough.
// An snprintf to a stack buffer of size PATH_MAX consumes ~7KiB of stack.
// On 64-bit, logging uses more than 8KiB by itself, ucontext is comically
// large on aarch64, and we have effectively infinite address space, so double
// the signal stack size.
#if defined(__LP64__)
#define SIGNAL_STACK_SIZE_WITHOUT_GUARD (32 * 1024)
#else
#define SIGNAL_STACK_SIZE_WITHOUT_GUARD (16 * 1024)
#endif
// Traditionally we gave threads a 1MiB stack. When we started
// allocating per-thread alternate signal stacks to ease debugging of
// stack overflows, we subtracted the same amount we were using there
// from the default thread stack size. This should keep memory usage
// roughly constant.
#define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGNAL_STACK_SIZE_WITHOUT_GUARD)
// Leave room for a guard page in the internally created signal stacks.
#define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
// Needed by fork.
__LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
__LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
__LIBC_HIDDEN__ extern void __bionic_atfork_run_parent();