memtag_stack: vfork and longjmp support.

With memtag_stack, each function is responsible for cleaning up
allocation tags for its stack frame. Allocation tags for anything below
SP must match the address tag in SP.

Both vfork and longjmp implement non-local control transfer which
abandons part of the stack without proper cleanup. Update allocation
tags:
* For longjmp, we know both source and destination values of SP.
* For vfork, save the value of SP before exit() or exec*() - the only
  valid ways of ending the child process according to POSIX - and reset
  tags from there to SP-in-parent.

This is not 100% solid and can be confused by a number of hopefully
uncommon conditions:
* Segmented stacks.
* Longjmp from sigaltstack into the main stack.
* Some kind of userspace thread implementation using longjmp (that's UB,
  longjmp can only return to the caller on the current stack).
* and other strange things.

This change adds a sanity limit on the size of the tag cleanup. Also,
this logic is only activated in the binaries that carry the
NT_MEMTAG_STACK note (set by -fsanitize=memtag-stack) which is meant as
a debugging configuration, is not compatible with pre-armv9 CPUs, and
should not be set on production code.

Bug: b/174878242
Test: fvp_mini with ToT LLVM (more tests in a separate change)

Change-Id: Ibef8b2fc5a6ce85c8e562dead1019964d9f6b80b
This commit is contained in:
Evgenii Stepanov 2022-05-12 15:50:47 -07:00
parent 39de8b944e
commit 3031a7e45e
8 changed files with 105 additions and 14 deletions

View file

@ -26,7 +26,7 @@
# This file is processed by a python script named gensyscalls.py, run via
# genrules in Android.bp.
int execve(const char*, char* const*, char* const*) all
int __execve:execve(const char*, char* const*, char* const*) all
uid_t getuid:getuid32() lp32
uid_t getuid:getuid() lp64
@ -320,7 +320,7 @@ int __epoll_pwait:epoll_pwait(int, struct epoll_event*, int, int, const sigset64
int __eventfd:eventfd2(unsigned int, int) all
void _exit|_Exit:exit_group(int) all
void __exit_group:exit_group(int) all
void __exit:exit(int) all
int inotify_init1(int) all

View file

@ -194,7 +194,7 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
cmp x2, x12
bne __bionic_setjmp_checksum_mismatch
#if __has_feature(hwaddress_sanitizer)
// Update stack memory tags (MTE + hwasan).
stp x0, x30, [sp, #-16]!
.cfi_adjust_cfa_offset 16
.cfi_rel_offset x0, 0
@ -206,7 +206,7 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
bic x2, x2, #1
ldr x0, [x0, #(_JB_X30_SP * 8 + 8)]
eor x0, x0, x2
bl __hwasan_handle_longjmp
bl memtag_handle_longjmp
mov x1, x19 // Restore 'value'.
// Restore original x0 and lr.
@ -214,7 +214,6 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
.cfi_adjust_cfa_offset -16
.cfi_restore x0
.cfi_restore x30
#endif
// Do we need to restore the signal mask?
ldr x2, [x0, #(_JB_SIGFLAG * 8)]

View file

@ -45,6 +45,9 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(vfork)
ldr w10, [x9, #20]
str w0, [x9, #20]
// Clear vfork_child_stack_bottom_.
str xzr, [x9, #776]
mov x0, #(CLONE_VM | CLONE_VFORK | SIGCHLD)
mov x1, xzr
mov x2, xzr
@ -62,9 +65,6 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(vfork)
cneg x0, x0, hi
b.hi __set_errno_internal
#if __has_feature(hwaddress_sanitizer)
cbz x0, .L_exit
// Clean up stack shadow in the parent process.
// https://github.com/google/sanitizers/issues/925
paciasp
@ -75,7 +75,7 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(vfork)
.cfi_rel_offset x30, 8
add x0, sp, #16
bl __hwasan_handle_vfork
bl memtag_handle_vfork
ldp x0, x30, [sp], #16
.cfi_adjust_cfa_offset -16
@ -84,8 +84,6 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(vfork)
autiasp
.cfi_negate_ra_state
#endif
.L_exit:
ret
END(vfork)

View file

@ -39,10 +39,12 @@
#include <string.h>
#include <unistd.h>
#include "private/__bionic_get_shell_path.h"
#include "private/FdPath.h"
#include "private/__bionic_get_shell_path.h"
#include "pthread_internal.h"
extern "C" char** environ;
extern "C" int __execve(const char* pathname, char* const* argv, char* const* envp);
enum { ExecL, ExecLE, ExecLP };
@ -181,3 +183,9 @@ int fexecve(int fd, char* const* argv, char* const* envp) {
if (errno == ENOENT) errno = EBADF;
return -1;
}
__attribute__((no_sanitize("memtag"))) int execve(const char* pathname, char* const* argv,
char* const* envp) {
__get_thread()->vfork_child_stack_bottom = __builtin_frame_address(0);
return __execve(pathname, argv, envp);
}

View file

@ -30,9 +30,18 @@
#include <unistd.h>
#include "private/bionic_defs.h"
#include "pthread_internal.h"
extern "C" void __cxa_finalize(void* dso_handle);
extern "C" void __cxa_thread_finalize();
extern "C" __noreturn void __exit_group(int status);
__attribute__((no_sanitize("memtag"))) void _exit(int status) {
__get_thread()->vfork_child_stack_bottom = __builtin_frame_address(0);
__exit_group(status);
}
__strong_alias(_Exit, _exit);
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
void exit(int status) {

View file

@ -32,6 +32,8 @@
#include <bionic/pthread_internal.h>
#include <platform/bionic/malloc.h>
#include <sanitizer/hwasan_interface.h>
#include <sys/auxv.h>
extern "C" void scudo_malloc_disable_memory_tagging();
extern "C" void scudo_malloc_set_track_allocation_stacks(int);
@ -170,3 +172,69 @@ bool SetHeapTaggingLevel(HeapTaggingLevel tag_level) {
return true;
}
#ifdef __aarch64__
// Reset MTE allocation tags over [from, to) so they match the address tag
// carried in `from` itself: STG writes the pointer's own tag to memory, one
// 16-byte granule per iteration, post-incrementing the pointer.
// NOTE(review): the loop body runs at least once before the comparison, so
// callers must not pass an empty range; `from` and `to` are presumably
// 16-byte aligned (STG requirement) -- confirm at call sites.
// NOTE(review): `b.lt` is a signed comparison; this assumes both pointers
// have the same top (tag) byte so they order consistently -- confirm.
static inline __attribute__((no_sanitize("memtag"))) void untag_memory(void* from, void* to) {
__asm__ __volatile__(
".arch_extension mte\n"
"1:\n"
"stg %[Ptr], [%[Ptr]], #16\n"
"cmp %[Ptr], %[End]\n"
"b.lt 1b\n"
: [Ptr] "+&r"(from)
: [End] "r"(to)
: "memory");
}
#endif
#ifdef __aarch64__
// Sanity cap on how much stack may be retagged in one operation. A larger
// distance indicates something unexpected (e.g. a cross-stack longjmp or a
// corrupted saved SP) and is treated as fatal by the callers below.
// 128 MiB of stack should be enough for anybody.
static constexpr size_t kUntagLimit = 128 * 1024 * 1024;
#endif // __aarch64__
// Repair MTE allocation tags on the stack region a longjmp abandons: every
// granule between the current SP (source) and the destination SP must have
// its tag reset before execution resumes at the destination, because the
// skipped frames never ran their tag-cleanup epilogues.
// Aborts when the jump distance exceeds kUntagLimit, which would suggest a
// cross-stack jump or a corrupted jmp_buf rather than an ordinary longjmp.
// Also forwards to hwasan's longjmp hook when built with hwasan.
extern "C" __LIBC_HIDDEN__ __attribute__((no_sanitize("memtag"))) void memtag_handle_longjmp(
    void* sp_dst __unused) {
#ifdef __aarch64__
  if (__libc_globals->memtag_stack) {
    void* sp_src = __builtin_frame_address(0);
    const uintptr_t src = reinterpret_cast<uintptr_t>(sp_src);
    const uintptr_t dst = reinterpret_cast<uintptr_t>(sp_dst);
    const size_t span = dst - src;
    if (span > kUntagLimit) {
      async_safe_fatal(
          "memtag_handle_longjmp: stack adjustment too large! %p -> %p, distance %zx > %zx\n",
          sp_src, sp_dst, span, kUntagLimit);
    } else {
      untag_memory(sp_src, sp_dst);
    }
  }
#endif  // __aarch64__
#if __has_feature(hwaddress_sanitizer)
  __hwasan_handle_longjmp(sp_dst);
#endif  // __has_feature(hwaddress_sanitizer)
}
// Called in the vfork parent once the child has handed back control (via
// exec*() or _exit(), which recorded their frame address in
// vfork_child_stack_bottom). The child ran on the parent's stack, so the
// region between the child's last SP and the parent's current SP may carry
// stale MTE tags; reset them here. The saved bottom is consumed (cleared)
// so it can never be reused for a later, unrelated wait.
// Aborts if the child never recorded an SP or if the span exceeds
// kUntagLimit. Also forwards to hwasan's vfork hook when built with hwasan.
extern "C" __LIBC_HIDDEN__ __attribute__((no_sanitize("memtag"), no_sanitize("hwaddress"))) void
memtag_handle_vfork(void* sp __unused) {
#ifdef __aarch64__
  if (__libc_globals->memtag_stack) {
    pthread_internal_t* thread = __get_thread();
    // Read-then-clear: the recorded value is only meaningful for this wakeup.
    void* child_sp = thread->vfork_child_stack_bottom;
    thread->vfork_child_stack_bottom = nullptr;
    if (child_sp == nullptr) {
      async_safe_fatal("memtag_handle_vfork: child SP unknown\n");
    } else {
      const size_t span =
          reinterpret_cast<uintptr_t>(sp) - reinterpret_cast<uintptr_t>(child_sp);
      if (span > kUntagLimit) {
        async_safe_fatal(
            "memtag_handle_vfork: stack adjustment too large! %p -> %p, distance %zx > %zx\n",
            child_sp, sp, span, kUntagLimit);
      } else {
        untag_memory(child_sp, sp);
      }
    }
  }
#endif  // __aarch64__
#if __has_feature(hwaddress_sanitizer)
  __hwasan_handle_vfork(sp);
#endif  // __has_feature(hwaddress_sanitizer)
}

View file

@ -160,6 +160,13 @@ class pthread_internal_t {
bionic_tls* bionic_tls;
int errno_value;
// The last observed value of SP in a vfork child process.
// The part of the stack between this address and the value of SP when the vfork parent process
// regains control may have stale MTE tags and needs cleanup. This field is only meaningful while
// the parent is waiting for the vfork child to return control by calling either exec*() or
// exit().
void* vfork_child_stack_bottom;
};
struct ThreadMapping {

View file

@ -30,7 +30,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
#define CHECK_OFFSET(name, field, offset) \
check_offset(#name, #field, offsetof(name, field), offset);
#ifdef __LP64__
CHECK_SIZE(pthread_internal_t, 776);
CHECK_SIZE(pthread_internal_t, 784);
CHECK_OFFSET(pthread_internal_t, next, 0);
CHECK_OFFSET(pthread_internal_t, prev, 8);
CHECK_OFFSET(pthread_internal_t, tid, 16);
@ -55,6 +55,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 248);
CHECK_OFFSET(pthread_internal_t, bionic_tls, 760);
CHECK_OFFSET(pthread_internal_t, errno_value, 768);
CHECK_OFFSET(pthread_internal_t, vfork_child_stack_bottom, 776);
CHECK_SIZE(bionic_tls, 12200);
CHECK_OFFSET(bionic_tls, key_data, 0);
CHECK_OFFSET(bionic_tls, locale, 2080);
@ -72,7 +73,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
CHECK_OFFSET(bionic_tls, bionic_systrace_disabled, 12193);
CHECK_OFFSET(bionic_tls, padding, 12194);
#else
CHECK_SIZE(pthread_internal_t, 668);
CHECK_SIZE(pthread_internal_t, 672);
CHECK_OFFSET(pthread_internal_t, next, 0);
CHECK_OFFSET(pthread_internal_t, prev, 4);
CHECK_OFFSET(pthread_internal_t, tid, 8);
@ -97,6 +98,7 @@ void tests(CheckSize check_size, CheckOffset check_offset) {
CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 148);
CHECK_OFFSET(pthread_internal_t, bionic_tls, 660);
CHECK_OFFSET(pthread_internal_t, errno_value, 664);
CHECK_OFFSET(pthread_internal_t, vfork_child_stack_bottom, 668);
CHECK_SIZE(bionic_tls, 11080);
CHECK_OFFSET(bionic_tls, key_data, 0);
CHECK_OFFSET(bionic_tls, locale, 1040);