From 4d4377881d462458de5d91840d11c10206e2e34d Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Tue, 30 Jan 2024 23:03:14 +0000 Subject: [PATCH] [riscv][bionic] Prototype TLS Descriptor support Add basic assembly stubs for TLS Descriptor support in the dynamic linker, and enable several code paths related to TLSDESC for RISC-V. Note: This patch requires an updated toolchain that supports TLSDESC for RISC-V, and the `-mtls-dialect=` compiler option specifically. Test: adb shell /data/nativetest64/bionic-unit-tests/bionic-unit-tests --gtest_filter=*tls* Bug: 322984914 Change-Id: I74bd0fa216b44b4ca2c5a5a6aec37b3fc47b00d9 --- libc/private/bionic_elf_dtv_offset.h | 44 ++++++ libc/private/bionic_elf_tls.h | 16 +- linker/Android.bp | 1 + linker/arch/riscv64/tlsdesc_resolver.S | 201 ++++++++++++++++++++++++ linker/linker_relocate.cpp | 14 +- tests/elftls_dl_test.cpp | 18 ++- tests/libs/elftls_dtv_resize_helper.cpp | 4 +- tests/libs/elftls_dynamic.cpp | 12 ++ 8 files changed, 286 insertions(+), 24 deletions(-) create mode 100644 libc/private/bionic_elf_dtv_offset.h create mode 100644 linker/arch/riscv64/tlsdesc_resolver.S diff --git a/libc/private/bionic_elf_dtv_offset.h b/libc/private/bionic_elf_dtv_offset.h new file mode 100644 index 000000000..8d9f3b9b4 --- /dev/null +++ b/libc/private/bionic_elf_dtv_offset.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#pragma once + +#if defined(__riscv) +// TLS_DTV_OFFSET is a constant used in relocation fields, defined in RISC-V ELF Specification[1] +// The front of the TCB contains a pointer to the DTV, and each pointer in DTV +// points to 0x800 past the start of a TLS block to make full use of the range +// of load/store instructions, refer to [2]. +// +// [1]: RISC-V ELF Specification. 
+// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#constants +// [2]: Documentation of TLS data structures +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/53 +#define TLS_DTV_OFFSET 0x800 +#else +#define TLS_DTV_OFFSET 0 +#endif diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h index 8bd5bc524..04297ad73 100644 --- a/libc/private/bionic_elf_tls.h +++ b/libc/private/bionic_elf_tls.h @@ -34,6 +34,8 @@ #include #include +#include "bionic_elf_dtv_offset.h" + __LIBC_HIDDEN__ extern _Atomic(size_t) __libc_tls_generation_copy; struct TlsAlign { @@ -227,17 +229,3 @@ struct bionic_tcb; void __free_dynamic_tls(bionic_tcb* tcb); void __notify_thread_exit_callbacks(); -#if defined(__riscv) -// TLS_DTV_OFFSET is a constant used in relocation fields, defined in RISC-V ELF Specification[1] -// The front of the TCB contains a pointer to the DTV, and each pointer in DTV -// points to 0x800 past the start of a TLS block to make full use of the range -// of load/store instructions, refer to [2]. -// -// [1]: RISC-V ELF Specification. -// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#constants -// [2]: Documentation of TLS data structures -// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/53 -#define TLS_DTV_OFFSET 0x800 -#else -#define TLS_DTV_OFFSET 0 -#endif diff --git a/linker/Android.bp b/linker/Android.bp index 1ede380de..78109e8f6 100644 --- a/linker/Android.bp +++ b/linker/Android.bp @@ -231,6 +231,7 @@ filegroup { name: "linker_sources_riscv64", srcs: [ "arch/riscv64/begin.S", + "arch/riscv64/tlsdesc_resolver.S", ], } diff --git a/linker/arch/riscv64/tlsdesc_resolver.S b/linker/arch/riscv64/tlsdesc_resolver.S new file mode 100644 index 000000000..fedc92634 --- /dev/null +++ b/linker/arch/riscv64/tlsdesc_resolver.S @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#ifndef TLS_DTV_OFFSET + #error "TLS_DTV_OFFSET not defined" +#endif + +.globl __tls_get_addr + +// Store reg (sd, or fsd when f is passed) in 8-byte stack slot idx and record the save in CFI. +.macro spill reg, idx, f= + \f\()sd \reg, \idx*8(sp) + .cfi_rel_offset \reg, (\idx)*8 +.endm + +// Load reg (ld, or fld when f is passed) from stack slot idx and mark it unchanged in CFI. +.macro reload reg, idx, f= + \f\()ld \reg, \idx*8(sp) + .cfi_same_value \reg +.endm + +.macro spill_vector_regs + csrr a3, vlenb // a3 = vector register length in bytes (clobbers a3) + slli a3, a3, 3 // a3 = 8 * vlenb, the size of one 8-register group + sub sp, sp, a3 + vs8r.v v0, (sp) // v0-v7 + sub sp, sp, a3 + vs8r.v v8, (sp) // v8-v15 + sub sp, sp, a3 + vs8r.v v16, (sp) // v16-v23 + sub sp, sp, a3 + vs8r.v v24, (sp) // v24-v31 +.endm + +.macro reload_vector_regs + csrr a3, vlenb // a3 = vector register length in bytes (clobbers a3) + slli a3, a3, 3 // a3 = 8 * vlenb, the size of one 8-register group + vl8r.v v24, (sp) // pop in the reverse order of spill_vector_regs + add sp, sp, a3 + vl8r.v v16, (sp) + add sp, sp, a3 + vl8r.v v8, (sp) + add sp, sp, a3 + vl8r.v v0, (sp) + add sp, sp, a3 +.endm + +// Apply op to all 35 saved registers (stack slots 1-35; slot 0 is unused). max is not referenced. +.macro for_each_saved_reg op max + \op ra, 1 + \op a1, 2 + \op a2, 3 + \op a3, 4 + \op a4, 5 + \op a5, 6 + \op a6, 7 + \op a7, 8 + \op t0, 9 + \op t1, 10 + \op t2, 11 + \op t3, 12 + \op t4, 13 + \op t5, 14 + \op t6, 15 + // floating point regs: the extra f argument is forwarded to op + \op ft0, 16, f + \op ft1, 17, f + \op ft2, 18, f + \op ft3, 19, f + \op ft4, 20, f + \op ft5, 21, f + \op ft6, 22, f + \op ft7, 23, f + \op ft8, 24, f + \op ft9, 25, f + \op ft10, 26, f + \op ft11, 27, f + \op fa0, 28, f + \op fa1, 29, f + \op fa2, 30, f + \op fa3, 31, f + \op fa4, 32, f + \op fa5, 33, f + \op fa6, 34, f + \op fa7, 35, f +.endm + +// These resolver functions must preserve every register except a0. They set a0 +// to the offset of the TLS symbol relative to the thread pointer. 
+ +ENTRY_PRIVATE(tlsdesc_resolver_static) + ld a0, 8(a0) // a0 = TlsDescriptor::arg, returned as-is + jr t0 +END(tlsdesc_resolver_static) + +ENTRY_PRIVATE(tlsdesc_resolver_dynamic) + // We only need 3 stack slots, but still require a 4th slot for alignment + addi sp, sp, -4*8 + .cfi_def_cfa_offset 4*8 + spill a1, 1 + spill a2, 2 + spill a3, 3 + + ld a2, (TLS_SLOT_DTV * 8)(tp) // a2 = &DTV + ld a1, (a2) // a1 = TlsDtv::generation (DTV[0]) + + ld a0, 8(a0) // a0 = TlsDynamicResolverArg* + ld a3, (a0) // a3 = TlsDynamicResolverArg::generation + + // Fall back to __tls_get_addr if TlsDtv::generation < TlsDynamicResolverArg::generation. + blt a1, a3, L(fallback) + + // We can't modify a0 yet, since tlsdesc_resolver_dynamic_slow_path requires + // a pointer to the TlsDynamicResolverArg (it derives the TlsIndex, the + // second field, itself). As a result, we can't modify a0 until we can no + // longer fall back. + ld a1, 8(a0) // a1 = TlsIndex::module_id + slli a1, a1, 3 // a1 = module_id*8 -- scale the idx + add a1, a2, a1 // a1 = &TlsDtv::modules[module_id] + ld a1, (a1) // a1 = TlsDtv::modules[module_id] + beqz a1, L(fallback) + ld a3, 16(a0) // a3 = TlsIndex::offset + add a0, a1, a3 // a0 = TlsDtv::modules[module_id] + offset + sub a0, a0, tp // a0 = TlsDtv::modules[module_id] + offset - tp + + .cfi_remember_state // frame is still live here; L(fallback) restores this state + reload a3, 3 + reload a2, 2 + reload a1, 1 + addi sp, sp, 4*8 + .cfi_adjust_cfa_offset -4*8 + jr t0 + +L(fallback): + .cfi_restore_state // branched here with the frame live: undo the epilogue's CFI + reload a3, 3 + reload a2, 2 + reload a1, 1 + addi sp, sp, 4*8 + .cfi_adjust_cfa_offset -4*8 + j tlsdesc_resolver_dynamic_slow_path +END(tlsdesc_resolver_dynamic) + +// On entry, a0 is the address of a TlsDynamicResolverArg object rather than +// the TlsDescriptor address passed to the original resolver function. +ENTRY_PRIVATE(tlsdesc_resolver_dynamic_slow_path) + // We save a total of 35 registers, but vector spills require an alignment + // of 16, so use an extra slot to align it correctly. 
+ addi sp, sp, (-8*36) + .cfi_def_cfa_offset (8 * 36) + for_each_saved_reg spill, 36 + spill_vector_regs + + addi a0, a0, 8 // a0 = &TlsIndex (second field of TlsDynamicResolverArg) + call __tls_get_addr + addi a0, a0, (-1 * TLS_DTV_OFFSET) // Correct the address by TLS_DTV_OFFSET + sub a0, a0, tp + + reload_vector_regs + for_each_saved_reg reload, 36 + addi sp, sp, 8*36 + .cfi_def_cfa_offset 0 + jr t0 +END(tlsdesc_resolver_dynamic_slow_path) + +// The address of an unresolved weak TLS symbol evaluates to NULL with TLSDESC. +// The value returned by this function is added to the thread pointer, so return +// a negated thread pointer to cancel it out. +ENTRY_PRIVATE(tlsdesc_resolver_unresolved_weak) + sub a0, zero, tp + jr t0 +END(tlsdesc_resolver_unresolved_weak) diff --git a/linker/linker_relocate.cpp b/linker/linker_relocate.cpp index 85f7b3aab..3e3611405 100644 --- a/linker/linker_relocate.cpp +++ b/linker/linker_relocate.cpp @@ -438,9 +438,9 @@ static bool process_relocation_impl(Relocator& relocator, const rel_t& reloc) { } break; -#if defined(__aarch64__) - // Bionic currently only implements TLSDESC for arm64. This implementation should work with - // other architectures, as long as the resolver functions are implemented. +#if defined(__aarch64__) || defined(__riscv) + // Bionic currently implements TLSDESC for arm64 and riscv64. This implementation should work + // with other architectures, as long as the resolver functions are implemented. case R_GENERIC_TLSDESC: count_relocation_if(kRelocRelative); { @@ -482,7 +482,7 @@ static bool process_relocation_impl(Relocator& relocator, const rel_t& reloc) { } } break; -#endif // defined(__aarch64__) +#endif // defined(__aarch64__) || defined(__riscv) #if defined(__x86_64__) case R_X86_64_32: @@ -672,14 +672,14 @@ bool soinfo::relocate(const SymbolLookupList& lookup_list) { // Once the tlsdesc_args_ vector's size is finalized, we can write the addresses of its elements // into the TLSDESC relocations. 
-#if defined(__aarch64__) - // Bionic currently only implements TLSDESC for arm64. +#if defined(__aarch64__) || defined(__riscv) + // Bionic currently only implements TLSDESC for arm64 and riscv64. for (const std::pair& pair : relocator.deferred_tlsdesc_relocs) { TlsDescriptor* desc = pair.first; desc->func = tlsdesc_resolver_dynamic; desc->arg = reinterpret_cast(&tlsdesc_args_[pair.second]); } -#endif +#endif // defined(__aarch64__) || defined(__riscv) return true; } diff --git a/tests/elftls_dl_test.cpp b/tests/elftls_dl_test.cpp index e759e1565..e409b728a 100644 --- a/tests/elftls_dl_test.cpp +++ b/tests/elftls_dl_test.cpp @@ -107,6 +107,22 @@ TEST(elftls_dl, bump_local_vars) { void* lib = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW); ASSERT_NE(nullptr, lib); + auto get_local_var2 = reinterpret_cast(dlsym(lib, "get_local_var2")); + ASSERT_NE(nullptr, get_local_var2); + + auto get_local_var1 = reinterpret_cast(dlsym(lib, "get_local_var1")); + ASSERT_NE(nullptr, get_local_var1); + + auto get_local_var1_addr = reinterpret_cast(dlsym(lib, "get_local_var1_addr")); + ASSERT_NE(nullptr, get_local_var1_addr); + + // Make sure subsequent accesses return the same pointer. + ASSERT_EQ(get_local_var1_addr(), get_local_var1_addr()); + + // Check the initial values are correct. + ASSERT_EQ(25, get_local_var2()); + ASSERT_EQ(15, get_local_var1()); + auto bump_local_vars = reinterpret_cast(dlsym(lib, "bump_local_vars")); ASSERT_NE(nullptr, bump_local_vars); @@ -135,7 +151,7 @@ TEST(elftls_dl, tprel_missing_weak) { // TLSDESC, the result is NULL. With __tls_get_addr, the result is the // generation count (or maybe undefined behavior)? This test only tests TLSDESC. 
TEST(elftls_dl, tlsdesc_missing_weak) { -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__riscv) void* lib = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW); ASSERT_NE(nullptr, lib); diff --git a/tests/libs/elftls_dtv_resize_helper.cpp b/tests/libs/elftls_dtv_resize_helper.cpp index 340d5df72..7fb6fb5e8 100644 --- a/tests/libs/elftls_dtv_resize_helper.cpp +++ b/tests/libs/elftls_dtv_resize_helper.cpp @@ -180,8 +180,8 @@ int main() { // Access a TLS variable from the first filler module. ASSERT_EQ(102, func1()); ASSERT_EQ(5u, highest_modid_in_dtv()); -#if defined(__aarch64__) - // The arm64 TLSDESC resolver doesn't update the DTV if it is new enough for +#if defined(__aarch64__) || defined(__riscv) + // The arm64 and riscv64 TLSDESC resolver doesn't update the DTV if it is new enough for // the given access. ASSERT_EQ(initial_dtv, dtv()); ASSERT_EQ(5u, dtv()->count); diff --git a/tests/libs/elftls_dynamic.cpp b/tests/libs/elftls_dynamic.cpp index 25004848c..df3ad75a6 100644 --- a/tests/libs/elftls_dynamic.cpp +++ b/tests/libs/elftls_dynamic.cpp @@ -66,6 +66,18 @@ extern "C" int bump_local_vars() { return ++local_var_1 + ++local_var_2; } +extern "C" int get_local_var1() { + return local_var_1; +} + +extern "C" int* get_local_var1_addr() { + return &local_var_1; +} + +extern "C" int get_local_var2() { + return local_var_2; +} + __attribute__((weak)) extern "C" __thread int missing_weak_dyn_tls; extern "C" int* missing_weak_dyn_tls_addr() {