riscv64: switch from x18 to gp for shadow call stack.
We want to give back a useful callee-saved general purpose
register (x18) that was only "chosen" because it was what llvm
allowed for historical reasons. gp is a better choice because it's
effectively unused otherwise anyway.
Unfortunately, that means we need extra space in jmp_buf (which I've
reserved in an earlier change, e7b3b8b467
),
so let's rearrange the entries in jmp_buf to match their order in the
register file.
Bug: https://github.com/google/android-riscv64/issues/72
Bug: http://b/277909695
Test: treehugger
Change-Id: Ia629409a894c1a83d2052885702bbdd895c758e1
This commit is contained in:
parent
97950cb186
commit
7dd3896fe1
3 changed files with 64 additions and 56 deletions
|
@ -36,50 +36,52 @@
|
|||
// 0 sigflag/cookie setjmp cookie in top 31 bits, signal mask flag in low bit
|
||||
// 1 sigmask 64-bit signal mask
|
||||
// 2 ra
|
||||
// 3 s0
|
||||
// 3 sp
|
||||
// 4 gp
|
||||
// 5 s0
|
||||
// ......
|
||||
// 14 s11
|
||||
// 15 sp
|
||||
// 16 fs0
|
||||
// 16 s11
|
||||
// 17 fs0
|
||||
// ......
|
||||
// 27 fs11
|
||||
// 28 checksum
|
||||
// 28 fs11
|
||||
// 29 checksum
|
||||
// _JBLEN: defined in bionic/libc/include/setjmp.h
|
||||
|
||||
#define _JB_SIGFLAG 0
|
||||
#define _JB_SIGMASK 1 * 8
|
||||
#define _JB_RA 2 * 8
|
||||
#define _JB_S0 3 * 8
|
||||
#define _JB_S1 4 * 8
|
||||
#define _JB_S2 5 * 8
|
||||
#define _JB_S3 6 * 8
|
||||
#define _JB_S4 7 * 8
|
||||
#define _JB_S5 8 * 8
|
||||
#define _JB_S6 9 * 8
|
||||
#define _JB_S7 10 * 8
|
||||
#define _JB_S8 11 * 8
|
||||
#define _JB_S9 12 * 8
|
||||
#define _JB_S10 13 * 8
|
||||
#define _JB_S11 14 * 8
|
||||
#define _JB_SP 15 * 8
|
||||
#define _JB_FS0 16 * 8
|
||||
#define _JB_FS1 17 * 8
|
||||
#define _JB_FS2 18 * 8
|
||||
#define _JB_FS3 19 * 8
|
||||
#define _JB_FS4 20 * 8
|
||||
#define _JB_FS5 21 * 8
|
||||
#define _JB_FS6 22 * 8
|
||||
#define _JB_FS7 23 * 8
|
||||
#define _JB_FS8 24 * 8
|
||||
#define _JB_FS9 25 * 8
|
||||
#define _JB_FS10 26 * 8
|
||||
#define _JB_FS11 27 * 8
|
||||
#define _JB_CHECKSUM 28 * 8
|
||||
#define _JB_SP 3 * 8
|
||||
#define _JB_GP 4 * 8
|
||||
#define _JB_S0 5 * 8
|
||||
#define _JB_S1 6 * 8
|
||||
#define _JB_S2 7 * 8
|
||||
#define _JB_S3 8 * 8
|
||||
#define _JB_S4 9 * 8
|
||||
#define _JB_S5 10 * 8
|
||||
#define _JB_S6 11 * 8
|
||||
#define _JB_S7 12 * 8
|
||||
#define _JB_S8 13 * 8
|
||||
#define _JB_S9 14 * 8
|
||||
#define _JB_S10 15 * 8
|
||||
#define _JB_S11 16 * 8
|
||||
#define _JB_FS0 17 * 8
|
||||
#define _JB_FS1 18 * 8
|
||||
#define _JB_FS2 19 * 8
|
||||
#define _JB_FS3 20 * 8
|
||||
#define _JB_FS4 21 * 8
|
||||
#define _JB_FS5 22 * 8
|
||||
#define _JB_FS6 23 * 8
|
||||
#define _JB_FS7 24 * 8
|
||||
#define _JB_FS8 25 * 8
|
||||
#define _JB_FS9 26 * 8
|
||||
#define _JB_FS10 27 * 8
|
||||
#define _JB_FS11 28 * 8
|
||||
#define _JB_CHECKSUM 29 * 8
|
||||
|
||||
.macro m_mangle_registers reg, sp_reg
|
||||
xor s0, s0, \reg
|
||||
xor s1, s1, \reg
|
||||
xor a4, a4, \reg // a4 is the masked s2 (x18) for SCS.
|
||||
xor s2, s2, \reg
|
||||
xor s3, s3, \reg
|
||||
xor s4, s4, \reg
|
||||
xor s5, s5, \reg
|
||||
|
@ -89,12 +91,13 @@
|
|||
xor s9, s9, \reg
|
||||
xor s10, s10, \reg
|
||||
xor s11, s11, \reg
|
||||
xor a4, a4, \reg // a4 is the masked gp (x3) for SCS.
|
||||
xor \sp_reg, \sp_reg, \reg
|
||||
.endm
|
||||
|
||||
.macro m_calculate_checksum dst, src, scratch
|
||||
li \dst, 0
|
||||
.irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
|
||||
.irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28
|
||||
ld \scratch, (\i * 8)(\src)
|
||||
xor \dst, \dst, \scratch
|
||||
.endr
|
||||
|
@ -152,19 +155,21 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(sigsetjmp)
|
|||
andi a1, a1, -2
|
||||
|
||||
// Mask off the high bits of the shadow call stack pointer.
|
||||
// We only store the low bits of x18 to avoid leaking the
|
||||
// We only store the low bits of gp to avoid leaking the
|
||||
// shadow call stack address into memory.
|
||||
// See the SCS commentary in pthread_internal.h for more detail.
|
||||
li a4, SCS_MASK
|
||||
and a4, a4, x18
|
||||
and a4, a4, gp
|
||||
|
||||
// Save core registers.
|
||||
mv a2, sp
|
||||
m_mangle_registers a1, sp_reg=a2
|
||||
sd ra, _JB_RA(a0)
|
||||
sd a4, _JB_GP(a0) // a4 is the masked gp (x3) for SCS.
|
||||
sd a2, _JB_SP(a0)
|
||||
sd s0, _JB_S0(a0)
|
||||
sd s1, _JB_S1(a0)
|
||||
sd a4, _JB_S2(a0) // a4 is the masked s2 (x18) for SCS.
|
||||
sd s2, _JB_S2(a0)
|
||||
sd s3, _JB_S3(a0)
|
||||
sd s4, _JB_S4(a0)
|
||||
sd s5, _JB_S5(a0)
|
||||
|
@ -174,7 +179,6 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(sigsetjmp)
|
|||
sd s9, _JB_S9(a0)
|
||||
sd s10, _JB_S10(a0)
|
||||
sd s11, _JB_S11(a0)
|
||||
sd a2, _JB_SP(a0)
|
||||
m_unmangle_registers a1, sp_reg=a2
|
||||
|
||||
// Save floating point registers.
|
||||
|
@ -236,9 +240,10 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
|
|||
// Restore core registers.
|
||||
andi a2, a2, -2
|
||||
ld ra, _JB_RA(a0)
|
||||
ld a4, _JB_GP(a0) // Don't clobber the upper bits of gp (x3) used for SCS yet.
|
||||
ld s0, _JB_S0(a0)
|
||||
ld s1, _JB_S1(a0)
|
||||
ld a4, _JB_S2(a0) // Don't clobber s2 (x18) used for SCS yet.
|
||||
ld s2, _JB_S2(a0)
|
||||
ld s3, _JB_S3(a0)
|
||||
ld s4, _JB_S4(a0)
|
||||
ld s5, _JB_S5(a0)
|
||||
|
@ -254,8 +259,8 @@ __BIONIC_WEAK_ASM_FOR_NATIVE_BRIDGE(siglongjmp)
|
|||
|
||||
// Restore the low bits of the shadow call stack pointer.
|
||||
li a5, ~SCS_MASK
|
||||
and x18, x18, a5
|
||||
or x18, a4, x18
|
||||
and gp, gp, a5
|
||||
or gp, gp, a4
|
||||
|
||||
addi sp, sp, -24
|
||||
sd ra, 0(sp)
|
||||
|
|
|
@ -133,14 +133,14 @@ static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
|
|||
size_t scs_offset =
|
||||
(getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
|
||||
|
||||
// Make the stack readable and writable and store its address in x18.
|
||||
// This is deliberately the only place where the address is stored.
|
||||
// Make the stack read-write, and store its address in the register we're using as the shadow
|
||||
// stack pointer. This is deliberately the only place where the address is stored.
|
||||
char* scs = scs_aligned_guard_region + scs_offset;
|
||||
mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE);
|
||||
#if defined(__aarch64__)
|
||||
__asm__ __volatile__("mov x18, %0" ::"r"(scs));
|
||||
#elif defined(__riscv)
|
||||
__asm__ __volatile__("mv x18, %0" ::"r"(scs));
|
||||
__asm__ __volatile__("mv gp, %0" ::"r"(scs));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -110,7 +110,8 @@ class pthread_internal_t {
|
|||
// are actually used.
|
||||
//
|
||||
// This address is only used to deallocate the shadow call stack on thread
|
||||
// exit; the address of the stack itself is stored only in the x18 register.
|
||||
// exit; the address of the stack itself is stored only in the register used
|
||||
// as the shadow stack pointer (x18 on arm64, gp on riscv64).
|
||||
//
|
||||
// Because the protection offered by SCS relies on the secrecy of the stack
|
||||
// address, storing the address here weakens the protection, but only
|
||||
|
@ -119,22 +120,24 @@ class pthread_internal_t {
|
|||
// to other allocations), but not the stack itself, which is <0.1% of the size
|
||||
// of the guard region.
|
||||
//
|
||||
// longjmp()/setjmp() don't store all the bits of x18, only the bottom bits
|
||||
// covered by SCS_MASK. Since longjmp()/setjmp() between different threads is
|
||||
// undefined behavior (and unsupported on Android), we can retrieve the high
|
||||
// bits of x18 from the current value in x18 --- all the jmp_buf needs to store
|
||||
// is where exactly the shadow stack pointer is in the thread's shadow stack:
|
||||
// the bottom bits of x18.
|
||||
// longjmp()/setjmp() don't store all the bits of the shadow stack pointer,
|
||||
// only the bottom bits covered by SCS_MASK. Since longjmp()/setjmp() between
|
||||
// different threads is undefined behavior (and unsupported on Android), we
|
||||
// can retrieve the high bits of the shadow stack pointer from the current
|
||||
// value in the register --- all the jmp_buf needs to store is where exactly
|
||||
// the shadow stack pointer is *within* the thread's shadow stack: the bottom
|
||||
// bits of the register.
|
||||
//
|
||||
// There are at least two other options for discovering the start address of
|
||||
// the guard region on thread exit, but they are not as simple as storing in
|
||||
// TLS.
|
||||
//
|
||||
// 1) Derive it from the value of the x18 register. This is only possible in
|
||||
// processes that do not contain legacy code that might clobber x18,
|
||||
// therefore each process must declare early during process startup whether
|
||||
// it might load legacy code.
|
||||
// TODO: riscv64 has no legacy code, so we can actually go this route there!
|
||||
// 1) Derive it from the current value of the shadow stack pointer. This is
|
||||
// only possible in processes that do not contain legacy code that might
|
||||
// clobber x18 on arm64, therefore each process must declare early during
|
||||
// process startup whether it might load legacy code.
|
||||
// TODO: riscv64 has no legacy code, so we can actually go this route
|
||||
// there, but hopefully we'll actually get the Zsslpcfi extension instead.
|
||||
// 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
|
||||
// discover its address by reading /proc/self/maps. One issue with this is
|
||||
// that reading /proc/self/maps can race with allocations, so we may need
|
||||
|
|
Loading…
Reference in a new issue