Merge "debuggerd_fallback: fix race."
This commit is contained in:
commit
cd2fa1f432
1 changed files with 89 additions and 16 deletions
|
@ -55,7 +55,7 @@
|
|||
using android::base::unique_fd;
|
||||
using unwindstack::Regs;
|
||||
|
||||
extern "C" void __linker_enable_fallback_allocator();
|
||||
extern "C" bool __linker_enable_fallback_allocator();
|
||||
extern "C" void __linker_disable_fallback_allocator();
|
||||
|
||||
// This is incredibly sketchy to do inside of a signal handler, especially when libbacktrace
|
||||
|
@ -65,7 +65,11 @@ extern "C" void __linker_disable_fallback_allocator();
|
|||
// This isn't the default method of dumping because it can fail in cases such as address space
|
||||
// exhaustion.
|
||||
static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
|
||||
__linker_enable_fallback_allocator();
|
||||
if (!__linker_enable_fallback_allocator()) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Regs> regs;
|
||||
|
||||
|
@ -84,7 +88,11 @@ static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
|
|||
|
||||
static void debuggerd_fallback_tombstone(int output_fd, ucontext_t* ucontext, siginfo_t* siginfo,
|
||||
void* abort_message) {
|
||||
__linker_enable_fallback_allocator();
|
||||
if (!__linker_enable_fallback_allocator()) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
|
||||
return;
|
||||
}
|
||||
|
||||
engrave_tombstone_ucontext(output_fd, reinterpret_cast<uintptr_t>(abort_message), siginfo,
|
||||
ucontext);
|
||||
__linker_disable_fallback_allocator();
|
||||
|
@ -116,7 +124,7 @@ static void iterate_siblings(bool (*callback)(pid_t, int), int output_fd) {
|
|||
closedir(dir);
|
||||
}
|
||||
|
||||
static bool forward_output(int src_fd, int dst_fd) {
|
||||
static bool forward_output(int src_fd, int dst_fd, pid_t expected_tid) {
|
||||
// Make sure the thread actually got the signal.
|
||||
struct pollfd pfd = {
|
||||
.fd = src_fd, .events = POLLIN,
|
||||
|
@ -127,6 +135,18 @@ static bool forward_output(int src_fd, int dst_fd) {
|
|||
return false;
|
||||
}
|
||||
|
||||
pid_t tid;
|
||||
if (TEMP_FAILURE_RETRY(read(src_fd, &tid, sizeof(tid))) != sizeof(tid)) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to read tid");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (tid != expected_tid) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "received tid %d, expected %d", tid,
|
||||
expected_tid);
|
||||
return false;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
char buf[512];
|
||||
ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf)));
|
||||
|
@ -144,16 +164,54 @@ static bool forward_output(int src_fd, int dst_fd) {
|
|||
}
|
||||
}
|
||||
|
||||
struct __attribute__((__packed__)) packed_thread_output {
|
||||
int32_t tid;
|
||||
int32_t fd;
|
||||
};
|
||||
|
||||
static uint64_t pack_thread_fd(pid_t tid, int fd) {
|
||||
packed_thread_output packed = {.tid = tid, .fd = fd};
|
||||
uint64_t result;
|
||||
static_assert(sizeof(packed) == sizeof(result));
|
||||
memcpy(&result, &packed, sizeof(packed));
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::pair<pid_t, int> unpack_thread_fd(uint64_t value) {
|
||||
packed_thread_output result;
|
||||
memcpy(&result, &value, sizeof(value));
|
||||
return std::make_pair(result.tid, result.fd);
|
||||
}
|
||||
|
||||
static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
|
||||
static std::atomic<int> trace_output_fd(-1);
|
||||
static std::atomic<uint64_t> trace_output(pack_thread_fd(-1, -1));
|
||||
|
||||
if (info->si_value.sival_int == ~0) {
|
||||
// Asked to dump by the original signal recipient.
|
||||
debuggerd_fallback_trace(trace_output_fd, ucontext);
|
||||
uint64_t val = trace_output.load();
|
||||
auto [tid, fd] = unpack_thread_fd(val);
|
||||
if (tid != gettid()) {
|
||||
// We received some other thread's info request?
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
|
||||
"thread %d received output fd for thread %d?", gettid(), tid);
|
||||
return;
|
||||
}
|
||||
|
||||
int tmp = trace_output_fd.load();
|
||||
trace_output_fd.store(-1);
|
||||
close(tmp);
|
||||
if (!trace_output.compare_exchange_strong(val, pack_thread_fd(-1, -1))) {
|
||||
// Presumably, the timeout in forward_output expired, and the main thread moved on.
|
||||
// If this happened, the main thread closed our fd for us, so just return.
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "cmpxchg for thread %d failed", gettid());
|
||||
return;
|
||||
}
|
||||
|
||||
// Write our tid to the output fd to let the main thread know that we're working.
|
||||
if (TEMP_FAILURE_RETRY(write(fd, &tid, sizeof(tid))) == sizeof(tid)) {
|
||||
debuggerd_fallback_trace(fd, ucontext);
|
||||
} else {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to write to output fd");
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -189,7 +247,14 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
|
|||
return false;
|
||||
}
|
||||
|
||||
trace_output_fd.store(pipe_write.get());
|
||||
uint64_t expected = pack_thread_fd(-1, -1);
|
||||
if (!trace_output.compare_exchange_strong(expected,
|
||||
pack_thread_fd(tid, pipe_write.release()))) {
|
||||
auto [tid, fd] = unpack_thread_fd(expected);
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
|
||||
"thread %d is already outputting to fd %d?", tid, fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
siginfo_t siginfo = {};
|
||||
siginfo.si_code = SI_QUEUE;
|
||||
|
@ -203,12 +268,20 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool success = forward_output(pipe_read.get(), output_fd);
|
||||
if (success) {
|
||||
// The signaled thread has closed trace_output_fd already.
|
||||
(void)pipe_write.release();
|
||||
} else {
|
||||
trace_output_fd.store(-1);
|
||||
bool success = forward_output(pipe_read.get(), output_fd, tid);
|
||||
if (!success) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
|
||||
"timeout expired while waiting for thread %d to dump", tid);
|
||||
}
|
||||
|
||||
// Regardless of whether the poll succeeds, check to see if the thread took fd ownership.
|
||||
uint64_t post_wait = trace_output.exchange(pack_thread_fd(-1, -1));
|
||||
if (post_wait != pack_thread_fd(-1, -1)) {
|
||||
auto [tid, fd] = unpack_thread_fd(post_wait);
|
||||
if (fd != -1) {
|
||||
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "closing fd %d for thread %d", fd, tid);
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
Loading…
Reference in a new issue