Merge "debuggerd_fallback: fix race."

This commit is contained in:
Treehugger Robot 2018-02-10 01:44:07 +00:00 committed by Gerrit Code Review
commit cd2fa1f432

View file

@ -55,7 +55,7 @@
using android::base::unique_fd;
using unwindstack::Regs;
extern "C" void __linker_enable_fallback_allocator();
extern "C" bool __linker_enable_fallback_allocator();
extern "C" void __linker_disable_fallback_allocator();
// This is incredibly sketchy to do inside of a signal handler, especially when libbacktrace
@ -65,7 +65,11 @@ extern "C" void __linker_disable_fallback_allocator();
// This isn't the default method of dumping because it can fail in cases such as address space
// exhaustion.
static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
__linker_enable_fallback_allocator();
if (!__linker_enable_fallback_allocator()) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
return;
}
{
std::unique_ptr<Regs> regs;
@ -84,7 +88,11 @@ static void debuggerd_fallback_trace(int output_fd, ucontext_t* ucontext) {
static void debuggerd_fallback_tombstone(int output_fd, ucontext_t* ucontext, siginfo_t* siginfo,
void* abort_message) {
__linker_enable_fallback_allocator();
if (!__linker_enable_fallback_allocator()) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "fallback allocator already in use");
return;
}
engrave_tombstone_ucontext(output_fd, reinterpret_cast<uintptr_t>(abort_message), siginfo,
ucontext);
__linker_disable_fallback_allocator();
@ -116,7 +124,7 @@ static void iterate_siblings(bool (*callback)(pid_t, int), int output_fd) {
closedir(dir);
}
static bool forward_output(int src_fd, int dst_fd) {
static bool forward_output(int src_fd, int dst_fd, pid_t expected_tid) {
// Make sure the thread actually got the signal.
struct pollfd pfd = {
.fd = src_fd, .events = POLLIN,
@ -127,6 +135,18 @@ static bool forward_output(int src_fd, int dst_fd) {
return false;
}
pid_t tid;
if (TEMP_FAILURE_RETRY(read(src_fd, &tid, sizeof(tid))) != sizeof(tid)) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to read tid");
return false;
}
if (tid != expected_tid) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "received tid %d, expected %d", tid,
expected_tid);
return false;
}
while (true) {
char buf[512];
ssize_t rc = TEMP_FAILURE_RETRY(read(src_fd, buf, sizeof(buf)));
@ -144,16 +164,54 @@ static bool forward_output(int src_fd, int dst_fd) {
}
}
struct __attribute__((__packed__)) packed_thread_output {
int32_t tid;
int32_t fd;
};
static uint64_t pack_thread_fd(pid_t tid, int fd) {
packed_thread_output packed = {.tid = tid, .fd = fd};
uint64_t result;
static_assert(sizeof(packed) == sizeof(result));
memcpy(&result, &packed, sizeof(packed));
return result;
}
static std::pair<pid_t, int> unpack_thread_fd(uint64_t value) {
packed_thread_output result;
memcpy(&result, &value, sizeof(value));
return std::make_pair(result.tid, result.fd);
}
static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
static std::atomic<int> trace_output_fd(-1);
static std::atomic<uint64_t> trace_output(pack_thread_fd(-1, -1));
if (info->si_value.sival_int == ~0) {
// Asked to dump by the original signal recipient.
debuggerd_fallback_trace(trace_output_fd, ucontext);
uint64_t val = trace_output.load();
auto [tid, fd] = unpack_thread_fd(val);
if (tid != gettid()) {
// We received some other thread's info request?
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
"thread %d received output fd for thread %d?", gettid(), tid);
return;
}
int tmp = trace_output_fd.load();
trace_output_fd.store(-1);
close(tmp);
if (!trace_output.compare_exchange_strong(val, pack_thread_fd(-1, -1))) {
// Presumably, the timeout in forward_output expired, and the main thread moved on.
// If this happened, the main thread closed our fd for us, so just return.
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "cmpxchg for thread %d failed", gettid());
return;
}
// Write our tid to the output fd to let the main thread know that we're working.
if (TEMP_FAILURE_RETRY(write(fd, &tid, sizeof(tid))) == sizeof(tid)) {
debuggerd_fallback_trace(fd, ucontext);
} else {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to write to output fd");
}
close(fd);
return;
}
@ -189,7 +247,14 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
return false;
}
trace_output_fd.store(pipe_write.get());
uint64_t expected = pack_thread_fd(-1, -1);
if (!trace_output.compare_exchange_strong(expected,
pack_thread_fd(tid, pipe_write.release()))) {
auto [tid, fd] = unpack_thread_fd(expected);
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
"thread %d is already outputting to fd %d?", tid, fd);
return false;
}
siginfo_t siginfo = {};
siginfo.si_code = SI_QUEUE;
@ -203,12 +268,20 @@ static void trace_handler(siginfo_t* info, ucontext_t* ucontext) {
return false;
}
bool success = forward_output(pipe_read.get(), output_fd);
if (success) {
// The signaled thread has closed trace_output_fd already.
(void)pipe_write.release();
} else {
trace_output_fd.store(-1);
bool success = forward_output(pipe_read.get(), output_fd, tid);
if (!success) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc",
"timeout expired while waiting for thread %d to dump", tid);
}
// Regardless of whether the poll succeeds, check to see if the thread took fd ownership.
uint64_t post_wait = trace_output.exchange(pack_thread_fd(-1, -1));
if (post_wait != pack_thread_fd(-1, -1)) {
auto [tid, fd] = unpack_thread_fd(post_wait);
if (fd != -1) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "closing fd %d for thread %d", fd, tid);
close(fd);
}
}
return true;