llkd: handle 'adbd shell setsid' to preserve adbd
A zombie setsid process occurs when adb shell setsid <command> is
issued, however llkd can only detect if it is a result of a kernel
livelock by killing the associated parent, which would be adbd;
resulting in the adb connection(s) being terminated. Will special
case this condition in order to preserve adbd for debugging purposes.
We parse <parent>&<child> in ro.llk.blacklist.parent as this
association, thus adbd&[setsid] covers this special case.
Ampersand was selected because it is never part of a process name,
however a setprop in the shell requires it to be escaped or quoted;
init rc file where this is normally specified does not have issue.
getComm() is effectively pure, so hold on to the return value for
sake of efficiency.
This also reverts commit 599958d114
which granted adbd blanket parent immunity from monitoring on
userdebug builds. The new logic is a more refined means of
preserving the live lock checking associated with adbd and allows
the operation to be performed on user builds.
POC: date ; adb shell setsid sleep 900 ; date
Positive for bug, reports less than 15 minutes, otherwise solved.
Test: llkd_unit_test
Bug: 120983740
Change-Id: I6442463a48499d925a3a074423a24a1622905559
This commit is contained in:
parent
8a5f081763
commit
da2aeb0c42
4 changed files with 110 additions and 14 deletions
|
@ -165,9 +165,14 @@ size of 92.
|
|||
NB: false is a very very very unlikely process to want to blacklist.
|
||||
|
||||
#### ro.llk.blacklist.parent
|
||||
default 0,2,adbd (kernel, [kthreadd] and adbd).
|
||||
default 0,2,adbd&[setsid] (kernel, [kthreadd] and adbd *only for zombie setsid*).
|
||||
Do not watch processes that have this parent.
|
||||
A parent process can be comm, cmdline or pid reference.
|
||||
An ampersand (*&*) separator is used to specify that the parent is ignored
|
||||
only in combination with the target child process.
|
||||
Ampersand was selected because it is never part of a process name,
|
||||
however a setprop in the shell requires it to be escaped or quoted;
|
||||
init rc file where this is normally specified does not have this issue.
|
||||
A parent or target processes can be specified as comm, cmdline or pid reference.
|
||||
|
||||
#### ro.llk.blacklist.uid
|
||||
default *empty* or false, comma separated list of uid numbers or names.
|
||||
|
|
|
@ -56,11 +56,7 @@ unsigned llkCheckMilliseconds(void);
|
|||
#define LLK_BLACKLIST_PROCESS_DEFAULT \
|
||||
"0,1,2,init,[kthreadd],[khungtaskd],lmkd,llkd,watchdogd,[watchdogd],[watchdogd/0]"
|
||||
#define LLK_BLACKLIST_PARENT_PROPERTY "ro.llk.blacklist.parent"
|
||||
#ifdef __PTRACE_ENABLED__ // defined if userdebug build
|
||||
#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd],adbd"
|
||||
#else
|
||||
#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd]"
|
||||
#endif
|
||||
#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd],adbd&[setsid]"
|
||||
#define LLK_BLACKLIST_UID_PROPERTY "ro.llk.blacklist.uid"
|
||||
#define LLK_BLACKLIST_UID_DEFAULT ""
|
||||
#define LLK_BLACKLIST_STACK_PROPERTY "ro.llk.blacklist.process.stack"
|
||||
|
|
|
@ -108,6 +108,9 @@ std::unordered_set<std::string> llkBlacklistProcess;
|
|||
// list of parent pids, comm or cmdline names to skip. default:
|
||||
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
|
||||
std::unordered_set<std::string> llkBlacklistParent;
|
||||
// list of parent and target processes to skip. default:
|
||||
// adbd *and* [setsid]
|
||||
std::unordered_map<std::string, std::unordered_set<std::string>> llkBlacklistParentAndChild;
|
||||
// list of uids, and uid names, to skip, default nothing
|
||||
std::unordered_set<std::string> llkBlacklistUid;
|
||||
#ifdef __PTRACE_ENABLED__
|
||||
|
@ -624,6 +627,19 @@ std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
std::string llkFormat(
|
||||
const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist,
|
||||
bool leading_comma = false) {
|
||||
std::string ret;
|
||||
for (const auto& entry : blacklist) {
|
||||
for (const auto& target : entry.second) {
|
||||
if (leading_comma || !ret.empty()) ret += ",";
|
||||
ret += entry.first + "&" + target;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// This function parses the properties as a list, incorporating the supplied
|
||||
// default. A leading comma separator means preserve the defaults and add
|
||||
// entries (with an optional leading + sign), or removes entries with a leading
|
||||
|
@ -691,6 +707,27 @@ bool llkSkipProc(proc* procp,
|
|||
return false;
|
||||
}
|
||||
|
||||
const std::unordered_set<std::string>& llkSkipName(
|
||||
const std::string& name,
|
||||
const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist) {
|
||||
static const std::unordered_set<std::string> empty;
|
||||
if (name.empty() || blacklist.empty()) return empty;
|
||||
auto found = blacklist.find(name);
|
||||
if (found == blacklist.end()) return empty;
|
||||
return found->second;
|
||||
}
|
||||
|
||||
bool llkSkipPproc(proc* pprocp, proc* procp,
|
||||
const std::unordered_map<std::string, std::unordered_set<std::string>>&
|
||||
blacklist = llkBlacklistParentAndChild) {
|
||||
if (!pprocp || !procp || blacklist.empty()) return false;
|
||||
if (llkSkipProc(procp, llkSkipName(std::to_string(pprocp->pid), blacklist))) return true;
|
||||
if (llkSkipProc(procp, llkSkipName(pprocp->getComm(), blacklist))) return true;
|
||||
if (llkSkipProc(procp, llkSkipName(pprocp->getCmdline(), blacklist))) return true;
|
||||
return llkSkipProc(procp,
|
||||
llkSkipName(android::base::Basename(pprocp->getCmdline()), blacklist));
|
||||
}
|
||||
|
||||
bool llkSkipPid(pid_t pid) {
|
||||
return llkSkipName(std::to_string(pid), llkBlacklistProcess);
|
||||
}
|
||||
|
@ -875,7 +912,8 @@ void llkLogConfig(void) {
|
|||
<< LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
|
||||
#endif
|
||||
<< LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
|
||||
<< LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
|
||||
<< LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent)
|
||||
<< llkFormat(llkBlacklistParentAndChild, true) << "\n"
|
||||
<< LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
|
||||
}
|
||||
|
||||
|
@ -1050,7 +1088,8 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
break;
|
||||
}
|
||||
|
||||
if (llkSkipName(procp->getComm())) {
|
||||
auto process_comm = procp->getComm();
|
||||
if (llkSkipName(process_comm)) {
|
||||
continue;
|
||||
}
|
||||
if (llkSkipName(procp->getCmdline())) {
|
||||
|
@ -1065,6 +1104,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
|
||||
}
|
||||
if (pprocp) {
|
||||
if (llkSkipPproc(pprocp, procp)) break;
|
||||
if (llkSkipProc(pprocp, llkBlacklistParent)) break;
|
||||
} else {
|
||||
if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
|
||||
|
@ -1084,7 +1124,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
stuck = true;
|
||||
} else if (procp->count != 0ms) {
|
||||
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
|
||||
<< pid << "->" << tid << ' ' << procp->getComm();
|
||||
<< pid << "->" << tid << ' ' << process_comm;
|
||||
}
|
||||
}
|
||||
if (!stuck) continue;
|
||||
|
@ -1092,7 +1132,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
|
||||
if (procp->count != 0ms) {
|
||||
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
|
||||
<< pid << "->" << tid << ' ' << procp->getComm();
|
||||
<< pid << "->" << tid << ' ' << process_comm;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -1120,7 +1160,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
break;
|
||||
}
|
||||
LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
|
||||
<< pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
|
||||
<< pid << "->" << tid << ' ' << process_comm << " [kill]";
|
||||
if ((llkKillOneProcess(pprocp, procp) >= 0) ||
|
||||
(llkKillOneProcess(ppid, procp) >= 0)) {
|
||||
continue;
|
||||
|
@ -1137,7 +1177,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
// kernel (worse).
|
||||
default:
|
||||
LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
|
||||
<< "->" << tid << ' ' << procp->getComm() << " [kill]";
|
||||
<< "->" << tid << ' ' << process_comm << " [kill]";
|
||||
if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
|
||||
(llkKillOneProcess(pid, state, tid) >= 0) ||
|
||||
(llkKillOneProcess(procp, procp) >= 0) ||
|
||||
|
@ -1150,7 +1190,7 @@ milliseconds llkCheck(bool checkRunning) {
|
|||
// We are here because we have confirmed kernel live-lock
|
||||
const auto message = state + " "s + llkFormat(procp->count) + " " +
|
||||
std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
|
||||
std::to_string(tid) + " " + procp->getComm() + " [panic]";
|
||||
std::to_string(tid) + " " + process_comm + " [panic]";
|
||||
llkPanicKernel(dump, tid,
|
||||
(state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
|
||||
message);
|
||||
|
@ -1274,6 +1314,26 @@ bool llkInit(const char* threadname) {
|
|||
llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
|
||||
std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
|
||||
"," LLK_BLACKLIST_PARENT_DEFAULT);
|
||||
// derive llkBlacklistParentAndChild by moving entries with '&' from above
|
||||
for (auto it = llkBlacklistParent.begin(); it != llkBlacklistParent.end();) {
|
||||
auto pos = it->find('&');
|
||||
if (pos == std::string::npos) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
auto parent = it->substr(0, pos);
|
||||
auto child = it->substr(pos + 1);
|
||||
it = llkBlacklistParent.erase(it);
|
||||
|
||||
auto found = llkBlacklistParentAndChild.find(parent);
|
||||
if (found == llkBlacklistParentAndChild.end()) {
|
||||
llkBlacklistParentAndChild.emplace(std::make_pair(
|
||||
std::move(parent), std::unordered_set<std::string>({std::move(child)})));
|
||||
} else {
|
||||
found->second.emplace(std::move(child));
|
||||
}
|
||||
}
|
||||
|
||||
llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
|
||||
|
||||
// internal watchdog
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
@ -333,3 +334,37 @@ TEST(llkd, sleep) {
|
|||
|
||||
unlink(stack_pipe_file);
|
||||
}
|
||||
|
||||
// b/120983740
|
||||
TEST(llkd, adbd_and_setsid) {
|
||||
if (checkKill("kernel_panic,sysrq,livelock,zombie")) {
|
||||
return;
|
||||
}
|
||||
const auto period = llkdSleepPeriod('S');
|
||||
|
||||
// expect llkd.zombie to trigger, but not for adbd&[setsid]
|
||||
// Create a Persistent Zombie setsid Process
|
||||
pid_t child_pid = fork();
|
||||
ASSERT_LE(0, child_pid);
|
||||
if (!child_pid) {
|
||||
prctl(PR_SET_NAME, "adbd");
|
||||
auto zombie_pid = fork();
|
||||
ASSERT_LE(0, zombie_pid);
|
||||
if (!zombie_pid) {
|
||||
prctl(PR_SET_NAME, "setsid");
|
||||
sleep(1);
|
||||
exit(0);
|
||||
}
|
||||
sleep(period.count());
|
||||
exit(42);
|
||||
}
|
||||
|
||||
// Reverse of waitForPid, do _not_ expect kill
|
||||
int wstatus;
|
||||
ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
|
||||
EXPECT_TRUE(WIFEXITED(wstatus));
|
||||
if (WIFEXITED(wstatus)) {
|
||||
EXPECT_EQ(42, WEXITSTATUS(wstatus));
|
||||
}
|
||||
ASSERT_FALSE(WIFSIGNALED(wstatus)) << "[ INFO ] signo=" << WTERMSIG(wstatus);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue