Merge changes from topic "revert-2819069-LYZVLDOFQA" into main

* changes:
  Revert "libprocessgroup: Poll on cgroup.events"
  Revert "libprocessgroup: Use cgroup.kill"
This commit is contained in:
T.J. Mercier 2023-12-08 01:50:41 +00:00 committed by Gerrit Code Review
commit b73d41c50b
2 changed files with 127 additions and 220 deletions

View file

@ -65,8 +65,9 @@ bool UsePerAppMemcg();
// should be active again. E.g. Zygote specialization for child process.
void DropTaskProfilesResourceCaching();
// Return 0 if all processes were killed and the cgroup was successfully removed.
// Returns -1 in the case of an error occurring or if there are processes still running.
// Return 0 and removes the cgroup if there are no longer any processes in it.
// Returns -1 in the case of an error occurring or if there are processes still running
// even after retrying for up to 200ms.
int killProcessGroup(uid_t uid, int initialPid, int signal);
// Returns the same as killProcessGroup(), however it does not retry, which means
@ -75,9 +76,8 @@ int killProcessGroupOnce(uid_t uid, int initialPid, int signal);
// Sends the provided signal to all members of a process group, but does not wait for processes to
// exit, or for the cgroup to be removed. Callers should also ensure that killProcessGroup is called
// later to ensure the cgroup is fully removed, otherwise system resources will leak.
// Returns true if no errors are encountered sending signals, otherwise false.
bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
// later to ensure the cgroup is fully removed, otherwise system resources may leak.
int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
int createProcessGroup(uid_t uid, int initialPid, bool memControl = false);

View file

@ -22,7 +22,6 @@
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@ -31,7 +30,6 @@
#include <unistd.h>
#include <chrono>
#include <cstring>
#include <map>
#include <memory>
#include <mutex>
@ -55,9 +53,7 @@ using android::base::WriteStringToFile;
using namespace std::chrono_literals;
#define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
#define PROCESSGROUP_CGROUP_KILL_FILE "cgroup.kill"
#define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"
#define PROCESSGROUP_CGROUP_PROCS_FILE "/cgroup.procs"
bool CgroupsAvailable() {
static bool cgroups_available = access("/proc/cgroups", F_OK) == 0;
@ -78,29 +74,6 @@ bool CgroupGetControllerPath(const std::string& cgroup_name, std::string* path)
return true;
}
static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
return StringPrintf("%s/uid_%u", cgroup, uid);
}
static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
}
static bool CgroupKillAvailable() {
static std::once_flag f;
static bool cgroup_kill_available = false;
std::call_once(f, []() {
std::string cg_kill;
CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cg_kill);
// cgroup.kill is not on the root cgroup, so check a non-root cgroup that should always
// exist
cg_kill = ConvertUidToPath(cg_kill.c_str(), AID_ROOT) + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
cgroup_kill_available = access(cg_kill.c_str(), F_OK) == 0;
});
return cgroup_kill_available;
}
static bool CgroupGetMemcgAppsPath(std::string* path) {
CgroupController controller = CgroupMap::GetInstance().FindController("memory");
@ -232,21 +205,38 @@ bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles) {
false);
}
static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) {
auto path = ConvertUidPidToPath(cgroup, uid, pid);
int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
return StringPrintf("%s/uid_%u", cgroup, uid);
}
static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
}
static int RemoveCgroup(const char* cgroup, uid_t uid, int pid, unsigned int retries) {
int ret = 0;
auto uid_pid_path = ConvertUidPidToPath(cgroup, uid, pid);
while (retries--) {
ret = rmdir(uid_pid_path.c_str());
// If we get an error 2 'No such file or directory' , that means the
// cgroup is already removed, treat it as success and return 0 for
// idempotency.
if (ret < 0 && errno == ENOENT) {
ret = 0;
}
if (!ret || errno != EBUSY || !retries) break;
std::this_thread::sleep_for(5ms);
}
if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
// Isolated UIDs are unlikely to be reused soon after removal,
// so free up the kernel resources for the UID level cgroup.
path = ConvertUidToPath(cgroup, uid);
ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
}
if (ret < 0 && errno == ENOENT) {
// This function is idempoetent, but still warn here.
LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
ret = 0;
const auto uid_path = ConvertUidToPath(cgroup, uid);
ret = rmdir(uid_path.c_str());
if (ret < 0 && errno == ENOENT) {
ret = 0;
}
}
return ret;
@ -370,47 +360,38 @@ err:
return false;
}
bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
std::set<pid_t> pgids, pids;
// Returns number of processes killed on success
// Returns 0 if there are no processes in the process cgroup left to kill
// Returns -1 on error
static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, int signal) {
// We separate all of the pids in the cgroup into those pids that are also the leaders of
// process groups (stored in the pgids set) and those that are not (stored in the pids set).
std::set<pid_t> pgids;
pgids.emplace(initialPid);
std::set<pid_t> pids;
int processes = 0;
std::unique_ptr<FILE, decltype(&fclose)> fd(nullptr, fclose);
if (CgroupsAvailable()) {
std::string hierarchy_root_path, cgroup_v2_path;
CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
if (signal == SIGKILL && CgroupKillAvailable()) {
LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_KILL_FILE << " to SIGKILL "
<< cgroup_v2_path;
const std::string killfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
if (WriteStringToFile("1", killfilepath)) {
return true;
} else {
PLOG(ERROR) << "Failed to write 1 to " << killfilepath;
// Fallback to cgroup.procs below
auto path = ConvertUidPidToPath(cgroup, uid, initialPid) + PROCESSGROUP_CGROUP_PROCS_FILE;
fd.reset(fopen(path.c_str(), "re"));
if (!fd) {
if (errno == ENOENT) {
// This happens when the process is already dead or if, as the result of a bug, it
// has been migrated to another cgroup. An example of a bug that can cause migration
// to another cgroup is using the JoinCgroup action with a cgroup controller that
// has been activated in the v2 cgroup hierarchy.
goto kill;
}
PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid "
<< initialPid;
return -1;
}
// Since cgroup.kill only sends SIGKILLs, we read cgroup.procs to find each process to
// signal individually. This is more costly than using cgroup.kill for SIGKILLs.
LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
<< ") " << cgroup_v2_path;
// We separate all of the pids in the cgroup into those pids that are also the leaders of
// process groups (stored in the pgids set) and those that are not (stored in the pids set).
const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
if (!fp) {
// This should only happen if the cgroup has already been removed with a successful call
// to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
// killProcessGroup calls if they fail without ENOENT.
PLOG(ERROR) << "Failed to open " << procsfilepath;
kill(-initialPid, signal);
return false;
}
pid_t pid;
bool file_is_empty = true;
while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
while (fscanf(fd.get(), "%d\n", &pid) == 1 && pid >= 0) {
processes++;
file_is_empty = false;
if (pid == 0) {
// Should never happen... but if it does, trying to kill this
@ -440,8 +421,7 @@ bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
}
}
pgids.emplace(initialPid);
kill:
// Kill all process groups.
for (const auto pgid : pgids) {
LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
@ -462,174 +442,101 @@ bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
}
}
return true;
return (!fd || feof(fd.get())) ? processes : -1;
}
template <typename T>
static std::chrono::milliseconds toMillisec(T&& duration) {
return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
}
enum class populated_status
{
populated,
not_populated,
error
};
static populated_status cgroupIsPopulated(int events_fd) {
const std::string POPULATED_KEY("populated ");
const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;
std::string buf;
buf.resize(MAX_EVENTS_FILE_SIZE);
ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
if (len == -1) {
PLOG(ERROR) << "Could not read cgroup.events: ";
// Potentially ENODEV if the cgroup has been removed since we opened this file, but that
// shouldn't have happened yet.
return populated_status::error;
}
if (len == 0) {
LOG(ERROR) << "cgroup.events EOF";
return populated_status::error;
}
buf.resize(len);
const std::string::size_type pos = buf.find(POPULATED_KEY);
if (pos == std::string::npos) {
LOG(ERROR) << "Could not find populated key in cgroup.events";
return populated_status::error;
}
if (pos + POPULATED_KEY.size() + 1 > len) {
LOG(ERROR) << "Partial read of cgroup.events";
return populated_status::error;
}
return buf[pos + POPULATED_KEY.size()] == '1' ?
populated_status::populated : populated_status::not_populated;
}
// The default timeout of 2200ms comes from the default number of retries in a previous
// implementation of this function. The default retry value was 40 for killing and 400 for cgroup
// removal with 5ms sleeps between each retry.
static int KillProcessGroup(
uid_t uid, int initialPid, int signal, bool once = false,
std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
static int KillProcessGroup(uid_t uid, int initialPid, int signal, int retries) {
CHECK_GE(uid, 0);
CHECK_GT(initialPid, 0);
// Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
// whether its cgroup exists or not. This should only be necessary if a bug results in the
// migration of the targeted process out of its cgroup, which we will also attempt to kill.
const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);
if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1;
std::string hierarchy_root_path;
CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
if (CgroupsAvailable()) {
CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
}
const char* cgroup = hierarchy_root_path.c_str();
const std::string cgroup_v2_path =
ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
if (events_fd.get() == -1) {
PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
int retry = retries;
int processes;
while ((processes = DoKillProcessGroupOnce(cgroup, uid, initialPid, signal)) > 0) {
LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid;
if (!CgroupsAvailable()) {
// makes no sense to retry, because there are no cgroup_procs file
processes = 0; // no remaining processes
break;
}
if (retry > 0) {
std::this_thread::sleep_for(5ms);
--retry;
} else {
break;
}
}
if (processes < 0) {
PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid "
<< initialPid;
return -1;
}
struct pollfd fds = {
.fd = events_fd,
.events = POLLPRI,
};
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
// We only calculate the number of 'processes' when killing the processes.
// In the retries == 0 case, we only kill the processes once and therefore
// will not have waited then recalculated how many processes are remaining
// after the first signals have been sent.
// Logging anything regarding the number of 'processes' here does not make sense.
// The primary reason to loop here is to capture any new forks or migrations that could occur
// after we send signals to the original set of processes, but before all of those processes
// exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
// ensure this completes successfully to avoid permanent memory leaks, but we still place a
// large default upper bound on the amount of time we spend in this loop. The amount of CPU
// contention, and the amount of work that needs to be done in do_exit for each process
// determines how long this will take.
int ret;
do {
populated_status populated;
while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
std::chrono::steady_clock::now() < until) {
sendSignalToProcessGroup(uid, initialPid, signal);
if (once) {
populated = cgroupIsPopulated(events_fd.get());
break;
}
const std::chrono::steady_clock::time_point poll_start =
std::chrono::steady_clock::now();
if (poll_start < until)
ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));
if (ret == -1) {
// Fallback to 5ms sleeps if poll fails
PLOG(ERROR) << "Poll on " << eventsfile << "failed";
const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
if (now < until)
std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
}
LOG(VERBOSE) << "Waited "
<< toMillisec(std::chrono::steady_clock::now() - poll_start).count()
<< " ms for " << eventsfile << " poll";
if (processes == 0) {
if (retries > 0) {
LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid
<< " in " << static_cast<int>(ms) << "ms";
}
const std::chrono::milliseconds kill_duration =
toMillisec(std::chrono::steady_clock::now() - start);
if (populated == populated_status::populated) {
LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
<< " after " << kill_duration.count() << " ms";
// We'll still try the cgroup removal below which we expect to log an error.
} else if (populated == populated_status::not_populated) {
LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
<< " after " << kill_duration.count() << " ms";
if (!CgroupsAvailable()) {
// nothing to do here, if cgroups isn't available
return 0;
}
ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
if (ret)
PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
else
LOG(INFO) << "Removed cgroup " << cgroup_v2_path;
// 400 retries correspond to 2 secs max timeout
int err = RemoveCgroup(cgroup, uid, initialPid, 400);
if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
// This per-application memcg v1 case should eventually be removed after migration to
// memcg v2.
std::string memcg_apps_path;
if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
(ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
const auto memcg_v1_cgroup_path =
ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid, 400) < 0) {
return -1;
}
}
if (once) break;
if (std::chrono::steady_clock::now() >= until) break;
} while (ret && errno == EBUSY);
return ret;
return err;
} else {
if (retries > 0) {
LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid
<< " in " << static_cast<int>(ms) << "ms, " << processes
<< " processes remain";
}
return -1;
}
}
int killProcessGroup(uid_t uid, int initialPid, int signal) {
return KillProcessGroup(uid, initialPid, signal);
return KillProcessGroup(uid, initialPid, signal, 40 /*retries*/);
}
int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
return KillProcessGroup(uid, initialPid, signal, true);
return KillProcessGroup(uid, initialPid, signal, 0 /*retries*/);
}
int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
std::string hierarchy_root_path;
if (CgroupsAvailable()) {
CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
}
const char* cgroup = hierarchy_root_path.c_str();
return DoKillProcessGroupOnce(cgroup, uid, initialPid, signal);
}
static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup,
@ -669,7 +576,7 @@ static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgr
return -errno;
}
auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
auto uid_pid_procs_file = uid_pid_path + PROCESSGROUP_CGROUP_PROCS_FILE;
if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
ret = -errno;