libprocessgroup: Prevent error spam when tests disable all cpus in a cpuset

UserLifecycleTests test disables all Little cores in the course of the
test, which causes attempts to add a process into /dev/cpuset/restricted
cpuset cgroup to fail with ENOSPC error code, indicating that a process
is joining a cpuset cgroup with no online cpus. Current libprocessgroup
implementation will log an error on each such occurrence, which spams
the logs and makes it hard to analyze test results. Because this
situation does not happen in production environment (we do not offline
cpus), we can prevent flooding the logs by identifying this case,
logging an appropriate error one time and ignore all later similar errors.

Bug: 158766131
Test: adb shell "echo 0 > /sys/devices/system/cpu/cpu[0-3]/online"
Test: start some apps, observe libprocessgroup errors in the logcat
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: Ia91d8839d86787569c255481bde077be51c43d93
This commit is contained in:
Suren Baghdasaryan 2021-09-02 19:47:12 -07:00
parent c39d42ddde
commit ec88556460
2 changed files with 29 additions and 12 deletions

View file

@ -194,22 +194,39 @@ void SetCgroupAction::DropResourceCaching() {
fd_.reset(FDS_NOT_CACHED);
}
bool SetCgroupAction::AddTidToCgroup(int tid, int fd) {
bool SetCgroupAction::AddTidToCgroup(int tid, int fd, const char* controller_name) {
if (tid <= 0) {
return true;
}
std::string value = std::to_string(tid);
if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) < 0) {
// If the thread is in the process of exiting, don't flag an error
if (errno != ESRCH) {
PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
return false;
}
if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) == value.length()) {
return true;
}
return true;
// If the thread is in the process of exiting, don't flag an error
if (errno == ESRCH) {
return true;
}
// ENOSPC is returned when cpuset cgroup that we are joining has no online cpus
if (errno == ENOSPC && !strcmp(controller_name, "cpuset")) {
// This is an abnormal case happening only in testing, so report it only once
static bool empty_cpuset_reported = false;
if (empty_cpuset_reported) {
return true;
}
LOG(ERROR) << "Failed to add task '" << value
<< "' into cpuset because all cpus in that cpuset are offline";
empty_cpuset_reported = true;
} else {
PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
}
return false;
}
bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
@ -219,7 +236,7 @@ bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
PLOG(WARNING) << "Failed to open " << procs_path;
return false;
}
if (!AddTidToCgroup(pid, tmp_fd)) {
if (!AddTidToCgroup(pid, tmp_fd, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}
@ -231,7 +248,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
std::lock_guard<std::mutex> lock(fd_mutex_);
if (IsFdValid()) {
// fd is cached, reuse it
if (!AddTidToCgroup(tid, fd_)) {
if (!AddTidToCgroup(tid, fd_, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}
@ -256,7 +273,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
PLOG(WARNING) << "Failed to open " << tasks_path << ": " << strerror(errno);
return false;
}
if (!AddTidToCgroup(tid, tmp_fd)) {
if (!AddTidToCgroup(tid, tmp_fd, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}

View file

@ -134,7 +134,7 @@ class SetCgroupAction : public ProfileAction {
mutable std::mutex fd_mutex_;
static bool IsAppDependentPath(const std::string& path);
static bool AddTidToCgroup(int tid, int fd);
static bool AddTidToCgroup(int tid, int fd, const char* controller_name);
bool IsFdValid() const { return fd_ > FDS_INACCESSIBLE; }
};