libprocessgroup: Prevent error spam when tests disable all cpus in a cpuset

UserLifecycleTests test disables all Little cores in the course of the test, which causes attempts to add a process into /dev/cpuset/restricted cpuset cgroup to fail with ENOSPC error code, indicating that a process is joining a cpuset cgroup with no online cpus. Current libprocessgroup implementation will log an error on each such occurrence, which spams the logs and makes it hard to analyze test results. Because this situation does not happen in production environment (we do not offline cpus), we can prevent flooding the logs by identifying this case, logging an appropriate error one time and ignore all later similar errors. Bug: 158766131 Test: adb shell "echo 0 > /sys/devices/system/cpu/cpu[0-3]/online" Test: start some apps, observe libprocessgroup errors in the logcat Signed-off-by: Suren Baghdasaryan <surenb@google.com> Change-Id: Ia91d8839d86787569c255481bde077be51c43d93
2021-09-02 19:47:12 -07:00 · 2021-09-02 19:47:12 -07:00 · ec88556460
commit ec88556460
parent c39d42ddde
2 changed files with 29 additions and 12 deletions
--- a/libprocessgroup/task_profiles.cpp
+++ b/libprocessgroup/task_profiles.cpp
@ -194,22 +194,39 @@ void SetCgroupAction::DropResourceCaching() {
    fd_.reset(FDS_NOT_CACHED);
 }

-bool SetCgroupAction::AddTidToCgroup(int tid, int fd) {
+bool SetCgroupAction::AddTidToCgroup(int tid, int fd, const char* controller_name) {
    if (tid <= 0) {
        return true;
    }

    std::string value = std::to_string(tid);

-    if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) < 0) {
-        // If the thread is in the process of exiting, don't flag an error
-        if (errno != ESRCH) {
-            PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
-            return false;
-        }
+    if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) == value.length()) {
+        return true;
    }

-    return true;
+    // If the thread is in the process of exiting, don't flag an error
+    if (errno == ESRCH) {
+        return true;
+    }
+
+    // ENOSPC is returned when cpuset cgroup that we are joining has no online cpus
+    if (errno == ENOSPC && !strcmp(controller_name, "cpuset")) {
+        // This is an abnormal case happening only in testing, so report it only once
+        static bool empty_cpuset_reported = false;
+
+        if (empty_cpuset_reported) {
+            return true;
+        }
+
+        LOG(ERROR) << "Failed to add task '" << value
+                   << "' into cpuset because all cpus in that cpuset are offline";
+        empty_cpuset_reported = true;
+    } else {
+        PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
+    }
+
+    return false;
 }

 bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
@ -219,7 +236,7 @@ bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
        PLOG(WARNING) << "Failed to open " << procs_path;
        return false;
    }
-    if (!AddTidToCgroup(pid, tmp_fd)) {
+    if (!AddTidToCgroup(pid, tmp_fd, controller()->name())) {
        LOG(ERROR) << "Failed to add task into cgroup";
        return false;
    }
@ -231,7 +248,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
    std::lock_guard<std::mutex> lock(fd_mutex_);
    if (IsFdValid()) {
        // fd is cached, reuse it
-        if (!AddTidToCgroup(tid, fd_)) {
+        if (!AddTidToCgroup(tid, fd_, controller()->name())) {
            LOG(ERROR) << "Failed to add task into cgroup";
            return false;
        }
@ -256,7 +273,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
        PLOG(WARNING) << "Failed to open " << tasks_path << ": " << strerror(errno);
        return false;
    }
-    if (!AddTidToCgroup(tid, tmp_fd)) {
+    if (!AddTidToCgroup(tid, tmp_fd, controller()->name())) {
        LOG(ERROR) << "Failed to add task into cgroup";
        return false;
    }
--- a/libprocessgroup/task_profiles.h
+++ b/libprocessgroup/task_profiles.h
@ -134,7 +134,7 @@ class SetCgroupAction : public ProfileAction {
    mutable std::mutex fd_mutex_;

    static bool IsAppDependentPath(const std::string& path);
-    static bool AddTidToCgroup(int tid, int fd);
+    static bool AddTidToCgroup(int tid, int fd, const char* controller_name);

    bool IsFdValid() const { return fd_ > FDS_INACCESSIBLE; }
 };