libprocessgroup: pass the controller name to SetCgroupAction::AddTidToCgroup

What this patch changes, as read from the hunks below:

  * AddTidToCgroup() gains a `const char* controller_name` parameter; the three
    call sites in ExecuteForProcess()/ExecuteForTask() pass controller()->name().
  * The write is treated as successful only when the whole tid string was
    written (return value == value.length()).
  * errno == ESRCH is still not flagged as an error (thread is exiting): the
    function returns true.
  * errno == ENOSPC while controller_name is "cpuset" is reported with a
    dedicated message only once per process (function-local static flag).
    The first occurrence logs and returns false; later occurrences return
    true without logging.
  * Any other failure is logged with PLOG(ERROR) and returns false.

NOTE(review): the new code calls strcmp(); confirm <string.h>/<cstring> is
already included by task_profiles.cpp, since no include change is part of this
patch.

diff --git a/libprocessgroup/task_profiles.cpp b/libprocessgroup/task_profiles.cpp
index cf74e6557..e935f99de 100644
--- a/libprocessgroup/task_profiles.cpp
+++ b/libprocessgroup/task_profiles.cpp
@@ -194,22 +194,39 @@ void SetCgroupAction::DropResourceCaching() {
     fd_.reset(FDS_NOT_CACHED);
 }
 
-bool SetCgroupAction::AddTidToCgroup(int tid, int fd) {
+bool SetCgroupAction::AddTidToCgroup(int tid, int fd, const char* controller_name) {
     if (tid <= 0) {
         return true;
     }
 
     std::string value = std::to_string(tid);
 
-    if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) < 0) {
-        // If the thread is in the process of exiting, don't flag an error
-        if (errno != ESRCH) {
-            PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
-            return false;
-        }
+    if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) == value.length()) {
+        return true;
     }
 
-    return true;
+    // If the thread is in the process of exiting, don't flag an error
+    if (errno == ESRCH) {
+        return true;
+    }
+
+    // ENOSPC is returned when cpuset cgroup that we are joining has no online cpus
+    if (errno == ENOSPC && !strcmp(controller_name, "cpuset")) {
+        // This is an abnormal case happening only in testing, so report it only once
+        static bool empty_cpuset_reported = false;
+
+        if (empty_cpuset_reported) {
+            return true;
+        }
+
+        LOG(ERROR) << "Failed to add task '" << value
+                   << "' into cpuset because all cpus in that cpuset are offline";
+        empty_cpuset_reported = true;
+    } else {
+        PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
+    }
+
+    return false;
 }
 
 bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
@@ -219,7 +236,7 @@ bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
         PLOG(WARNING) << "Failed to open " << procs_path;
         return false;
     }
-    if (!AddTidToCgroup(pid, tmp_fd)) {
+    if (!AddTidToCgroup(pid, tmp_fd, controller()->name())) {
         LOG(ERROR) << "Failed to add task into cgroup";
         return false;
     }
@@ -231,7 +248,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
     std::lock_guard lock(fd_mutex_);
     if (IsFdValid()) {
         // fd is cached, reuse it
-        if (!AddTidToCgroup(tid, fd_)) {
+        if (!AddTidToCgroup(tid, fd_, controller()->name())) {
             LOG(ERROR) << "Failed to add task into cgroup";
             return false;
         }
@@ -256,7 +273,7 @@ bool SetCgroupAction::ExecuteForTask(int tid) const {
         PLOG(WARNING) << "Failed to open " << tasks_path << ": " << strerror(errno);
         return false;
     }
-    if (!AddTidToCgroup(tid, tmp_fd)) {
+    if (!AddTidToCgroup(tid, tmp_fd, controller()->name())) {
         LOG(ERROR) << "Failed to add task into cgroup";
         return false;
     }
diff --git a/libprocessgroup/task_profiles.h b/libprocessgroup/task_profiles.h
index 25a84b0c1..97c38f4f3 100644
--- a/libprocessgroup/task_profiles.h
+++ b/libprocessgroup/task_profiles.h
@@ -134,7 +134,7 @@ class SetCgroupAction : public ProfileAction {
     mutable std::mutex fd_mutex_;
 
     static bool IsAppDependentPath(const std::string& path);
-    static bool AddTidToCgroup(int tid, int fd);
+    static bool AddTidToCgroup(int tid, int fd, const char* controller_name);
 
     bool IsFdValid() const { return fd_ > FDS_INACCESSIBLE; }
 };