aee2ec8f1e
From the unique_fd.h header file: "unique_fd's operator int is dangerous, but we have way too much code that depends on it, so make this opt-in at first." From the Google C++ style guide: "Do not define implicit conversions." See also go/cstyle#Implicit_Conversions. Hence this CL that disables unique_fd::operator int(). Change-Id: I28d94755d5408f63e5819da8d1cbc285057f867f Signed-off-by: Bart Van Assche <bvanassche@google.com>
345 lines
12 KiB
C++
345 lines
12 KiB
C++
/*
|
|
* Copyright (C) 2019 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "service_utils.h"
|
|
|
|
#include <fcntl.h>
|
|
#include <grp.h>
|
|
#include <map>
|
|
#include <sys/mount.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
|
|
#include <android-base/file.h>
|
|
#include <android-base/logging.h>
|
|
#include <android-base/properties.h>
|
|
#include <android-base/stringprintf.h>
|
|
#include <android-base/strings.h>
|
|
#include <cutils/android_get_control_file.h>
|
|
#include <cutils/sockets.h>
|
|
#include <processgroup/processgroup.h>
|
|
|
|
#include "mount_namespace.h"
|
|
#include "util.h"
|
|
|
|
using android::base::GetProperty;
|
|
using android::base::StartsWith;
|
|
using android::base::StringPrintf;
|
|
using android::base::unique_fd;
|
|
using android::base::WriteStringToFile;
|
|
|
|
namespace android {
|
|
namespace init {
|
|
|
|
namespace {
|
|
|
|
// Joins the namespace referred to by |path|.
//
// Opens |path| read-only (O_CLOEXEC) and hands the resulting descriptor to setns() together with
// |nstype|. Returns an errno-based error mentioning |path| when either open() or setns() reports
// -1, and success otherwise.
Result<void> EnterNamespace(int nstype, const char* path) {
    unique_fd ns_fd(open(path, O_RDONLY | O_CLOEXEC));
    if (ns_fd.get() == -1) {
        return ErrnoError() << "Could not open namespace at " << path;
    }

    const int setns_rc = setns(ns_fd.get(), nstype);
    if (setns_rc == -1) {
        return ErrnoError() << "Could not setns() namespace at " << path;
    }

    return {};
}
|
|
|
|
// Prepares the mount namespace of the calling process.
//
// Always remounts / recursively as MS_SLAVE. Afterwards, /proc (when |remount_proc| is set) and
// /sys (when |remount_sys| is set) are unmounted and mounted again with nodev/noexec/nosuid.
// Returns an errno-based error for the first mount or umount call that reports -1.
Result<void> SetUpMountNamespace(bool remount_proc, bool remount_sys) {
    constexpr unsigned int kRestrictedMountFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;

    // Same approach as zygote: mark / recursively as MS_SLAVE. Unmounting and mounting /proc then
    // does not disturb the parent namespace's /proc mount, and other mounts/unmounts initiated by
    // the service stay out of the parent namespace, while mount events from the parent namespace
    // still propagate to the child.
    if (int rc = mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr); rc == -1) {
        return ErrnoError() << "Could not remount(/) recursively as MS_SLAVE";
    }

    // /proc and/or /sys are replaced by umount() followed by a fresh mount(); mounting with
    // MS_REMOUNT is not sufficient.
    if (remount_proc) {
        if (int rc = umount("/proc"); rc == -1) {
            return ErrnoError() << "Could not umount(/proc)";
        }
        if (int rc = mount("", "/proc", "proc", kRestrictedMountFlags, ""); rc == -1) {
            return ErrnoError() << "Could not mount(/proc)";
        }
    }

    if (remount_sys) {
        // Unlike /proc, /sys is detached lazily (MNT_DETACH).
        if (int rc = umount2("/sys", MNT_DETACH); rc == -1) {
            return ErrnoError() << "Could not umount(/sys)";
        }
        if (int rc = mount("", "/sys", "sysfs", kRestrictedMountFlags, ""); rc == -1) {
            return ErrnoError() << "Could not mount(/sys)";
        }
    }

    return {};
}
|
|
|
|
// Sets the process name to |name| and forks.
//
// The forked child gets a successful Result back and carries on in the caller. The parent never
// returns from this function: it reaps children with wait() until wait() stops returning a
// positive pid and then _exit()s, mirroring the forked child's exit code when that child exited
// normally and using EXIT_FAILURE otherwise. Errors from prctl() or fork() are returned as
// errno-based errors.
Result<void> SetUpPidNamespace(const char* name) {
    if (prctl(PR_SET_NAME, name) == -1) {
        return ErrnoError() << "Could not set name";
    }

    const pid_t forked_pid = fork();
    if (forked_pid == -1) {
        return ErrnoError() << "Could not fork init inside the PID namespace";
    }

    if (forked_pid > 0) {
        // Static so that the capture-less SIGTERM handler below can read it and exit with the
        // right status.
        static int forked_child_status = 0;
        signal(SIGTERM, [](int) { _exit(forked_child_status); });

        // Keep reaping until no processes are left inside the PID namespace, or until the init
        // process inside the PID namespace gets a signal.
        for (;;) {
            int wait_status;
            const pid_t reaped_pid = wait(&wait_status);
            if (reaped_pid <= 0) {
                break;
            }
            if (reaped_pid == forked_pid) {
                forked_child_status = wait_status;
            }
        }

        _exit(WIFEXITED(forked_child_status) ? WEXITSTATUS(forked_child_status) : EXIT_FAILURE);
    }

    return {};
}
|
|
|
|
void SetupStdio(bool stdio_to_kmsg) {
|
|
auto fd = unique_fd{open("/dev/null", O_RDWR | O_CLOEXEC)};
|
|
dup2(fd.get(), STDIN_FILENO);
|
|
if (stdio_to_kmsg) {
|
|
fd.reset(open("/dev/kmsg_debug", O_WRONLY | O_CLOEXEC));
|
|
if (fd == -1) fd.reset(open("/dev/null", O_WRONLY | O_CLOEXEC));
|
|
}
|
|
dup2(fd.get(), STDOUT_FILENO);
|
|
dup2(fd.get(), STDERR_FILENO);
|
|
}
|
|
|
|
void OpenConsole(const std::string& console) {
|
|
auto fd = unique_fd{open(console.c_str(), O_RDWR | O_CLOEXEC)};
|
|
if (fd == -1) fd.reset(open("/dev/null", O_RDWR | O_CLOEXEC));
|
|
ioctl(fd.get(), TIOCSCTTY, 0);
|
|
dup2(fd.get(), 0);
|
|
dup2(fd.get(), 1);
|
|
dup2(fd.get(), 2);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
void Descriptor::Publish() const {
|
|
auto published_name = name_;
|
|
|
|
for (auto& c : published_name) {
|
|
c = isalnum(c) ? c : '_';
|
|
}
|
|
|
|
int fd = fd_.get();
|
|
// For safety, the FD is created as CLOEXEC, so that must be removed before publishing.
|
|
auto fd_flags = fcntl(fd, F_GETFD);
|
|
fd_flags &= ~FD_CLOEXEC;
|
|
if (fcntl(fd, F_SETFD, fd_flags) != 0) {
|
|
PLOG(ERROR) << "Failed to remove CLOEXEC from '" << published_name << "'";
|
|
}
|
|
|
|
std::string val = std::to_string(fd);
|
|
setenv(published_name.c_str(), val.c_str(), 1);
|
|
}
|
|
|
|
// Creates the socket described by this object and wraps it in a Descriptor.
//
// The socket is created through CreateSocket() with SOCK_CLOEXEC added to the type. The socket's
// own context is used when it is non-empty, otherwise |global_context|. The Descriptor is named
// ANDROID_SOCKET_ENV_PREFIX followed by the socket name. A CreateSocket() error is passed through
// to the caller.
Result<Descriptor> SocketDescriptor::Create(const std::string& global_context) const {
    const auto& effective_context = context.empty() ? global_context : context;

    auto created = CreateSocket(name, type | SOCK_CLOEXEC, passcred, listen, perm, uid, gid,
                                effective_context);
    if (created.ok()) {
        return Descriptor(ANDROID_SOCKET_ENV_PREFIX + name, unique_fd(*created));
    }

    return created.error();
}
|
|
|
|
// Opens the file described by this object and wraps it in a Descriptor.
//
// A type of "r" opens read-only, "w" write-only and anything else read-write. The Descriptor is
// named ANDROID_FILE_ENV_PREFIX followed by the file name. Returns an errno-based error if the
// file cannot be opened.
Result<Descriptor> FileDescriptor::Create() const {
    int access_mode = O_RDWR;
    if (type == "r") {
        access_mode = O_RDONLY;
    } else if (type == "w") {
        access_mode = O_WRONLY;
    }

    // Open non-blocking: a device may choose to block in open() (e.g. on carrier detect), and
    // launching a service must never be delayed by that. The service can do its own blocking on
    // carrier detect.
    const int open_flags = access_mode | O_NONBLOCK | O_CLOEXEC;
    unique_fd opened(TEMP_FAILURE_RETRY(open(name.c_str(), open_flags)));
    if (opened.get() < 0) {
        return ErrnoError() << "Failed to open file '" << name << "'";
    }

    // O_NONBLOCK was only wanted for open(); reset the status flags because the intent for the
    // descriptor is to block on reads.
    fcntl(opened.get(), F_SETFL, access_mode);

    return Descriptor(ANDROID_FILE_ENV_PREFIX + name, std::move(opened));
}
|
|
|
|
// Performs all namespace setup described by |info|, stopping at the first error.
//
// In order: enters every namespace listed in info.namespaces_to_enter, switches to
// |override_mount_namespace| if one was supplied (Android builds only), sets up the mount
// namespace when CLONE_NEWNS is requested, and sets up the PID namespace (process name |name|)
// when CLONE_NEWPID is requested.
Result<void> EnterNamespaces(const NamespaceInfo& info, const std::string& name,
                             std::optional<MountNamespace> override_mount_namespace) {
    for (const auto& [ns_type, ns_path] : info.namespaces_to_enter) {
        auto entered = EnterNamespace(ns_type, ns_path.c_str());
        if (!entered.ok()) {
            return entered;
        }
    }

#if defined(__ANDROID__)
    if (override_mount_namespace) {
        auto switched = SwitchToMountNamespaceIfNeeded(*override_mount_namespace);
        if (!switched.ok()) {
            return switched;
        }
    }
#endif

    const bool wants_new_pid_ns = (info.flags & CLONE_NEWPID) != 0;

    if (info.flags & CLONE_NEWNS) {
        // /proc is remounted when a new PID namespace is requested; /sys is remounted when a
        // network namespace is among the namespaces that were entered.
        bool entered_net_ns = false;
        for (const auto& entry : info.namespaces_to_enter) {
            if (entry.first == CLONE_NEWNET) {
                entered_net_ns = true;
                break;
            }
        }

        auto mounted = SetUpMountNamespace(wants_new_pid_ns, entered_net_ns);
        if (!mounted.ok()) {
            return mounted;
        }
    }

    if (wants_new_pid_ns) {
        // This will fork again to run an init process inside the PID namespace.
        auto pid_ns = SetUpPidNamespace(name.c_str());
        if (!pid_ns.ok()) {
            return pid_ns;
        }
    }

    return {};
}
|
|
|
|
// Applies the attributes in |attr| to the calling process.
//
// Steps, in order:
//   - I/O priority, if an I/O scheduling class is configured (a failure is only logged).
//   - Session/stdio setup: when RequiresConsole(attr) holds, a new session is created,
//     kSetSidFinished is written to |setsid_finished|, the fifo is closed and the console is
//     opened; otherwise the process moves into its own process group and stdio is redirected.
//   - Resource limits.
//   - gid (if non-zero), supplementary groups, then uid (if non-zero).
//   - Scheduling priority (if non-zero).
//
// Returns an errno-based error for the first of setpgid/setrlimit/setgid/setgroups/setuid/
// setpriority that fails.
Result<void> SetProcessAttributes(const ProcessAttributes& attr, InterprocessFifo setsid_finished) {
    if (attr.ioprio_class != IoSchedClass_NONE) {
        if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) {
            PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class
                        << "," << attr.ioprio_pri;
        }
    }

    if (RequiresConsole(attr)) {
        setsid();
        setsid_finished.Write(kSetSidFinished);
        setsid_finished.Close();
        OpenConsole(attr.console);
    } else {
        // Without PID namespaces, this call duplicates the setpgid() call from
        // the parent process. With PID namespaces, this setpgid() call sets the
        // process group ID for a child of the init process in the PID
        // namespace.
        if (setpgid(0, 0) == -1) {
            return ErrnoError() << "setpgid failed";
        }
        SetupStdio(attr.stdio_to_kmsg);
    }

    for (const auto& rlimit : attr.rlimits) {
        if (setrlimit(rlimit.first, &rlimit.second) == -1) {
            return ErrnoErrorf("setrlimit({}, {{rlim_cur={}, rlim_max={}}}) failed", rlimit.first,
                               rlimit.second.rlim_cur, rlimit.second.rlim_max);
        }
    }

    if (attr.gid) {
        if (setgid(attr.gid) != 0) {
            return ErrnoError() << "setgid failed";
        }
    }
    // setgroups() is called even when there are no supplementary groups. Use data() rather than
    // &supp_gids[0]: indexing element 0 of an empty container is undefined behavior, whereas
    // data() is always valid to call and setgroups() accepts a size of 0.
    if (setgroups(attr.supp_gids.size(), attr.supp_gids.data()) != 0) {
        return ErrnoError() << "setgroups failed";
    }
    if (attr.uid) {
        if (setuid(attr.uid) != 0) {
            return ErrnoError() << "setuid failed";
        }
    }

    if (attr.priority != 0) {
        if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) {
            return ErrnoError() << "setpriority failed";
        }
    }
    return {};
}
|
|
|
|
// Writes the pid of the calling process to every path in |files|.
//
// Nothing happens when |files| is empty. Otherwise cgroups must be available. If the cpuset
// controller is mounted and none of the paths lies below it, and the "ro.cpuset.default" property
// is non-empty, the "tasks" file of that default cpuset is appended to |files| first. A warning
// is logged for every path that belongs to a cgroup controller. Returns an error if cgroups are
// unavailable or if any write fails.
Result<void> WritePidToFiles(std::vector<std::string>* files) {
    // No files to write pid to, exit early.
    if (files->empty()) {
        return {};
    }

    if (!CgroupsAvailable()) {
        return Error() << "cgroups are not available";
    }

    std::string cpuset_root;
    if (!CgroupGetControllerPath("cpuset", &cpuset_root)) {
        LOG(ERROR) << "cpuset cgroup controller is not mounted!";
    } else {
        // Look for "writepid" instructions that target files under the cpuset path.
        const std::string cpuset_prefix = cpuset_root + "/";
        bool targets_cpuset = false;
        for (const std::string& candidate : *files) {
            if (StartsWith(candidate, cpuset_prefix)) {
                targets_cpuset = true;
                break;
            }
        }

        if (!targets_cpuset) {
            // No "writepid" instruction for cpusets: check whether a system default cpuset is
            // specified for the process.
            std::string default_cpuset = GetProperty("ro.cpuset.default", "");
            if (!default_cpuset.empty()) {
                // Normalize the cpuset name so that it starts and ends with '/'.
                // A single '/' means the 'root' cpuset.
                if (default_cpuset.front() != '/') {
                    default_cpuset.insert(0, 1, '/');
                }
                if (default_cpuset.back() != '/') {
                    default_cpuset.push_back('/');
                }
                files->push_back(
                        StringPrintf("%s%stasks", cpuset_root.c_str(), default_cpuset.c_str()));
            }
        }
    }

    // Warn whenever writepid is used with a cgroup. This can't be done during command parsing
    // because cgroups might not be configured at the time of parsing.
    for (const auto& target : *files) {
        if (CgroupGetControllerFromPath(target, nullptr)) {
            LOG(WARNING) << "writepid usage with cgroups path '" << target
                         << "' is obsolete, please use task_profiles!";
        }
    }

    const std::string pid_text = std::to_string(getpid());
    for (const auto& target : *files) {
        if (!WriteStringToFile(pid_text, target)) {
            return ErrnoError() << "couldn't write " << pid_text << " to " << target;
        }
    }

    return {};
}
|
|
|
|
} // namespace init
|
|
} // namespace android
|