Configure backing device max_ratio for FUSE filesystems.

By default FUSE filesystems have a max_ratio of 1%, meaning only 1% of
dirty pages on the system can belong to a FUSE filesystem before we
start writing back pages (and throttling, if writeback can't keep up).
This limit is useful for untrusted filesystems, but in our case, we
trust the FUSE filesystem. Since FUSE writes result in writes to the
lower filesystem, FUSE should take at most 50%. Let's start with
changing max_ratio to 40%, to avoid needless throttling.

Bug: 159254170
Bug: 159770752
Test: inspect /sys/class/bdi manually after boot
Change-Id: I467e3770fc4afba0a08fa480c0b86aa054c8b875
This commit is contained in:
Martijn Coenen 2020-06-29 11:53:34 +02:00
parent f71707916f
commit a485006ab1
4 changed files with 67 additions and 9 deletions

View file

@ -1386,22 +1386,54 @@ status_t EnsureDirExists(const std::string& path, mode_t mode, uid_t uid, gid_t
return OK; return OK;
} }
// Returns the sysfs directory that holds the backing device info (bdi)
// parameters for the filesystem mounted at |mount|, in the form
// /sys/class/bdi/MAJOR:MINOR. Returns an empty string (after logging the
// error) if |mount| cannot be stat'ed.
static std::string getBdiPathForMount(const std::string& mount) {
    // The device id reported by stat() encodes the MAJOR:MINOR pair that
    // names the bdi directory.
    struct stat mountStat;
    const int rc = stat(mount.c_str(), &mountStat);
    if (rc != 0) {
        PLOG(ERROR) << "Failed to stat " << mount;
        return "";
    }

    const unsigned int devMajor = major(mountStat.st_dev);
    const unsigned int devMinor = minor(mountStat.st_dev);
    return StringPrintf("/sys/class/bdi/%u:%u", devMajor, devMinor);
}
// Configures max_ratio for the FUSE filesystem.
void ConfigureMaxDirtyRatioForFuse(const std::string& fuse_mount, unsigned int max_ratio) {
LOG(INFO) << "Configuring max_ratio of " << fuse_mount << " fuse filesystem to " << max_ratio;
if (max_ratio > 100) {
LOG(ERROR) << "Invalid max_ratio: " << max_ratio;
return;
}
std::string fuseBdiPath = getBdiPathForMount(fuse_mount);
if (fuseBdiPath == "") {
return;
}
std::string max_ratio_file = StringPrintf("%s/max_ratio", fuseBdiPath.c_str());
unique_fd fd(TEMP_FAILURE_RETRY(open(max_ratio_file.c_str(), O_WRONLY | O_CLOEXEC)));
if (fd.get() == -1) {
PLOG(ERROR) << "Failed to open " << max_ratio_file;
return;
}
LOG(INFO) << "Writing " << max_ratio << " to " << max_ratio_file;
if (!WriteStringToFd(std::to_string(max_ratio), fd)) {
PLOG(ERROR) << "Failed to write to " << max_ratio_file;
}
}
// Configures read ahead property of the fuse filesystem with the mount point |fuse_mount| by // Configures read ahead property of the fuse filesystem with the mount point |fuse_mount| by
// writing |read_ahead_kb| to the /sys/class/bdi/MAJOR:MINOR/read_ahead_kb. // writing |read_ahead_kb| to the /sys/class/bdi/MAJOR:MINOR/read_ahead_kb.
void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb) { void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb) {
LOG(INFO) << "Configuring read_ahead of " << fuse_mount << " fuse filesystem to " LOG(INFO) << "Configuring read_ahead of " << fuse_mount << " fuse filesystem to "
<< read_ahead_kb << "kb"; << read_ahead_kb << "kb";
// First figure out MAJOR:MINOR of fuse_mount. Simplest way is to stat the path. std::string fuseBdiPath = getBdiPathForMount(fuse_mount);
struct stat info; if (fuseBdiPath == "") {
if (stat(fuse_mount.c_str(), &info) != 0) {
PLOG(ERROR) << "Failed to stat " << fuse_mount;
return; return;
} }
unsigned int maj = major(info.st_dev); // We found the bdi path for our filesystem, time to configure read ahead!
unsigned int min = minor(info.st_dev); std::string read_ahead_file = StringPrintf("%s/read_ahead_kb", fuseBdiPath.c_str());
LOG(INFO) << fuse_mount << " has major:minor " << maj << ":" << min;
// We found major:minor of our filesystem, time to configure read ahead!
std::string read_ahead_file = StringPrintf("/sys/class/bdi/%u:%u/read_ahead_kb", maj, min);
unique_fd fd(TEMP_FAILURE_RETRY(open(read_ahead_file.c_str(), O_WRONLY | O_CLOEXEC))); unique_fd fd(TEMP_FAILURE_RETRY(open(read_ahead_file.c_str(), O_WRONLY | O_CLOEXEC)));
if (fd.get() == -1) { if (fd.get() == -1) {
PLOG(ERROR) << "Failed to open " << read_ahead_file; PLOG(ERROR) << "Failed to open " << read_ahead_file;

View file

@ -176,6 +176,8 @@ bool FsyncDirectory(const std::string& dirname);
bool writeStringToFile(const std::string& payload, const std::string& filename); bool writeStringToFile(const std::string& payload, const std::string& filename);
void ConfigureMaxDirtyRatioForFuse(const std::string& fuse_mount, unsigned int max_ratio);
void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb); void ConfigureReadAheadForFuse(const std::string& fuse_mount, size_t read_ahead_kb);
status_t MountUserFuse(userid_t user_id, const std::string& absolute_lower_path, status_t MountUserFuse(userid_t user_id, const std::string& absolute_lower_path,

View file

@ -404,6 +404,27 @@ status_t EmulatedVolume::doMount() {
ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, label), 256u); ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, label), 256u);
// By default, FUSE has a max_ratio of 1%. This means that out of
// all dirty pages in the system, only 1% is allowed to belong to any
// FUSE filesystem. The reason this is in place is that FUSE
// filesystems shouldn't be trusted by default; a FUSE filesystem could
// take up say 100% of dirty pages, and subsequently refuse to write
// them back to storage. The kernel will then apply rate-limiting, and
// block other tasks from writing. For this particular FUSE filesystem
// however, we trust the implementation, because it is a part of the
// Android platform. So we don't need the restrictive 1% limit.
//
// The reason we're setting this is that there's a suspicion that the
// kernel starts rate-limiting the FUSE filesystem under extreme
// memory pressure scenarios. While the kernel will only rate limit if
// the writeback can't keep up with the write rate, under extreme
// memory pressure the write rate may dip as well, in which case FUSE
// writes to a 1% max_ratio filesystem are throttled to an extreme amount.
//
// To prevent this, just give FUSE 40% max_ratio, meaning it can take
// up to 40% of all dirty pages in the system.
ConfigureMaxDirtyRatioForFuse(GetFuseMountPathForUser(user_id, label), 40u);
// All mounts were successful, disable scope guards // All mounts were successful, disable scope guards
sdcardfs_guard.Disable(); sdcardfs_guard.Disable();
fuse_guard.Disable(); fuse_guard.Disable();

View file

@ -255,6 +255,9 @@ status_t PublicVolume::doMount() {
} }
ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, stableName), 256u); ConfigureReadAheadForFuse(GetFuseMountPathForUser(user_id, stableName), 256u);
// See comment in model/EmulatedVolume.cpp
ConfigureMaxDirtyRatioForFuse(GetFuseMountPathForUser(user_id, stableName), 40u);
} }
return OK; return OK;