libsnapshot: Add diagnostics for DM_DEV_REMOVE failures.

Example log line:

    update_engine: Block device was lazily unmounted and is still in-use:
        /dev/block/dm-28; possibly open file descriptor or attached loop device.

This will help diagnose bugs such as b/184715543 in the future.

Bug: N/A
Test: manual test
Change-Id: Ia6b17fe9bd1796d59be7fc0b355218509acfd4af
This commit is contained in:
David Anderson 2021-04-07 21:43:03 -07:00
parent b20e9a3606
commit 0b903f9910
4 changed files with 89 additions and 32 deletions

View file

@ -183,6 +183,7 @@ cc_binary {
],
static_libs: [
"libc++fs",
"libgtest_prod",
"libhealthhalutils",
"libsnapshot_cow",

View file

@ -264,6 +264,7 @@ cc_defaults {
"android.hardware.boot@1.0",
"android.hardware.boot@1.1",
"libbrotli",
"libc++fs",
"libfs_mgr",
"libgsi",
"libgmock",
@ -297,6 +298,7 @@ cc_binary {
],
static_libs: [
"libbrotli",
"libc++fs",
"libfstab",
"libsnapshot",
"libsnapshot_cow",
@ -326,6 +328,7 @@ cc_test {
"power_test.cpp",
],
static_libs: [
"libc++fs",
"libsnapshot",
"update_metadata-protos",
],
@ -355,6 +358,7 @@ cc_defaults {
static_libs: [
"libbase",
"libbrotli",
"libc++fs",
"libchrome",
"libcrypto_static",
"libcutils",
@ -416,7 +420,7 @@ cc_defaults {
"snapuserd_server.cpp",
"snapuserd.cpp",
"snapuserd_daemon.cpp",
"snapuserd_worker.cpp",
"snapuserd_worker.cpp",
],
cflags: [

View file

@ -493,6 +493,9 @@ class SnapshotManager final : public ISnapshotManager {
// Unmap a COW image device previously mapped with MapCowImage().
bool UnmapCowImage(const std::string& name);
// Unmap a COW and remove it from a MetadataBuilder.
void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata);
// Unmap and remove all known snapshots.
bool RemoveAllSnapshots(LockedFile* lock);
@ -738,6 +741,10 @@ class SnapshotManager final : public ISnapshotManager {
// Helper of UpdateUsesCompression
bool UpdateUsesCompression(LockedFile* lock);
// Wrapper around libdm, with diagnostics.
bool DeleteDeviceIfExists(const std::string& name,
const std::chrono::milliseconds& timeout_ms = {});
std::string gsid_dir_;
std::string metadata_dir_;
std::unique_ptr<IDeviceInfo> device_;

View file

@ -21,6 +21,7 @@
#include <sys/types.h>
#include <sys/unistd.h>
#include <filesystem>
#include <optional>
#include <thread>
#include <unordered_set>
@ -587,8 +588,7 @@ bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
CHECK(lock);
auto& dm = DeviceMapper::Instance();
if (!dm.DeleteDeviceIfExists(name)) {
if (!DeleteDeviceIfExists(name)) {
LOG(ERROR) << "Could not delete snapshot device: " << name;
return false;
}
@ -1252,25 +1252,6 @@ bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::strin
return true;
}
static bool DeleteDmDevice(const std::string& name, const std::chrono::milliseconds& timeout_ms) {
auto start = std::chrono::steady_clock::now();
auto& dm = DeviceMapper::Instance();
while (true) {
if (dm.DeleteDeviceIfExists(name)) {
break;
}
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
if (elapsed >= timeout_ms) {
LOG(ERROR) << "DeleteDevice timeout: " << name;
return false;
}
std::this_thread::sleep_for(400ms);
}
return true;
}
bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
const SnapshotStatus& status) {
auto& dm = DeviceMapper::Instance();
@ -1326,11 +1307,11 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
UnmapDmUserDevice(name);
}
auto base_name = GetBaseDeviceName(name);
if (!dm.DeleteDeviceIfExists(base_name)) {
if (!DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
}
if (!DeleteDmDevice(GetSourceDeviceName(name), 4000ms)) {
if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
}
@ -2083,15 +2064,14 @@ bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
return false;
}
auto& dm = DeviceMapper::Instance();
auto base_name = GetBaseDeviceName(target_partition_name);
if (!dm.DeleteDeviceIfExists(base_name)) {
if (!DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Cannot delete base device: " << base_name;
return false;
}
auto source_name = GetSourceDeviceName(target_partition_name);
if (!dm.DeleteDeviceIfExists(source_name)) {
if (!DeleteDeviceIfExists(source_name)) {
LOG(ERROR) << "Cannot delete source device: " << source_name;
return false;
}
@ -2181,7 +2161,7 @@ bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name)
return false;
}
if (!DeleteDmDevice(GetCowName(name), 4000ms)) {
if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
return false;
}
@ -2202,7 +2182,7 @@ bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
return true;
}
if (!dm.DeleteDeviceIfExists(dm_user_name)) {
if (!DeleteDeviceIfExists(dm_user_name)) {
LOG(ERROR) << "Cannot unmap " << dm_user_name;
return false;
}
@ -2593,11 +2573,10 @@ bool SnapshotManager::ForceLocalImageManager() {
return true;
}
static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
auto& dm = DeviceMapper::Instance();
void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
std::vector<std::string> to_delete;
for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
LOG(WARNING) << existing_cow_partition->name()
<< " cannot be unmapped and its space cannot be reclaimed";
continue;
@ -3626,5 +3605,71 @@ void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
stats->set_estimated_cow_size_bytes(estimated_cow_size);
}
bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
const std::chrono::milliseconds& timeout_ms) {
auto& dm = DeviceMapper::Instance();
auto start = std::chrono::steady_clock::now();
while (true) {
if (dm.DeleteDeviceIfExists(name)) {
return true;
}
auto now = std::chrono::steady_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
if (elapsed >= timeout_ms) {
break;
}
std::this_thread::sleep_for(400ms);
}
// Try to diagnose why this failed. First get the actual device path.
std::string full_path;
if (!dm.GetDmDevicePathByName(name, &full_path)) {
LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
return false;
}
// Check for child dm-devices.
std::string block_name = android::base::Basename(full_path);
std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
std::error_code ec;
std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
return false;
}
// Check for mounted partitions.
android::fs_mgr::Fstab fstab;
android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
for (const auto& entry : fstab) {
if (android::base::Basename(entry.blk_device) == block_name) {
LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
return false;
}
}
// Check for detached mounted partitions.
for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
std::string fs_type = android::base::Basename(fs.path().c_str());
if (!(fs_type == "ext4" || fs_type == "f2fs")) {
continue;
}
std::string path = fs.path().c_str() + "/"s + block_name;
if (access(path.c_str(), F_OK) == 0) {
LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
<< "; possibly open file descriptor or attached loop device.";
return false;
}
}
LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
<< " still in use."
<< " Probably a file descriptor was leaked or held open, or a loop device is"
<< " attached.";
return false;
}
} // namespace snapshot
} // namespace android