From dda598103d84276d14aa73951857aa5c43185e3e Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 6 Mar 2019 17:45:17 -0800 Subject: [PATCH 1/2] Add vdc checkpoint restoreCheckpointPart Restores the first n entries of a checkpoint. Allows automated testing of interrupted restores. Test: vdc checkpoint restoreCheckpointPart [device] [n] Change-Id: I47570e8eba0bc3c6549a04a33600df05d393990b --- Checkpoint.cpp | 9 ++++++++- Checkpoint.h | 2 +- VoldNativeService.cpp | 8 ++++++++ VoldNativeService.h | 1 + binder/android/os/IVold.aidl | 1 + vdc.cpp | 4 ++++ 6 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Checkpoint.cpp b/Checkpoint.cpp index 1020ad7..08f0fce 100644 --- a/Checkpoint.cpp +++ b/Checkpoint.cpp @@ -371,9 +371,10 @@ std::vector<char> relocatedRead(int device_fd, Relocations const& relocations, b } // namespace -Status cp_restoreCheckpoint(const std::string& blockDevice) { +Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) { bool validating = true; std::string action = "Validating"; + int restore_count = 0; for (;;) { Relocations relocations; @@ -449,6 +450,12 @@ Status cp_restoreCheckpoint(const std::string& blockDevice) { } else { lseek64(device_fd, le->source * kSectorSize, SEEK_SET); write(device_fd, &buffer[0], le->size); + restore_count++; + if (restore_limit && restore_count >= restore_limit) { + LOG(WARNING) << "Hit the test limit"; + status = Status::fromExceptionCode(EAGAIN, "Hit the test limit"); + break; + } } } } diff --git a/Checkpoint.h b/Checkpoint.h index 64ceed3..70dad8a 100644 --- a/Checkpoint.h +++ b/Checkpoint.h @@ -37,7 +37,7 @@ bool cp_needsCheckpoint(); android::binder::Status cp_prepareCheckpoint(); -android::binder::Status cp_restoreCheckpoint(const std::string& mountPoint); +android::binder::Status cp_restoreCheckpoint(const std::string& mountPoint, int count = 0); android::binder::Status cp_markBootAttempt(); diff --git a/VoldNativeService.cpp b/VoldNativeService.cpp index 1001d2b..69f8a8c 
100644 --- a/VoldNativeService.cpp +++ b/VoldNativeService.cpp @@ -850,6 +850,14 @@ binder::Status VoldNativeService::restoreCheckpoint(const std::string& mountPoin return cp_restoreCheckpoint(mountPoint); } +binder::Status VoldNativeService::restoreCheckpointPart(const std::string& mountPoint, int count) { + ENFORCE_UID(AID_SYSTEM); + CHECK_ARGUMENT_PATH(mountPoint); + ACQUIRE_LOCK; + + return cp_restoreCheckpoint(mountPoint, count); +} + binder::Status VoldNativeService::markBootAttempt() { ENFORCE_UID(AID_SYSTEM); ACQUIRE_LOCK; diff --git a/VoldNativeService.h b/VoldNativeService.h index 7db3e5c..954b8ae 100644 --- a/VoldNativeService.h +++ b/VoldNativeService.h @@ -127,6 +127,7 @@ class VoldNativeService : public BinderService, public os::Bn binder::Status commitChanges(); binder::Status prepareCheckpoint(); binder::Status restoreCheckpoint(const std::string& mountPoint); + binder::Status restoreCheckpointPart(const std::string& mountPoint, int count); binder::Status markBootAttempt(); binder::Status abortChanges(); binder::Status supportsCheckpoint(bool* _aidl_return); diff --git a/binder/android/os/IVold.aidl b/binder/android/os/IVold.aidl index 4b21078..83ee116 100644 --- a/binder/android/os/IVold.aidl +++ b/binder/android/os/IVold.aidl @@ -103,6 +103,7 @@ interface IVold { void commitChanges(); void prepareCheckpoint(); void restoreCheckpoint(@utf8InCpp String device); + void restoreCheckpointPart(@utf8InCpp String device, int count); void markBootAttempt(); boolean supportsCheckpoint(); diff --git a/vdc.cpp b/vdc.cpp index 35775a7..d01fb49 100644 --- a/vdc.cpp +++ b/vdc.cpp @@ -127,6 +127,10 @@ int main(int argc, char** argv) { checkStatus(vold->prepareCheckpoint()); } else if (args[0] == "checkpoint" && args[1] == "restoreCheckpoint" && args.size() == 3) { checkStatus(vold->restoreCheckpoint(args[2])); + } else if (args[0] == "checkpoint" && args[1] == "restoreCheckpointPart" && args.size() == 4) { + int count; + if (!android::base::ParseInt(args[3], 
&count)) exit(EINVAL); + checkStatus(vold->restoreCheckpointPart(args[2], count)); } else if (args[0] == "checkpoint" && args[1] == "markBootAttempt" && args.size() == 2) { checkStatus(vold->markBootAttempt()); } else if (args[0] == "checkpoint" && args[1] == "abortChanges" && args.size() == 2) { From 5298593b1baa45e496ce4b419782fb9c1d5baed7 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 1 Mar 2019 22:01:22 -0800 Subject: [PATCH 2/2] Make Checkpoint restore resume safe This allows us to resume rolling back in the event of an unexpected shutdown during the restore process. We save progress after we process each log sector, and whenever restarting the current log sector would result in invalid data. Test: Run restore, interrupt it, and attempt to resume Change-Id: I91cf0defb0d22fc5afdb9debc2963c956e9e171c --- Checkpoint.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 11 deletions(-) diff --git a/Checkpoint.cpp b/Checkpoint.cpp index 08f0fce..75a22ec 100644 --- a/Checkpoint.cpp +++ b/Checkpoint.cpp @@ -260,6 +260,8 @@ struct log_sector_v1_0 { // MAGIC is BOW in ascii const int kMagic = 0x00574f42; +// Partially restored MAGIC is WOB in ascii +const int kPartialRestoreMagic = 0x00424f57; void crc32(const void* data, size_t n_bytes, uint32_t* crc) { static uint32_t table[0x100] = { @@ -347,6 +349,79 @@ void relocate(Relocations& relocations, sector_t dest, sector_t source, int coun relocations.insert(slice.begin(), slice.end()); } +// A map of sectors that have been written to. +// The final entry must always be False. +// When we restart the restore after an interruption, we must take care that +// when we copy from dest to source, that the block we copy to was not +// previously copied from. +// i.e. 
A->B C->A; If we replay this sequence, we end up copying C->B +// We must save our partial result whenever we finish a page, or when we copy +// to a location that was copied from earlier (our source is an earlier dest) +typedef std::map<sector_t, bool> Used_Sectors; + +bool checkCollision(Used_Sectors& used_sectors, sector_t start, sector_t end) { + auto second_overlap = used_sectors.upper_bound(start); + auto first_overlap = --second_overlap; + + if (first_overlap->second) { + return true; + } else if (second_overlap != used_sectors.end() && second_overlap->first < end) { + return true; + } + return false; +} + +void markUsed(Used_Sectors& used_sectors, sector_t start, sector_t end) { + auto start_pos = used_sectors.insert_or_assign(start, true).first; + auto end_pos = used_sectors.insert_or_assign(end, false).first; + + if (start_pos == used_sectors.begin() || !std::prev(start_pos)->second) { + start_pos++; + } + if (std::next(end_pos) != used_sectors.end() && !std::next(end_pos)->second) { + end_pos++; + } + if (start_pos->first < end_pos->first) { + used_sectors.erase(start_pos, end_pos); + } +} + +// Restores the given log_entry's data from dest -> source +// If that entry is a log sector, set the magic to kPartialRestoreMagic and flush. 
+void restoreSector(int device_fd, Used_Sectors& used_sectors, std::vector<char>& ls_buffer, + log_entry* le, std::vector<char>& buffer) { + log_sector_v1_0& ls = *reinterpret_cast<log_sector_v1_0*>(&ls_buffer[0]); + uint32_t index = le - ((log_entry*)&ls_buffer[ls.header_size]); + int count = (le->size - 1) / kSectorSize + 1; + + if (checkCollision(used_sectors, le->source, le->source + count)) { + fsync(device_fd); + lseek64(device_fd, 0, SEEK_SET); + ls.count = index + 1; + ls.magic = kPartialRestoreMagic; + write(device_fd, &ls_buffer[0], ls.block_size); + fsync(device_fd); + used_sectors.clear(); + used_sectors[0] = false; + } + + markUsed(used_sectors, le->dest, le->dest + count); + + if (index == 0 && ls.sequence != 0) { + log_sector_v1_0* next = reinterpret_cast<log_sector_v1_0*>(&buffer[0]); + if (next->magic == kMagic) { + next->magic = kPartialRestoreMagic; + } + } + + lseek64(device_fd, le->source * kSectorSize, SEEK_SET); + write(device_fd, &buffer[0], le->size); + + if (index == 0) { + fsync(device_fd); + } +} + // Read from the device // If we are validating, the read occurs as though the relocations had happened std::vector<char> relocatedRead(int device_fd, Relocations const& relocations, bool validating, @@ -390,7 +465,10 @@ Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) { log_sector_v1_0 original_ls; read(device_fd, reinterpret_cast<char*>(&original_ls), sizeof(original_ls)); - if (original_ls.magic != kMagic) { + if (original_ls.magic == kPartialRestoreMagic) { + validating = false; + action = "Restoring"; + } else if (original_ls.magic != kMagic) { LOG(ERROR) << "No magic"; return Status::fromExceptionCode(EINVAL, "No magic"); } @@ -398,10 +476,14 @@ Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) { LOG(INFO) << action << " " << original_ls.sequence << " log sectors"; for (int sequence = original_ls.sequence; sequence >= 0 && status.isOk(); sequence--) { - auto buffer = relocatedRead(device_fd, relocations, validating, 0, - 
original_ls.block_size, original_ls.block_size); - log_sector_v1_0 const& ls = *reinterpret_cast<log_sector_v1_0*>(&buffer[0]); - if (ls.magic != kMagic) { + auto ls_buffer = relocatedRead(device_fd, relocations, validating, 0, + original_ls.block_size, original_ls.block_size); + log_sector_v1_0& ls = *reinterpret_cast<log_sector_v1_0*>(&ls_buffer[0]); + + Used_Sectors used_sectors; + used_sectors[0] = false; + + if (ls.magic != kMagic && (ls.magic != kPartialRestoreMagic || validating)) { LOG(ERROR) << "No magic!"; status = Status::fromExceptionCode(EINVAL, "No magic"); break; @@ -423,10 +505,9 @@ Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) { } LOG(INFO) << action << " from log sector " << ls.sequence; - for (log_entry* le = - reinterpret_cast<log_entry*>(&buffer[ls.header_size]) + ls.count - 1; - le >= reinterpret_cast<log_entry*>(&buffer[ls.header_size]); --le) { + reinterpret_cast<log_entry*>(&ls_buffer[ls.header_size]) + ls.count - 1; + le >= reinterpret_cast<log_entry*>(&ls_buffer[ls.header_size]); --le) { // This is very noisy - limit to DEBUG only LOG(VERBOSE) << action << " " << le->size << " bytes from sector " << le->dest << " to " << le->source << " with checksum " << std::hex @@ -446,10 +527,9 @@ Status cp_restoreCheckpoint(const std::string& blockDevice, int restore_limit) { } if (validating) { - relocate(relocations, le->source, le->dest, le->size / kSectorSize); + relocate(relocations, le->source, le->dest, (le->size - 1) / kSectorSize + 1); } else { - lseek64(device_fd, le->source * kSectorSize, SEEK_SET); - write(device_fd, &buffer[0], le->size); + restoreSector(device_fd, used_sectors, ls_buffer, le, buffer); restore_count++; if (restore_limit && restore_count >= restore_limit) { LOG(WARNING) << "Hit the test limit";