libsnapshot:VABC: Allow batch merge

Kernel will batch the merge operations only when
block numbers of source and cow device are contiguous.

Daemon will read the COW file and post-process
the operations so that kernel can batch merge the potential
operations.

There are two key changes done in post-processing:

1: COW file contains all the copy operations at the
beginning of the file. We cannot allow batch
merging of COPY operations, as a crash in between
overlapping copies can result in a corrupted state.
Hence, copy operations are merged one block at a time.

2: Replace and Zero operations can be batch merged.
However, reading our existing COW format as-is
will not allow kernel to batch merge. Hence,
process the operations in such a way that kernel can batch
merge them.

Furthermore, it is observed that calling sync() after every
merge is an overhead. Hence, we will explicitly sync()
only for copy operations. For replace and zero
operations, we will not explicitly sync. This is ok, as a
crash in between replace/zero merge operations can
redo those operations. However, for copy, we have
to make sure that sync is completed before the next copy
operation is initiated.

Merge time of a full OTA on bramble is ~60
seconds, as compared to 10+ minutes prior to this
optimization.

Note that we still have copy operations which are not
batch merged. Hence, OTA with significant number of
copy operations can still have overhead on merge timings.

Bug: 174112589

Test: vts_libsnapshot, cow_snapuserd_test
Full OTA on bramble.

Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: I1dc286067a26ea399fa5d4e8e826e5622ce3fa58
This commit is contained in:
Akilesh Kailash 2020-11-26 03:56:59 +00:00
parent 333639e952
commit 37641374e8
6 changed files with 198 additions and 108 deletions

View file

@ -189,29 +189,138 @@ bool CowReader::ParseOps(std::optional<uint64_t> label) {
LOG(INFO) << "No COW Footer, recovered data";
}
if (header_.num_merge_ops > 0) {
uint64_t merge_ops = header_.num_merge_ops;
uint64_t metadata_ops = 0;
uint64_t current_op_num = 0;
CHECK(ops_buffer->size() >= merge_ops);
while (merge_ops) {
auto& current_op = ops_buffer->data()[current_op_num];
if (current_op.type == kCowLabelOp || current_op.type == kCowFooterOp) {
metadata_ops += 1;
} else {
merge_ops -= 1;
}
current_op_num += 1;
}
ops_buffer->erase(ops_buffer.get()->begin(),
ops_buffer.get()->begin() + header_.num_merge_ops + metadata_ops);
}
ops_ = ops_buffer;
return true;
}
void CowReader::InitializeMerge() {
uint64_t num_copy_ops = 0;
// Remove all the metadata operations
ops_->erase(std::remove_if(ops_.get()->begin(), ops_.get()->end(),
[](CowOperation& op) {
return (op.type == kCowFooterOp || op.type == kCowLabelOp);
}),
ops_.get()->end());
// We will re-arrange the vector in such a way that
// kernel can batch merge. Ex:
//
// Existing COW format; All the copy operations
// are at the beginning.
// =======================================
// Copy-op-1 - cow_op->new_block = 1
// Copy-op-2 - cow_op->new_block = 2
// Copy-op-3 - cow_op->new_block = 3
// Replace-op-4 - cow_op->new_block = 6
// Replace-op-5 - cow_op->new_block = 4
// Replace-op-6 - cow_op->new_block = 8
// Replace-op-7 - cow_op->new_block = 9
// Zero-op-8 - cow_op->new_block = 7
// Zero-op-9 - cow_op->new_block = 5
// =======================================
//
// First find the operation which isn't a copy-op
// and then sort all the operations in descending order
// with the key being cow_op->new_block (source block)
//
// The data-structure will look like:
//
// =======================================
// Copy-op-1 - cow_op->new_block = 1
// Copy-op-2 - cow_op->new_block = 2
// Copy-op-3 - cow_op->new_block = 3
// Replace-op-7 - cow_op->new_block = 9
// Replace-op-6 - cow_op->new_block = 8
// Zero-op-8 - cow_op->new_block = 7
// Replace-op-4 - cow_op->new_block = 6
// Zero-op-9 - cow_op->new_block = 5
// Replace-op-5 - cow_op->new_block = 4
// =======================================
//
// Daemon will read the above data-structure in reverse-order
// when reading metadata. Thus, kernel will get the metadata
// in the following order:
//
// ========================================
// Replace-op-5 - cow_op->new_block = 4
// Zero-op-9 - cow_op->new_block = 5
// Replace-op-4 - cow_op->new_block = 6
// Zero-op-8 - cow_op->new_block = 7
// Replace-op-6 - cow_op->new_block = 8
// Replace-op-7 - cow_op->new_block = 9
// Copy-op-3 - cow_op->new_block = 3
// Copy-op-2 - cow_op->new_block = 2
// Copy-op-1 - cow_op->new_block = 1
// ===========================================
//
// When merging begins, kernel will start from the last
// metadata which was read: In the above format, Copy-op-1
// will be the first merge operation.
//
// Now, batching of the merge operations happens only when
// 1: origin block numbers in the base device are contiguous
// (cow_op->new_block) and,
// 2: cow block numbers which are assigned by daemon in ReadMetadata()
// are contiguous. These are monotonically increasing numbers.
//
// When both (1) and (2) are true, kernel will batch merge the operations.
// However, we do not want copy operations to be batch merged as
// a crash or system reboot during an overlapping copy can drive the device
// to a corrupted state. Hence, merging of copy operations should always be
// done as a individual 4k block. In the above case, since the
// cow_op->new_block numbers are contiguous, we will ensure that the
// cow block numbers assigned in ReadMetadata() for these respective copy
// operations are not contiguous forcing kernel to issue merge for each
// copy operations without batch merging.
//
// For all the other operations viz. Replace and Zero op, the cow block
// numbers assigned by daemon will be contiguous allowing kernel to batch
// merge.
//
// The final format after assiging COW block numbers by the daemon will
// look something like:
//
// =========================================================
// Replace-op-5 - cow_op->new_block = 4 cow-block-num = 2
// Zero-op-9 - cow_op->new_block = 5 cow-block-num = 3
// Replace-op-4 - cow_op->new_block = 6 cow-block-num = 4
// Zero-op-8 - cow_op->new_block = 7 cow-block-num = 5
// Replace-op-6 - cow_op->new_block = 8 cow-block-num = 6
// Replace-op-7 - cow_op->new_block = 9 cow-block-num = 7
// Copy-op-3 - cow_op->new_block = 3 cow-block-num = 9
// Copy-op-2 - cow_op->new_block = 2 cow-block-num = 11
// Copy-op-1 - cow_op->new_block = 1 cow-block-num = 13
// ==========================================================
//
// Merge sequence will look like:
//
// Merge-1 - Copy-op-1
// Merge-2 - Copy-op-2
// Merge-3 - Copy-op-3
// Merge-4 - Batch-merge {Replace-op-7, Replace-op-6, Zero-op-8,
// Replace-op-4, Zero-op-9, Replace-op-5 }
//==============================================================
for (uint64_t i = 0; i < ops_->size(); i++) {
auto& current_op = ops_->data()[i];
if (current_op.type != kCowCopyOp) {
break;
}
num_copy_ops += 1;
}
std::sort(ops_.get()->begin() + num_copy_ops, ops_.get()->end(),
[](CowOperation& op1, CowOperation& op2) -> bool {
return op1.new_block > op2.new_block;
});
if (header_.num_merge_ops > 0) {
CHECK(ops_->size() >= header_.num_merge_ops);
ops_->erase(ops_.get()->begin(), ops_.get()->begin() + header_.num_merge_ops);
}
}
bool CowReader::GetHeader(CowHeader* header) {
*header = header_;
return true;

View file

@ -421,7 +421,7 @@ bool CowWriter::Sync() {
return true;
}
bool CowWriter::CommitMerge(int merged_ops) {
bool CowWriter::CommitMerge(int merged_ops, bool sync) {
CHECK(merge_in_progress_);
header_.num_merge_ops += merged_ops;
@ -436,7 +436,11 @@ bool CowWriter::CommitMerge(int merged_ops) {
return false;
}
return Sync();
// Sync only for merging of copy operations.
if (sync) {
return Sync();
}
return true;
}
bool CowWriter::Truncate(off_t length) {

View file

@ -140,6 +140,8 @@ class CowReader : public ICowReader {
void UpdateMergeProgress(uint64_t merge_ops) { header_.num_merge_ops += merge_ops; }
void InitializeMerge();
private:
bool ParseOps(std::optional<uint64_t> label);

View file

@ -98,7 +98,7 @@ class CowWriter : public ICowWriter {
bool InitializeAppend(android::base::borrowed_fd fd, uint64_t label);
void InitializeMerge(android::base::borrowed_fd fd, CowHeader* header);
bool CommitMerge(int merged_ops);
bool CommitMerge(int merged_ops, bool sync);
bool Finalize() override;

View file

@ -81,7 +81,7 @@ class Snapuserd final {
bool ReadDiskExceptions(chunk_t chunk, size_t size);
bool ReadData(chunk_t chunk, size_t size);
bool IsChunkIdMetadata(chunk_t chunk);
chunk_t GetNextAllocatableChunkId(chunk_t chunk);
chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
bool ProcessReplaceOp(const CowOperation* cow_op);
bool ProcessCopyOp(const CowOperation* cow_op);
@ -90,8 +90,7 @@ class Snapuserd final {
loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
int* unmerged_exceptions);
int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
int unmerged_exceptions);
bool AdvanceMergedOps(int merged_ops_cur_iter);
int unmerged_exceptions, bool* copy_op);
bool ProcessMergeComplete(chunk_t chunk, void* buffer);
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }

View file

@ -31,7 +31,7 @@ using android::base::unique_fd;
#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
static constexpr size_t PAYLOAD_SIZE = (1UL << 16);
static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
static_assert(PAYLOAD_SIZE >= BLOCK_SIZE);
@ -156,11 +156,11 @@ bool Snapuserd::ReadData(chunk_t chunk, size_t size) {
size_t read_size = size;
bool ret = true;
chunk_t chunk_key = chunk;
uint32_t stride;
lldiv_t divresult;
// Size should always be aligned
CHECK((read_size & (BLOCK_SIZE - 1)) == 0);
if (!((read_size & (BLOCK_SIZE - 1)) == 0)) {
SNAP_LOG(ERROR) << "ReadData - unaligned read_size: " << read_size;
return false;
}
while (read_size > 0) {
const CowOperation* cow_op = chunk_map_[chunk_key];
@ -204,24 +204,8 @@ bool Snapuserd::ReadData(chunk_t chunk, size_t size) {
// are contiguous
chunk_key += 1;
if (cow_op->type == kCowCopyOp) CHECK(read_size == 0);
// This is similar to the way when chunk IDs were assigned
// in ReadMetadata().
//
// Skip if the chunk id represents a metadata chunk.
stride = exceptions_per_area_ + 1;
divresult = lldiv(chunk_key, stride);
if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
// Crossing exception boundary. Kernel will never
// issue IO which is spanning between a data chunk
// and a metadata chunk. This should be perfectly aligned.
//
// Since the input read_size is 4k aligned, we will
// always end up reading all 256 data chunks in one area.
// Thus, every multiple of 4K IO represents 256 data chunks
if (cow_op->type == kCowCopyOp) {
CHECK(read_size == 0);
break;
}
}
@ -330,7 +314,7 @@ loff_t Snapuserd::GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer
}
int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
int unmerged_exceptions) {
int unmerged_exceptions, bool* copy_op) {
int merged_ops_cur_iter = 0;
// Find the operations which are merged in this cycle.
@ -346,6 +330,12 @@ int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer,
if (cow_de->new_chunk != 0) {
merged_ops_cur_iter += 1;
offset += sizeof(struct disk_exception);
const CowOperation* cow_op = chunk_map_[cow_de->new_chunk];
CHECK(cow_op != nullptr);
CHECK(cow_op->new_block == cow_de->old_chunk);
if (cow_op->type == kCowCopyOp) {
*copy_op = true;
}
// zero out to indicate that operation is merged.
cow_de->old_chunk = 0;
cow_de->new_chunk = 0;
@ -367,44 +357,12 @@ int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer,
}
}
if (*copy_op) {
CHECK(merged_ops_cur_iter == 1);
}
return merged_ops_cur_iter;
}
// Moves cowop_iter_ forward past the operations that were merged in this
// cycle. Footer and label operations are stepped over without being counted.
//
// Returns false if an operation of an unrecognised type is encountered,
// true otherwise.
bool Snapuserd::AdvanceMergedOps(int merged_ops_cur_iter) {
    // cowop_iter_ was set up in ReadMetadata(); simply continue from wherever
    // it currently points. Every path that falls through to the end of the
    // loop body (including `continue`) advances the iterator by one.
    for (; !cowop_iter_->Done() && merged_ops_cur_iter; cowop_iter_->Next()) {
        const CowOperation* cow_op = &cowop_iter_->Get();
        CHECK(cow_op != nullptr);

        // Metadata operations do not count towards the merged total.
        const bool is_metadata_op =
                (cow_op->type == kCowFooterOp) || (cow_op->type == kCowLabelOp);
        if (is_metadata_op) {
            continue;
        }

        if (cow_op->type != kCowReplaceOp && cow_op->type != kCowZeroOp &&
            cow_op->type != kCowCopyOp) {
            SNAP_LOG(ERROR) << "Unknown operation-type found during merge: " << cow_op->type;
            return false;
        }

        merged_ops_cur_iter -= 1;
        SNAP_LOG(DEBUG) << "Merge op found of type " << cow_op->type
                        << "Pending-merge-ops: " << merged_ops_cur_iter;
    }

    // Running out of operations is only valid once every merged op has been
    // accounted for.
    if (cowop_iter_->Done()) {
        CHECK(merged_ops_cur_iter == 0);
        SNAP_LOG(DEBUG) << "All cow operations merged successfully in this cycle";
    }

    return true;
}
bool Snapuserd::ProcessMergeComplete(chunk_t chunk, void* buffer) {
uint32_t stride = exceptions_per_area_ + 1;
CowHeader header;
@ -423,21 +381,47 @@ bool Snapuserd::ProcessMergeComplete(chunk_t chunk, void* buffer) {
int unmerged_exceptions = 0;
loff_t offset = GetMergeStartOffset(buffer, vec_[divresult.quot].get(), &unmerged_exceptions);
int merged_ops_cur_iter =
GetNumberOfMergedOps(buffer, vec_[divresult.quot].get(), offset, unmerged_exceptions);
bool copy_op = false;
// Check if the merged operation is a copy operation. If so, then we need
// to explicitly sync the metadata before initiating the next merge.
// For ex: Consider a following sequence of copy operations in the COW file:
//
// Op-1: Copy 2 -> 3
// Op-2: Copy 1 -> 2
// Op-3: Copy 5 -> 10
//
// Op-1 and Op-2 are overlapping copy operations. The merge sequence will
// look like:
//
// Merge op-1: Copy 2 -> 3
// Merge op-2: Copy 1 -> 2
// Merge op-3: Copy 5 -> 10
//
// Now, let's say we have a crash _after_ Merge op-2; Block 2 contents would
// have been over-written by Block-1 after merge op-2. During next reboot,
// kernel will request the metadata for all the un-merged blocks. If we had
// not synced the metadata after Merge op-1 and Merge op-2, snapuser daemon
// will think that these merge operations are still pending and hence will
// inform the kernel that Op-1 and Op-2 are un-merged blocks. When kernel
// resumes the merging process, it will attempt to redo Merge op-1
// once again. However, block 2 contents are wrong as it has the contents
// of block 1 from the previous merge cycle. Although the merge will succeed,
// this will lead to silent data corruption.
//
int merged_ops_cur_iter = GetNumberOfMergedOps(buffer, vec_[divresult.quot].get(), offset,
unmerged_exceptions, &copy_op);
// There should be at least one operation merged in this cycle
CHECK(merged_ops_cur_iter > 0);
if (!AdvanceMergedOps(merged_ops_cur_iter)) return false;
header.num_merge_ops += merged_ops_cur_iter;
reader_->UpdateMergeProgress(merged_ops_cur_iter);
if (!writer_->CommitMerge(merged_ops_cur_iter)) {
if (!writer_->CommitMerge(merged_ops_cur_iter, copy_op)) {
SNAP_LOG(ERROR) << "CommitMerge failed...";
return false;
}
SNAP_LOG(DEBUG) << "Merge success";
SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
return true;
}
@ -532,6 +516,7 @@ bool Snapuserd::ReadMetadata() {
CHECK(header.block_size == BLOCK_SIZE);
SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;
reader_->InitializeMerge();
writer_ = std::make_unique<CowWriter>(options);
writer_->InitializeMerge(cow_fd_.get(), &header);
@ -543,7 +528,8 @@ bool Snapuserd::ReadMetadata() {
// Start from chunk number 2. Chunk 0 represents header and chunk 1
// represents first metadata page.
chunk_t next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1;
chunk_t data_chunk_id = NUM_SNAPSHOT_HDR_CHUNKS + 1;
size_t num_ops = 0;
loff_t offset = 0;
std::unique_ptr<uint8_t[]> de_ptr =
@ -553,7 +539,6 @@ bool Snapuserd::ReadMetadata() {
// is 0. When Area is not filled completely with all 256 exceptions,
// this memset will ensure that metadata read is completed.
memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
size_t num_ops = 0;
while (!cowop_riter_->Done()) {
const CowOperation* cow_op = &cowop_riter_->Get();
@ -565,31 +550,23 @@ bool Snapuserd::ReadMetadata() {
continue;
}
if (!(cow_op->type == kCowReplaceOp || cow_op->type == kCowZeroOp ||
cow_op->type == kCowCopyOp)) {
SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
return false;
}
metadata_found = true;
if ((cow_op->type == kCowCopyOp || prev_copy_op)) {
next_free = GetNextAllocatableChunkId(next_free);
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
}
prev_copy_op = (cow_op->type == kCowCopyOp);
// Construct the disk-exception
de->old_chunk = cow_op->new_block;
de->new_chunk = next_free;
de->new_chunk = data_chunk_id;
SNAP_LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;
// Store operation pointer.
chunk_map_[next_free] = cow_op;
chunk_map_[data_chunk_id] = cow_op;
num_ops += 1;
offset += sizeof(struct disk_exception);
cowop_riter_->Next();
if (num_ops == exceptions_per_area_) {
@ -610,7 +587,7 @@ bool Snapuserd::ReadMetadata() {
}
}
next_free = GetNextAllocatableChunkId(next_free);
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
}
// Partially filled area or there is no metadata
@ -622,14 +599,11 @@ bool Snapuserd::ReadMetadata() {
<< "Areas : " << vec_.size();
}
SNAP_LOG(DEBUG) << "ReadMetadata() completed. chunk_id: " << next_free
<< "Num Sector: " << ChunkToSector(next_free);
// Initialize the iterator for merging
cowop_iter_ = reader_->GetOpIter();
SNAP_LOG(DEBUG) << "ReadMetadata() completed. Final_chunk_id: " << data_chunk_id
<< "Num Sector: " << ChunkToSector(data_chunk_id);
// Total number of sectors required for creating dm-user device
num_sectors_ = ChunkToSector(next_free);
num_sectors_ = ChunkToSector(data_chunk_id);
metadata_read_done_ = true;
return true;
}
@ -759,6 +733,8 @@ bool Snapuserd::Run() {
<< "Sector: " << header->sector;
}
} else {
SNAP_LOG(DEBUG) << "ReadData: chunk: " << chunk << " len: " << header->len
<< " read_size: " << read_size << " offset: " << offset;
chunk_t num_chunks_read = (offset >> BLOCK_SHIFT);
if (!ReadData(chunk + num_chunks_read, read_size)) {
SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk