libsnapshot: Cache Ops correctly

Change the meaning of batch_size_. Previously, a batch size of 200 meant
200 compressed data ops. With variable block size, each compressed data
op can be up to 256k uncompressed size -> batch size meaning should be
changed to 200 blocks of block size.

With this being said, the default batch size can be increased to 4mb to
better accommodate variable block size

The way we calculate the number of blocks to compress at once also
needs to be changed. Since there's no way of determining the compressed
data size ahead of time, allow overwriting the cache by batch_size_ and
then flushing the cache as needed

Bug: 322279333
Test: Ota on pixel and measuring system image cow
Change-Id: Ie8e08d109dc5c3b4f5f36a740bbbcd37362a7ab3
This commit is contained in:
Daniel Zheng 2024-03-13 14:02:14 -07:00
parent d7efbea407
commit 5e8e488c13
3 changed files with 25 additions and 16 deletions

View file

@ -45,7 +45,7 @@ struct CowOptions {
std::optional<uint64_t> max_blocks;
// Number of CowOperations in a cluster. 0 for no clustering. Cannot be 1.
uint32_t cluster_ops = 200;
uint32_t cluster_ops = 1024;
bool scratch_space = true;

View file

@ -310,6 +310,14 @@ bool CowWriterV3::CheckOpCount(size_t op_count) {
return true;
}
// Returns the total number of bytes currently buffered in cached_data_,
// i.e. the sum of the sizes of every pending data buffer.
size_t CowWriterV3::CachedDataSize() const {
    size_t total_bytes = 0;
    for (const auto& buffer : cached_data_) {
        total_bytes += buffer.size();
    }
    return total_bytes;
}
bool CowWriterV3::EmitCopy(uint64_t new_block, uint64_t old_block, uint64_t num_blocks) {
if (!CheckOpCount(num_blocks)) {
return false;
@ -342,7 +350,7 @@ bool CowWriterV3::NeedsFlush() const {
// Allow bigger batch sizes for ops without data. A single CowOperationV3
// struct uses 14 bytes of memory, even if we cache 200 * 16 ops in memory,
// it's only ~44K.
return cached_data_.size() >= batch_size_ ||
return CachedDataSize() >= batch_size_ * header_.block_size ||
cached_ops_.size() >= batch_size_ * kNonDataOpBufferSize;
}
@ -397,13 +405,13 @@ bool CowWriterV3::EmitBlocks(uint64_t new_block_start, const void* data, size_t
return false;
}
const auto bytes = reinterpret_cast<const uint8_t*>(data);
const size_t num_blocks = (size / header_.block_size);
for (size_t i = 0; i < num_blocks;) {
const size_t blocks_to_write =
std::min<size_t>(batch_size_ - cached_data_.size(), num_blocks - i);
if (!ConstructCowOpCompressedBuffers(new_block_start + i, bytes + header_.block_size * i,
old_block + i, offset, type, blocks_to_write)) {
size_t num_blocks = (size / header_.block_size);
size_t total_written = 0;
while (total_written < num_blocks) {
size_t chunk = std::min(num_blocks - total_written, batch_size_);
if (!ConstructCowOpCompressedBuffers(new_block_start + total_written,
bytes + header_.block_size * total_written,
old_block + total_written, offset, type, chunk)) {
return false;
}
@ -413,8 +421,7 @@ bool CowWriterV3::EmitBlocks(uint64_t new_block_start, const void* data, size_t
<< ", op type: " << type;
return false;
}
i += blocks_to_write;
total_written += chunk;
}
return true;
@ -482,7 +489,8 @@ bool CowWriterV3::EmitSequenceData(size_t num_ops, const uint32_t* data) {
header_.sequence_data_count = num_ops;
// Ensure next_data_pos_ is updated as previously initialized + the newly added sequence buffer.
// Ensure next_data_pos_ is updated as previously initialized + the newly added sequence
// buffer.
CHECK_EQ(next_data_pos_ + header_.sequence_data_count * sizeof(uint32_t),
GetDataOffset(header_));
next_data_pos_ = GetDataOffset(header_);
@ -640,8 +648,8 @@ std::vector<CowWriterV3::CompressedBuffer> CowWriterV3::ProcessBlocksWithThreade
// t1 t2 t1 t2 <- processed by these threads
// Ordering is important here. We need to retrieve the compressed data in the same order we
// processed it and assume that that we submit data beginning with the first thread and then
// round robin the consecutive data calls. We need to Fetch compressed buffers from the threads
// via the same ordering
// round robin the consecutive data calls. We need to Fetch compressed buffers from the
// threads via the same ordering
for (size_t i = 0; i < compressed_vec.size(); i++) {
compressed_buf.emplace_back(worker_buffers[i % num_threads][i / num_threads]);
}
@ -733,7 +741,8 @@ bool CowWriterV3::WriteOperation(std::span<const CowOperationV3> ops,
}
if (total_written != total_data_size) {
PLOG(ERROR) << "write failed for data of size: " << data.size()
<< " at offset: " << next_data_pos_ << " " << errno;
<< " at offset: " << next_data_pos_ << " " << errno
<< ", only wrote: " << total_written;
return false;
}
}

View file

@ -94,7 +94,7 @@ class CowWriterV3 : public CowWriterBase {
}
return false;
}
size_t CachedDataSize() const;
bool ReadBackVerification();
bool FlushCacheOps();
void InitWorkers();