Merge "snapuserd: Update verification" am: 2ba3ae310e

Original change: https://android-review.googlesource.com/c/platform/system/core/+/2038964

Change-Id: If13a32e7719876f4160803eba9cf157575fd367b
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
This commit is contained in:
Akilesh Kailash 2022-06-09 23:03:33 +00:00 committed by Automerger Merge Worker
commit 62482af6a1
8 changed files with 320 additions and 239 deletions

View file

@ -47,6 +47,7 @@ cc_library_static {
"libbase",
"liblog",
],
export_include_dirs: ["include"],
ramdisk_available: true,
}
@ -68,6 +69,7 @@ cc_defaults {
"user-space-merge/snapuserd_readahead.cpp",
"user-space-merge/snapuserd_transitions.cpp",
"user-space-merge/snapuserd_server.cpp",
"user-space-merge/snapuserd_verify.cpp",
],
cflags: [
@ -88,6 +90,11 @@ cc_defaults {
"libext4_utils",
"liburing",
],
header_libs: [
"libstorage_literals_headers",
],
include_dirs: ["bionic/libc/kernel"],
system_shared_libs: [],

View file

@ -89,6 +89,10 @@ class SnapuserdClient {
// Return the status of the snapshot
std::string QuerySnapshotStatus(const std::string& misc_name);
// Check the update verification status - invoked by update_verifier during
// boot
bool QueryUpdateVerification();
};
} // namespace snapshot

View file

@ -269,5 +269,15 @@ std::string SnapuserdClient::QuerySnapshotStatus(const std::string& misc_name) {
return Receivemsg();
}
bool SnapuserdClient::QueryUpdateVerification() {
std::string msg = "update-verify";
if (!Sendmsg(msg)) {
LOG(ERROR) << "Failed to send message " << msg << " to snapuserd";
return false;
}
std::string response = Receivemsg();
return response == "success";
}
} // namespace snapshot
} // namespace android

View file

@ -18,6 +18,7 @@
#include <sys/utsname.h>
#include <android-base/chrono_utils.h>
#include <android-base/properties.h>
#include <android-base/scopeguard.h>
#include <android-base/strings.h>
@ -70,6 +71,9 @@ bool SnapshotHandler::InitializeWorkers() {
read_ahead_thread_ = std::make_unique<ReadAhead>(cow_device_, backing_store_device_, misc_name_,
GetSharedPtr());
update_verify_ = std::make_unique<UpdateVerify>(misc_name_);
return true;
}
@ -306,206 +310,6 @@ bool SnapshotHandler::InitCowDevice() {
return ReadMetadata();
}
void SnapshotHandler::FinalizeIouring() {
io_uring_queue_exit(ring_.get());
}
bool SnapshotHandler::InitializeIouring(int io_depth) {
ring_ = std::make_unique<struct io_uring>();
int ret = io_uring_queue_init(io_depth, ring_.get(), 0);
if (ret) {
LOG(ERROR) << "io_uring_queue_init failed with ret: " << ret;
return false;
}
LOG(INFO) << "io_uring_queue_init success with io_depth: " << io_depth;
return true;
}
bool SnapshotHandler::ReadBlocksAsync(const std::string& dm_block_device,
const std::string& partition_name, size_t size) {
// 64k block size with io_depth of 64 is optimal
// for a single thread. We just need a single thread
// to read all the blocks from all dynamic partitions.
size_t io_depth = 64;
size_t bs = (64 * 1024);
if (!InitializeIouring(io_depth)) {
return false;
}
LOG(INFO) << "ReadBlockAsync start "
<< " Block-device: " << dm_block_device << " Partition-name: " << partition_name
<< " Size: " << size;
auto scope_guard = android::base::make_scope_guard([this]() -> void { FinalizeIouring(); });
std::vector<std::unique_ptr<struct iovec>> vecs;
using AlignedBuf = std::unique_ptr<void, decltype(free)*>;
std::vector<AlignedBuf> alignedBufVector;
/*
* TODO: We need aligned memory for DIRECT-IO. However, if we do
* a DIRECT-IO and verify the blocks then we need to inform
* update-verifier that block verification has been done and
* there is no need to repeat the same. We are not there yet
* as we need to see if there are any boot time improvements doing
* a DIRECT-IO.
*
* Also, we could you the same function post merge for block verification;
* again, we can do a DIRECT-IO instead of thrashing page-cache and
* hurting other applications.
*
* For now, we will just create aligned buffers but rely on buffered
* I/O until we have perf numbers to justify DIRECT-IO.
*/
for (int i = 0; i < io_depth; i++) {
auto iovec = std::make_unique<struct iovec>();
vecs.push_back(std::move(iovec));
struct iovec* iovec_ptr = vecs[i].get();
if (posix_memalign(&iovec_ptr->iov_base, BLOCK_SZ, bs)) {
LOG(ERROR) << "posix_memalign failed";
return false;
}
iovec_ptr->iov_len = bs;
alignedBufVector.push_back(
std::unique_ptr<void, decltype(free)*>(iovec_ptr->iov_base, free));
}
android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY)));
if (fd.get() == -1) {
SNAP_PLOG(ERROR) << "File open failed - block-device " << dm_block_device
<< " partition-name: " << partition_name;
return false;
}
loff_t offset = 0;
size_t remain = size;
size_t read_sz = io_depth * bs;
while (remain > 0) {
size_t to_read = std::min(remain, read_sz);
size_t queue_size = to_read / bs;
for (int i = 0; i < queue_size; i++) {
struct io_uring_sqe* sqe = io_uring_get_sqe(ring_.get());
if (!sqe) {
SNAP_LOG(ERROR) << "io_uring_get_sqe() failed";
return false;
}
struct iovec* iovec_ptr = vecs[i].get();
io_uring_prep_read(sqe, fd.get(), iovec_ptr->iov_base, iovec_ptr->iov_len, offset);
sqe->flags |= IOSQE_ASYNC;
offset += bs;
}
int ret = io_uring_submit(ring_.get());
if (ret != queue_size) {
SNAP_LOG(ERROR) << "submit got: " << ret << " wanted: " << queue_size;
return false;
}
for (int i = 0; i < queue_size; i++) {
struct io_uring_cqe* cqe;
int ret = io_uring_wait_cqe(ring_.get(), &cqe);
if (ret) {
SNAP_PLOG(ERROR) << "wait_cqe failed" << ret;
return false;
}
if (cqe->res < 0) {
SNAP_LOG(ERROR) << "io failed with res: " << cqe->res;
return false;
}
io_uring_cqe_seen(ring_.get(), cqe);
}
remain -= to_read;
}
LOG(INFO) << "ReadBlockAsync complete: "
<< " Block-device: " << dm_block_device << " Partition-name: " << partition_name
<< " Size: " << size;
return true;
}
void SnapshotHandler::ReadBlocksToCache(const std::string& dm_block_device,
const std::string& partition_name, off_t offset,
size_t size) {
android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY)));
if (fd.get() == -1) {
SNAP_PLOG(ERROR) << "Error reading " << dm_block_device
<< " partition-name: " << partition_name;
return;
}
size_t remain = size;
off_t file_offset = offset;
// We pick 4M I/O size based on the fact that the current
// update_verifier has a similar I/O size.
size_t read_sz = 1024 * BLOCK_SZ;
std::vector<uint8_t> buf(read_sz);
while (remain > 0) {
size_t to_read = std::min(remain, read_sz);
if (!android::base::ReadFullyAtOffset(fd.get(), buf.data(), to_read, file_offset)) {
SNAP_PLOG(ERROR) << "Failed to read block from block device: " << dm_block_device
<< " at offset: " << file_offset
<< " partition-name: " << partition_name << " total-size: " << size
<< " remain_size: " << remain;
return;
}
file_offset += to_read;
remain -= to_read;
}
SNAP_LOG(INFO) << "Finished reading block-device: " << dm_block_device
<< " partition: " << partition_name << " size: " << size
<< " offset: " << offset;
}
void SnapshotHandler::ReadBlocks(const std::string partition_name,
const std::string& dm_block_device) {
SNAP_LOG(DEBUG) << "Reading partition: " << partition_name
<< " Block-Device: " << dm_block_device;
uint64_t dev_sz = 0;
unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY | O_CLOEXEC)));
if (fd < 0) {
SNAP_LOG(ERROR) << "Cannot open block device";
return;
}
dev_sz = get_block_device_size(fd.get());
if (!dev_sz) {
SNAP_PLOG(ERROR) << "Could not determine block device size: " << dm_block_device;
return;
}
int num_threads = 2;
size_t num_blocks = dev_sz >> BLOCK_SHIFT;
size_t num_blocks_per_thread = num_blocks / num_threads;
size_t read_sz_per_thread = num_blocks_per_thread << BLOCK_SHIFT;
off_t offset = 0;
for (int i = 0; i < num_threads; i++) {
std::async(std::launch::async, &SnapshotHandler::ReadBlocksToCache, this, dm_block_device,
partition_name, offset, read_sz_per_thread);
offset += read_sz_per_thread;
}
}
/*
* Entry point to launch threads
*/
@ -526,42 +330,22 @@ bool SnapshotHandler::Start() {
std::async(std::launch::async, &Worker::RunThread, worker_threads_[i].get()));
}
bool second_stage_init = true;
bool partition_verification = true;
// We don't want to read the blocks during first stage init.
// We don't want to read the blocks during first stage init or
// during post-install phase.
if (android::base::EndsWith(misc_name_, "-init") || is_socket_present_) {
second_stage_init = false;
}
if (second_stage_init) {
SNAP_LOG(INFO) << "Reading blocks to cache....";
auto& dm = DeviceMapper::Instance();
auto dm_block_devices = dm.FindDmPartitions();
if (dm_block_devices.empty()) {
SNAP_LOG(ERROR) << "No dm-enabled block device is found.";
} else {
auto parts = android::base::Split(misc_name_, "-");
std::string partition_name = parts[0];
const char* suffix_b = "_b";
const char* suffix_a = "_a";
partition_name.erase(partition_name.find_last_not_of(suffix_b) + 1);
partition_name.erase(partition_name.find_last_not_of(suffix_a) + 1);
if (dm_block_devices.find(partition_name) == dm_block_devices.end()) {
SNAP_LOG(ERROR) << "Failed to find dm block device for " << partition_name;
} else {
ReadBlocks(partition_name, dm_block_devices.at(partition_name));
}
}
} else {
SNAP_LOG(INFO) << "Not reading block device into cache";
partition_verification = false;
}
std::future<bool> merge_thread =
std::async(std::launch::async, &Worker::RunMergeThread, merge_thread_.get());
// Now that the worker threads are up, scan the partitions.
if (partition_verification) {
update_verify_->VerifyUpdatePartition();
}
bool ret = true;
for (auto& t : threads) {
ret = t.get() && ret;

View file

@ -42,12 +42,14 @@
#include <liburing.h>
#include <snapuserd/snapuserd_buffer.h>
#include <snapuserd/snapuserd_kernel.h>
#include <storage_literals/storage_literals.h>
namespace android {
namespace snapshot {
using android::base::unique_fd;
using namespace std::chrono_literals;
using namespace android::storage_literals;
static constexpr size_t PAYLOAD_BUFFER_SZ = (1UL << 20);
static_assert(PAYLOAD_BUFFER_SZ >= BLOCK_SZ);
@ -165,6 +167,36 @@ class ReadAhead {
std::unique_ptr<struct io_uring> ring_;
};
class UpdateVerify {
public:
UpdateVerify(const std::string& misc_name);
void VerifyUpdatePartition();
bool CheckPartitionVerification();
private:
enum class UpdateVerifyState {
VERIFY_UNKNOWN,
VERIFY_FAILED,
VERIFY_SUCCESS,
};
std::string misc_name_;
UpdateVerifyState state_;
std::mutex m_lock_;
std::condition_variable m_cv_;
int kMinThreadsToVerify = 1;
int kMaxThreadsToVerify = 4;
uint64_t kThresholdSize = 512_MiB;
uint64_t kBlockSizeVerify = 1_MiB;
bool IsBlockAligned(uint64_t read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
void UpdatePartitionVerificationState(UpdateVerifyState state);
bool VerifyPartition(const std::string& partition_name, const std::string& dm_block_device);
bool VerifyBlocks(const std::string& partition_name, const std::string& dm_block_device,
off_t offset, int skip_blocks, uint64_t dev_sz);
};
class Worker {
public:
Worker(const std::string& cow_device, const std::string& backing_device,
@ -344,24 +376,16 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
MERGE_GROUP_STATE ProcessMergingBlock(uint64_t new_block, void* buffer);
bool IsIouringSupported();
bool CheckPartitionVerification() { return update_verify_->CheckPartitionVerification(); }
private:
bool ReadMetadata();
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
bool IsBlockAligned(uint64_t read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
struct BufferState* GetBufferState();
void UpdateMergeCompletionPercentage();
void ReadBlocks(const std::string partition_name, const std::string& dm_block_device);
void ReadBlocksToCache(const std::string& dm_block_device, const std::string& partition_name,
off_t offset, size_t size);
bool InitializeIouring(int io_depth);
void FinalizeIouring();
bool ReadBlocksAsync(const std::string& dm_block_device, const std::string& partition_name,
size_t size);
// COW device
std::string cow_device_;
// Source device
@ -413,6 +437,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
bool scratch_space_ = false;
std::unique_ptr<struct io_uring> ring_;
std::unique_ptr<UpdateVerify> update_verify_;
};
} // namespace snapshot

View file

@ -55,6 +55,7 @@ DaemonOps UserSnapshotServer::Resolveop(std::string& input) {
if (input == "initiate_merge") return DaemonOps::INITIATE;
if (input == "merge_percent") return DaemonOps::PERCENTAGE;
if (input == "getstatus") return DaemonOps::GETSTATUS;
if (input == "update-verify") return DaemonOps::UPDATE_VERIFY;
return DaemonOps::INVALID;
}
@ -282,6 +283,14 @@ bool UserSnapshotServer::Receivemsg(android::base::borrowed_fd fd, const std::st
return Sendmsg(fd, merge_status);
}
}
case DaemonOps::UPDATE_VERIFY: {
std::lock_guard<std::mutex> lock(lock_);
if (!UpdateVerification(&lock)) {
return Sendmsg(fd, "fail");
}
return Sendmsg(fd, "success");
}
default: {
LOG(ERROR) << "Received unknown message type from client";
Sendmsg(fd, "fail");
@ -687,5 +696,22 @@ bool UserSnapshotServer::RunForSocketHandoff() {
return true;
}
bool UserSnapshotServer::UpdateVerification(std::lock_guard<std::mutex>* proof_of_lock) {
CHECK(proof_of_lock);
bool status = true;
for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
auto& th = (*iter)->thread();
if (th.joinable() && status) {
status = (*iter)->snapuserd()->CheckPartitionVerification() && status;
} else {
// return immediately if there is a failure
return false;
}
}
return status;
}
} // namespace snapshot
} // namespace android

View file

@ -46,6 +46,7 @@ enum class DaemonOps {
INITIATE,
PERCENTAGE,
GETSTATUS,
UPDATE_VERIFY,
INVALID,
};
@ -118,6 +119,8 @@ class UserSnapshotServer {
double GetMergePercentage(std::lock_guard<std::mutex>* proof_of_lock);
void TerminateMergeThreads(std::lock_guard<std::mutex>* proof_of_lock);
bool UpdateVerification(std::lock_guard<std::mutex>* proof_of_lock);
public:
UserSnapshotServer() { terminating_ = false; }
~UserSnapshotServer();

View file

@ -0,0 +1,222 @@
/*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "snapuserd_core.h"
#include <android-base/chrono_utils.h>
#include <android-base/scopeguard.h>
#include <android-base/strings.h>
namespace android {
namespace snapshot {
using namespace android;
using namespace android::dm;
using android::base::unique_fd;
UpdateVerify::UpdateVerify(const std::string& misc_name)
: misc_name_(misc_name), state_(UpdateVerifyState::VERIFY_UNKNOWN) {}
bool UpdateVerify::CheckPartitionVerification() {
auto now = std::chrono::system_clock::now();
auto deadline = now + 10s;
{
std::unique_lock<std::mutex> cv_lock(m_lock_);
while (state_ == UpdateVerifyState::VERIFY_UNKNOWN) {
auto status = m_cv_.wait_until(cv_lock, deadline);
if (status == std::cv_status::timeout) {
return false;
}
}
}
return (state_ == UpdateVerifyState::VERIFY_SUCCESS);
}
void UpdateVerify::UpdatePartitionVerificationState(UpdateVerifyState state) {
{
std::lock_guard<std::mutex> lock(m_lock_);
state_ = state;
}
m_cv_.notify_all();
}
void UpdateVerify::VerifyUpdatePartition() {
bool succeeded = false;
auto scope_guard = android::base::make_scope_guard([this, &succeeded]() -> void {
if (!succeeded) {
UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_FAILED);
}
});
auto& dm = DeviceMapper::Instance();
auto dm_block_devices = dm.FindDmPartitions();
if (dm_block_devices.empty()) {
SNAP_LOG(ERROR) << "No dm-enabled block device is found.";
return;
}
const auto parts = android::base::Split(misc_name_, "-");
std::string partition_name = parts[0];
constexpr auto&& suffix_b = "_b";
constexpr auto&& suffix_a = "_a";
partition_name.erase(partition_name.find_last_not_of(suffix_b) + 1);
partition_name.erase(partition_name.find_last_not_of(suffix_a) + 1);
if (dm_block_devices.find(partition_name) == dm_block_devices.end()) {
SNAP_LOG(ERROR) << "Failed to find dm block device for " << partition_name;
return;
}
if (!VerifyPartition(partition_name, dm_block_devices.at(partition_name))) {
SNAP_LOG(ERROR) << "Partition: " << partition_name
<< " Block-device: " << dm_block_devices.at(partition_name)
<< " verification failed";
}
succeeded = true;
}
bool UpdateVerify::VerifyBlocks(const std::string& partition_name,
const std::string& dm_block_device, off_t offset, int skip_blocks,
uint64_t dev_sz) {
unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY | O_DIRECT)));
if (fd < 0) {
SNAP_LOG(ERROR) << "open failed: " << dm_block_device;
return false;
}
loff_t file_offset = offset;
const uint64_t read_sz = kBlockSizeVerify;
void* addr;
ssize_t page_size = getpagesize();
if (posix_memalign(&addr, page_size, read_sz) < 0) {
SNAP_PLOG(ERROR) << "posix_memalign failed "
<< " page_size: " << page_size << " read_sz: " << read_sz;
return false;
}
std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
uint64_t bytes_read = 0;
while (true) {
size_t to_read = std::min((dev_sz - file_offset), read_sz);
if (!android::base::ReadFullyAtOffset(fd.get(), buffer.get(), to_read, file_offset)) {
SNAP_PLOG(ERROR) << "Failed to read block from block device: " << dm_block_device
<< " partition-name: " << partition_name
<< " at offset: " << file_offset << " read-size: " << to_read
<< " block-size: " << dev_sz;
return false;
}
bytes_read += to_read;
file_offset += (skip_blocks * kBlockSizeVerify);
if (file_offset >= dev_sz) {
break;
}
}
SNAP_LOG(DEBUG) << "Verification success with bytes-read: " << bytes_read
<< " dev_sz: " << dev_sz << " partition_name: " << partition_name;
return true;
}
bool UpdateVerify::VerifyPartition(const std::string& partition_name,
const std::string& dm_block_device) {
android::base::Timer timer;
SNAP_LOG(INFO) << "VerifyPartition: " << partition_name << " Block-device: " << dm_block_device;
bool succeeded = false;
auto scope_guard = android::base::make_scope_guard([this, &succeeded]() -> void {
if (!succeeded) {
UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_FAILED);
}
});
unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY | O_DIRECT)));
if (fd < 0) {
SNAP_LOG(ERROR) << "open failed: " << dm_block_device;
return false;
}
uint64_t dev_sz = get_block_device_size(fd.get());
if (!dev_sz) {
SNAP_PLOG(ERROR) << "Could not determine block device size: " << dm_block_device;
return false;
}
if (!IsBlockAligned(dev_sz)) {
SNAP_LOG(ERROR) << "dev_sz: " << dev_sz << " is not block aligned";
return false;
}
/*
* Not all partitions are of same size. Some partitions are as small as
* 100Mb. We can just finish them in a single thread. For bigger partitions
* such as product, 4 threads are sufficient enough.
*
* TODO: With io_uring SQ_POLL support, we can completely cut this
* down to just single thread for all partitions and potentially verify all
* the partitions with zero syscalls. Additionally, since block layer
* supports polling, IO_POLL could be used which will further cut down
* latency.
*/
int num_threads = kMinThreadsToVerify;
if (dev_sz > kThresholdSize) {
num_threads = kMaxThreadsToVerify;
}
std::vector<std::future<bool>> threads;
off_t start_offset = 0;
const int skip_blocks = num_threads;
while (num_threads) {
threads.emplace_back(std::async(std::launch::async, &UpdateVerify::VerifyBlocks, this,
partition_name, dm_block_device, start_offset, skip_blocks,
dev_sz));
start_offset += kBlockSizeVerify;
num_threads -= 1;
if (start_offset >= dev_sz) {
break;
}
}
bool ret = true;
for (auto& t : threads) {
ret = t.get() && ret;
}
if (ret) {
succeeded = true;
UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_SUCCESS);
SNAP_LOG(INFO) << "Partition: " << partition_name << " Block-device: " << dm_block_device
<< " Size: " << dev_sz
<< " verification success. Duration : " << timer.duration().count() << " ms";
return true;
}
return false;
}
} // namespace snapshot
} // namespace android