Merge "Verify partitions using registered buffers" into main am: 95b890708d (32e6fc05) · Commits · e / os / android_system_core

fs_mgr/libsnapshot/snapuserd/Android.bp

+4 −0

Original line number	Diff line number	Diff line
		@@ -88,6 +88,7 @@ cc_library_static {
		"libprocessgroup",
		"libprocessgroup_util",
		"libjsoncpp",
		"liburing_cpp",
		],
		export_include_dirs: ["include"],
		header_libs: [
		@@ -136,6 +137,7 @@ cc_defaults {
		"libext4_utils",
		"liburing",
		"libzstd",
		"liburing_cpp",
		],

		header_libs: [
		@@ -222,6 +224,7 @@ cc_defaults {
		"libjsoncpp",
		"liburing",
		"libz",
		"liburing_cpp",
		],
		include_dirs: [
		".",
		@@ -319,6 +322,7 @@ cc_binary_host {
		"libjsoncpp",
		"liburing",
		"libz",
		"liburing_cpp",
		],
		include_dirs: [
		".",

fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp

+101 −44

Original line number	Diff line number	Diff line
		@@ -22,6 +22,7 @@

		#include "android-base/properties.h"
		#include "snapuserd_core.h"
		#include "utility.h"

		namespace android {
		namespace snapshot {
		@@ -104,43 +105,108 @@ bool UpdateVerify::VerifyBlocks(const std::string& partition_name,
		return false;
		}

		loff_t file_offset = offset;
		auto verify_block_size = android::base::GetUintProperty<uint>("ro.virtual_ab.verify_block_size",
		kBlockSizeVerify);
		const uint64_t read_sz = verify_block_size;
		int queue_depth = std::max(queue_depth_, 1);
		int verify_block_size = verify_block_size_;

		// Smaller partitions don't need a bigger queue-depth.
		// This is required for low-memory devices.
		if (dev_sz < threshold_size_) {
		queue_depth = std::max(queue_depth / 2, 1);
		verify_block_size >>= 2;
		}

		if (!IsBlockAligned(verify_block_size)) {
		verify_block_size = EXT4_ALIGN(verify_block_size, BLOCK_SZ);
		}

		std::unique_ptr<io_uring_cpp::IoUringInterface> ring =
		io_uring_cpp::IoUringInterface::CreateLinuxIoUring(queue_depth, 0);
		if (ring.get() == nullptr) {
		PLOG(ERROR) << "Verify: io_uring_queue_init failed for queue_depth: " << queue_depth;
		return false;
		}

		std::unique_ptr<struct iovec[]> vecs = std::make_unique<struct iovec[]>(queue_depth);
		std::vector<std::unique_ptr<void, decltype(&::free)>> buffers;
		for (int i = 0; i < queue_depth; i++) {
		void* addr;
		ssize_t page_size = getpagesize();
		if (posix_memalign(&addr, page_size, read_sz) < 0) {
		SNAP_PLOG(ERROR) << "posix_memalign failed "
		<< " page_size: " << page_size << " read_sz: " << read_sz;
		if (posix_memalign(&addr, page_size, verify_block_size) < 0) {
		LOG(ERROR) << "posix_memalign failed";
		return false;
		}

		std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);

		uint64_t bytes_read = 0;

		while (true) {
		size_t to_read = std::min((dev_sz - file_offset), read_sz);
		buffers.emplace_back(addr, ::free);
		vecs[i].iov_base = addr;
		vecs[i].iov_len = verify_block_size;
		}

		if (!android::base::ReadFullyAtOffset(fd.get(), buffer.get(), to_read, file_offset)) {
		SNAP_PLOG(ERROR) << "Failed to read block from block device: " << dm_block_device
		<< " partition-name: " << partition_name
		<< " at offset: " << file_offset << " read-size: " << to_read
		<< " block-size: " << dev_sz;
		auto ret = ring->RegisterBuffers(vecs.get(), queue_depth);
		if (!ret.IsOk()) {
		SNAP_LOG(ERROR) << "io_uring_register_buffers failed: " << ret.ErrCode();
		return false;
		}

		bytes_read += to_read;
		file_offset += (skip_blocks * verify_block_size);
		loff_t file_offset = offset;
		const uint64_t read_sz = verify_block_size;
		uint64_t total_read = 0;
		int num_submitted = 0;

		SNAP_LOG(DEBUG) << "VerifyBlocks: queue_depth: " << queue_depth
		<< " verify_block_size: " << verify_block_size << " dev_sz: " << dev_sz
		<< " file_offset: " << file_offset << " skip_blocks: " << skip_blocks;

		while (file_offset < dev_sz) {
		for (size_t i = 0; i < queue_depth; i++) {
		uint64_t to_read = std::min((dev_sz - file_offset), read_sz);
		if (to_read <= 0) break;

		const auto sqe =
		ring->PrepReadFixed(fd.get(), vecs[i].iov_base, to_read, file_offset, i);
		if (!sqe.IsOk()) {
		SNAP_PLOG(ERROR) << "PrepReadFixed failed";
		return false;
		}
		file_offset += (skip_blocks * to_read);
		total_read += to_read;
		num_submitted += 1;
		if (file_offset >= dev_sz) {
		break;
		}
		}

		SNAP_LOG(DEBUG) << "Verification success with bytes-read: " << bytes_read
		<< " dev_sz: " << dev_sz << " partition_name: " << partition_name;
		if (num_submitted == 0) {
		break;
		}

		const auto io_submit = ring->SubmitAndWait(num_submitted);
		if (!io_submit.IsOk()) {
		SNAP_LOG(ERROR) << "SubmitAndWait failed: " << io_submit.ErrMsg()
		<< " for: " << num_submitted << " entries.";
		return false;
		}

		SNAP_LOG(DEBUG) << "io_uring_submit: " << total_read << "num_submitted: " << num_submitted
		<< "ret: " << ret;

		const auto cqes = ring->PopCQE(num_submitted);
		if (cqes.IsErr()) {
		SNAP_LOG(ERROR) << "PopCqe failed for: " << num_submitted
		<< " error: " << cqes.GetError().ErrMsg();
		return false;
		}
		for (const auto& cqe : cqes.GetResult()) {
		if (cqe.res < 0) {
		SNAP_LOG(ERROR) << "I/O failed: cqe->res: " << cqe.res;
		return false;
		}
		num_submitted -= 1;
		}
		}

		SNAP_LOG(DEBUG) << "Verification success with io_uring: "
		<< " dev_sz: " << dev_sz << " partition_name: " << partition_name
		<< " total_read: " << total_read;

		return true;
		}
		@@ -175,21 +241,14 @@ bool UpdateVerify::VerifyPartition(const std::string& partition_name,
		return false;
		}

		/*
		* Not all partitions are of the same size. Some partitions are as small as
		* 100MB. We can just finish them in a single thread. For bigger partitions
		* such as product, 4 threads are sufficient.
		*
		* TODO: With io_uring SQ_POLL support, we can completely cut this
		* down to just single thread for all partitions and potentially verify all
		* the partitions with zero syscalls. Additionally, since block layer
		* supports polling, IO_POLL could be used which will further cut down
		* latency.
		*/
		if (!KernelSupportsIoUring()) {
		SNAP_LOG(INFO) << "Kernel does not support io_uring. Skipping verification.\n";
		// This will fallback to update_verifier to do the verification.
		return false;
		}

		int num_threads = kMinThreadsToVerify;
		auto verify_threshold_size = android::base::GetUintProperty<uint>(
		"ro.virtual_ab.verify_threshold_size", kThresholdSize);
		if (dev_sz > verify_threshold_size) {
		if (dev_sz > threshold_size_) {
		num_threads = kMaxThreadsToVerify;
		}

		@@ -197,13 +256,11 @@ bool UpdateVerify::VerifyPartition(const std::string& partition_name,
		off_t start_offset = 0;
		const int skip_blocks = num_threads;

		auto verify_block_size =
		android::base::GetUintProperty("ro.virtual_ab.verify_block_size", kBlockSizeVerify);
		while (num_threads) {
		threads.emplace_back(std::async(std::launch::async, &UpdateVerify::VerifyBlocks, this,
		partition_name, dm_block_device, start_offset, skip_blocks,
		dev_sz));
		start_offset += verify_block_size;
		start_offset += verify_block_size_;
		num_threads -= 1;
		if (start_offset >= dev_sz) {
		break;
		@@ -218,9 +275,9 @@ bool UpdateVerify::VerifyPartition(const std::string& partition_name,
		if (ret) {
		succeeded = true;
		UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_SUCCESS);
		SNAP_LOG(INFO) << "Partition: " << partition_name << " Block-device: " << dm_block_device
		<< " Size: " << dev_sz
		<< " verification success. Duration : " << timer.duration().count() << " ms";
		SNAP_LOG(INFO) << "Partition verification success: " << partition_name
		<< " Block-device: " << dm_block_device << " Size: " << dev_sz
		<< " Duration : " << timer.duration().count() << " ms";
		return true;
		}

fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h

+13 −15

Original line number	Diff line number	Diff line
		@@ -15,6 +15,7 @@

		#pragma once

		#include <liburing.h>
		#include <stdint.h>
		#include <sys/types.h>

		@@ -22,6 +23,7 @@
		#include <mutex>
		#include <string>

		#include <liburing_cpp/IoUring.h>
		#include <snapuserd/snapuserd_kernel.h>
		#include <storage_literals/storage_literals.h>

		@@ -48,27 +50,23 @@ class UpdateVerify {
		std::mutex m_lock_;
		std::condition_variable m_cv_;

		int kMinThreadsToVerify = 1;
		int kMaxThreadsToVerify = 3;

		/*
		* Scanning of partitions is an expensive operation both in terms of memory
		* and CPU usage. The goal here is to scan the partitions fast enough without
		* significant increase in the boot time.
		*
		* Partitions such as system, product which may be huge and may need multiple
		* threads to speed up the verification process. Using multiple threads for
		* all partitions may increase CPU usage significantly. Hence, limit that to
		* 1 thread per partition.
		* To optimize partition scanning speed without significantly impacting boot time,
		* we employ O_DIRECT, bypassing the page-cache. However, O_DIRECT's memory
		* allocation from CMA can be problematic on devices with restricted CMA space.
		* To address this, io_uring_register_buffers() pre-registers I/O buffers,
		* preventing CMA usage. See b/401952955 for more details.
		*
		* These numbers were derived by monitoring the memory and CPU pressure
		* (/proc/pressure/{cpu,memory}) and monitoring the Inactive(file) and
		* Active(file) pages from /proc/meminfo.
		*
		* Additionally, for low memory devices, it is advisable to use O_DIRECT
		* functionality for source block device.
		*/
		int kMinThreadsToVerify = 1;
		int kMaxThreadsToVerify = 3;
		uint64_t kThresholdSize = 750_MiB;
		uint64_t kBlockSizeVerify = 2_MiB;
		uint64_t verify_block_size_ = 1_MiB;
		uint64_t threshold_size_ = 2_GiB;
		int queue_depth_ = 4;

		// Reports whether read_size has no bits set under the (BLOCK_SZ - 1) mask,
		// i.e. whether it is BLOCK_SZ-aligned (assumes BLOCK_SZ is a power of
		// two -- TODO confirm against its definition).
		bool IsBlockAligned(uint64_t read_size) {
		    const auto low_bits = read_size & (BLOCK_SZ - 1);
		    return low_bits == 0;
		}
		void UpdatePartitionVerificationState(UpdateVerifyState state);