Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5312c466 authored by David Anderson's avatar David Anderson
Browse files

libsnapshot: Implement merge flow.

This implements InitiateMerge() and WaitForMerge(). InitiateMerge() is
meant to be called after an update has been marked successful.
WaitForMerge() is designed to be called either: immediately after
InitiateMerge, or during each subsequent boot where merging has not
completed.

InitiateMerge converts each snapshot device to a snapshot-merge device.

WaitForMerge polls each snapshot-merge device until no device reports a
"merging" state. One of the following states can result from this:
 - MergeFailed. This will happen if any device failed to merge, or we
   were unable to poll, or any other system-level failure occurred.
 - MergeNeedsReboot. This will happen if a snapshot-merge device has
   completed merging, but we were unable to clean it up due to something
   holding a resource open.
 - MergeCompleted. This indicates that all snapshots completed merging
   and were cleaned up.

If WaitForMerge() returns MergeCompleted, then all snapshots have been
removed and a new update can begin. GetUpdateState() will return None.

MergeFailed and MergeNeedsReboot, on the other hand, are "sticky". They
indicate a merge is still pending. When called again, WaitForMerge()
will poll again to attempt to make more progress in the merge. For
NeedsReboot, a single reboot will ensure all resources are released and
the next WaitForMerge() will successfully finish cleanup. In the failure
case, it is unlikely the next WaitForMerge will succeed, but we always
retry anyway (there is no harm in doing so, and if we get lucky, the
device can take more OTAs).

Bug: 136678799
Test: libsnapshot_test gtests
Change-Id: I5e93fcbffee1973da5ff76363df12d6317a7a7c7
parent 98609503
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -197,6 +197,7 @@ class DeviceMapper final {
    struct TargetInfo {
        struct dm_target_spec spec;
        std::string data;
        TargetInfo() {}
        TargetInfo(const struct dm_target_spec& spec, const std::string& data)
            : spec(spec), data(data) {}
    };
+56 −4
Original line number Diff line number Diff line
@@ -22,7 +22,8 @@
#include <vector>

#include <android-base/unique_fd.h>
#include <libdm/dm_target.h>
#include <libdm/dm.h>
#include <libfiemap/image_manager.h>

#ifndef FRIEND_TEST
#define FRIEND_TEST(test_set_name, individual_test) \
@@ -38,7 +39,7 @@ class IImageManager;

namespace snapshot {

enum class UpdateState {
enum class UpdateState : unsigned int {
    // No update or merge is in progress.
    None,

@@ -51,6 +52,10 @@ enum class UpdateState {
    // The kernel is merging in the background.
    Merging,

    // Post-merge cleanup steps could not be completed due to a transient
    // error, but the next reboot will finish any pending operations.
    MergeNeedsReboot,

    // Merging is complete, and needs to be acknowledged.
    MergeCompleted,

@@ -94,8 +99,23 @@ class SnapshotManager final {

    // Wait for the current merge to finish, then perform cleanup when it
    // completes. It is necessary to call this after InitiateMerge(), or when
    // a merge is detected for the first time after boot.
    bool WaitForMerge();
    // a merge state is detected during boot.
    //
    // Note that after calling WaitForMerge(), GetUpdateState() may still return
    // that a merge is in progress:
    //   MergeFailed indicates that a fatal error occurred. WaitForMerge() may
    //   called any number of times again to attempt to make more progress, but
    //   we do not expect it to succeed if a catastrophic error occurred.
    //
    //   MergeNeedsReboot indicates that the merge has completed, but cleanup
    //   failed. This can happen if for some reason resources were not closed
    //   properly. In this case another reboot is needed before we can take
    //   another OTA. However, WaitForMerge() can be called again without
    //   rebooting, to attempt to finish cleanup anyway.
    //
    //   MergeCompleted indicates that the update has fully completed.
    //   GetUpdateState will return None, and a new update can begin.
    UpdateState WaitForMerge();

    // Find the status of the current update, if any.
    //
@@ -109,9 +129,14 @@ class SnapshotManager final {
    FRIEND_TEST(SnapshotTest, CreateSnapshot);
    FRIEND_TEST(SnapshotTest, MapSnapshot);
    FRIEND_TEST(SnapshotTest, MapPartialSnapshot);
    FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot);
    FRIEND_TEST(SnapshotTest, Merge);
    FRIEND_TEST(SnapshotTest, MergeCannotRemoveCow);
    friend class SnapshotTest;

    using DmTargetSnapshot = android::dm::DmTargetSnapshot;
    using IImageManager = android::fiemap::IImageManager;
    using TargetInfo = android::dm::DeviceMapper::TargetInfo;

    explicit SnapshotManager(IDeviceInfo* info);

@@ -203,6 +228,33 @@ class SnapshotManager final {
        uint64_t metadata_sectors = 0;
    };

    // Helpers for merging.
    bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name);
    bool RewriteSnapshotDeviceTable(const std::string& dm_name);
    bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name);
    void AcknowledgeMergeSuccess(LockedFile* lock);
    void AcknowledgeMergeFailure();

    // Note that these require the name of the device containing the snapshot,
    // which may be the "inner" device. Use GetsnapshotDeviecName().
    bool QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
                             DmTargetSnapshot::Status* status);
    bool IsSnapshotDevice(const std::string& dm_name, TargetInfo* target = nullptr);

    // Internal callback for when merging is complete.
    bool OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
                                 const SnapshotStatus& status);
    bool CollapseSnapshotDevice(const std::string& name, const SnapshotStatus& status);

    // Only the following UpdateStates are used here:
    //   UpdateState::Merging
    //   UpdateState::MergeCompleted
    //   UpdateState::MergeFailed
    //   UpdateState::MergeNeedsReboot
    UpdateState CheckMergeState();
    UpdateState CheckMergeState(LockedFile* lock);
    UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name);

    // Interact with status files under /metadata/ota/snapshots.
    bool WriteSnapshotStatus(LockedFile* lock, const std::string& name,
                             const SnapshotStatus& status);
+532 −10

File changed.

Preview size limit exceeded, changes collapsed.

+123 −6
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <iostream>

#include <android-base/file.h>
#include <android-base/properties.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
@@ -82,12 +83,7 @@ class SnapshotTest : public ::testing::Test {
        // are tests, we don't care, destroy everything that might exist.
        std::vector<std::string> snapshots = {"test-snapshot"};
        for (const auto& snapshot : snapshots) {
            if (dm_.GetState(snapshot) != DmDeviceState::INVALID) {
                dm_.DeleteDevice(snapshot);
            }
            if (dm_.GetState(snapshot + "-inner") != DmDeviceState::INVALID) {
                dm_.DeleteDevice(snapshot + "-inner");
            }
            DeleteSnapshotDevice(snapshot);
            temp_images_.emplace_back(snapshot + "-cow");

            auto status_file = sm->GetSnapshotStatusFilePath(snapshot);
@@ -120,6 +116,16 @@ class SnapshotTest : public ::testing::Test {
        return image_manager_->MapImageDevice(name, 10s, path);
    }

    bool DeleteSnapshotDevice(const std::string& snapshot) {
        if (dm_.GetState(snapshot) != DmDeviceState::INVALID) {
            if (!dm_.DeleteDevice(snapshot)) return false;
        }
        if (dm_.GetState(snapshot + "-inner") != DmDeviceState::INVALID) {
            if (!dm_.DeleteDevice(snapshot + "-inner")) return false;
        }
        return true;
    }

    DeviceMapper& dm_;
    std::unique_ptr<SnapshotManager::LockedFile> lock_;
    std::vector<std::string> temp_images_;
@@ -182,6 +188,117 @@ TEST_F(SnapshotTest, MapPartialSnapshot) {
    ASSERT_TRUE(android::base::StartsWith(snap_device, "/dev/block/dm-"));
}

TEST_F(SnapshotTest, NoMergeBeforeReboot) {
    ASSERT_TRUE(AcquireLock());

    // Set the state to Unverified, as if we finished an update.
    ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::Unverified));

    // Release the lock.
    lock_ = nullptr;

    // Merge should fail, since we didn't mark the device as rebooted.
    ASSERT_FALSE(sm->InitiateMerge());
}

TEST_F(SnapshotTest, Merge) {
    ASSERT_TRUE(AcquireLock());

    static const uint64_t kDeviceSize = 1024 * 1024;
    ASSERT_TRUE(sm->CreateSnapshot(lock_.get(), "test-snapshot", kDeviceSize, kDeviceSize,
                                   kDeviceSize));

    std::string base_device, snap_device;
    ASSERT_TRUE(CreateTempDevice("base-device", kDeviceSize, &base_device));
    ASSERT_TRUE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, 10s, &snap_device));

    std::string test_string = "This is a test string.";
    {
        unique_fd fd(open(snap_device.c_str(), O_RDWR | O_CLOEXEC | O_SYNC));
        ASSERT_GE(fd, 0);
        ASSERT_TRUE(android::base::WriteFully(fd, test_string.data(), test_string.size()));
    }

    // Note: we know the name of the device is test-snapshot because we didn't
    // request a linear segment.
    DeviceMapper::TargetInfo target;
    ASSERT_TRUE(sm->IsSnapshotDevice("test-snapshot", &target));
    ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "snapshot");

    // Set the state to Unverified, as if we finished an update.
    ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::Unverified));

    // Release the lock.
    lock_ = nullptr;

    test_device->set_is_running_snapshot(true);
    ASSERT_TRUE(sm->InitiateMerge());

    // The device should have been switched to a snapshot-merge target.
    ASSERT_TRUE(sm->IsSnapshotDevice("test-snapshot", &target));
    ASSERT_EQ(DeviceMapper::GetTargetType(target.spec), "snapshot-merge");

    // We should not be able to cancel an update now.
    ASSERT_FALSE(sm->CancelUpdate());

    ASSERT_EQ(sm->WaitForMerge(), UpdateState::MergeCompleted);
    ASSERT_EQ(sm->GetUpdateState(), UpdateState::None);

    // The device should no longer be a snapshot or snapshot-merge.
    ASSERT_FALSE(sm->IsSnapshotDevice("test-snapshot"));

    // Test that we can read back the string we wrote to the snapshot.
    unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
    ASSERT_GE(fd, 0);

    std::string buffer(test_string.size(), '\0');
    ASSERT_TRUE(android::base::ReadFully(fd, buffer.data(), buffer.size()));
    ASSERT_EQ(test_string, buffer);
}

TEST_F(SnapshotTest, MergeCannotRemoveCow) {
    ASSERT_TRUE(AcquireLock());

    static const uint64_t kDeviceSize = 1024 * 1024;
    ASSERT_TRUE(sm->CreateSnapshot(lock_.get(), "test-snapshot", kDeviceSize, kDeviceSize,
                                   kDeviceSize));

    std::string base_device, snap_device;
    ASSERT_TRUE(CreateTempDevice("base-device", kDeviceSize, &base_device));
    ASSERT_TRUE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, 10s, &snap_device));

    // Keep an open handle to the cow device. This should cause the merge to
    // be incomplete.
    auto cow_path = android::base::GetProperty("gsid.mapped_image.test-snapshot-cow", "");
    unique_fd fd(open(cow_path.c_str(), O_RDONLY | O_CLOEXEC));
    ASSERT_GE(fd, 0);

    // Set the state to Unverified, as if we finished an update.
    ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::Unverified));

    // Release the lock.
    lock_ = nullptr;

    test_device->set_is_running_snapshot(true);
    ASSERT_TRUE(sm->InitiateMerge());

    // COW cannot be removed due to open fd, so expect a soft failure.
    ASSERT_EQ(sm->WaitForMerge(), UpdateState::MergeNeedsReboot);

    // Forcefully delete the snapshot device, so it looks like we just rebooted.
    ASSERT_TRUE(DeleteSnapshotDevice("test-snapshot"));

    // Map snapshot should fail now, because we're in a merge-complete state.
    ASSERT_TRUE(AcquireLock());
    ASSERT_FALSE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, 10s, &snap_device));

    // Release everything and now the merge should complete.
    fd = {};
    lock_ = nullptr;

    ASSERT_EQ(sm->WaitForMerge(), UpdateState::MergeCompleted);
}

}  // namespace snapshot
}  // namespace android