Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2a55e811 authored by David Anderson's avatar David Anderson
Browse files

libsnapshot: Harden merge-in-recovery for factory data resets.

This addresses bugs where unexpected edge cases in the snapshot state
could prevent a merge or data wipe from completing in recovery.

Invalid snapshots (eg on the wrong slot) are now ignored in
CheckMergeState(). This prevents those snapshots from being detected as
"cancelled" and thus falling into RemoveAllUpdateState.

ProcessUpdateState will no longer call RemoveAllUpdateState in recovery.
Furthermore, when RemoveAllUpdateState fails, we will no longer return
the "old" state. If this state is Merging, ProcessUpdateState can
infinite loop.

Finally, HandleImminentDataWipe now guarantees the final state will be
either MergeFailed or None. For testing purposes, the old mechanism was
too susceptible to state machinery changes. And for practical purposes,
either we're going to wipe data (which removes the OTA), or a merge
failed and we can't. So the effective outcome is always no update or a
failed update.

Bug: 179006671
Test: vts_libsnapshot_test
Change-Id: Idcb30151e4d35cbeccf14369f09707ae94a57c66
parent 4e936b4b
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -694,7 +694,7 @@ class SnapshotManager final : public ISnapshotManager {
    // Call ProcessUpdateState and handle states with special rules before data wipe. Specifically,
    // if |allow_forward_merge| and allow-forward-merge indicator exists, initiate merge if
    // necessary.
    bool ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
    UpdateState ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
                                             const std::function<bool()>& callback);

    // Return device string of a mapped image, or if it is not available, the mapped image path.
+40 −11
Original line number Diff line number Diff line
@@ -894,6 +894,8 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& cal
                                                const std::function<bool()>& before_cancel) {
    while (true) {
        UpdateState state = CheckMergeState(before_cancel);
        LOG(INFO) << "ProcessUpdateState handling state: " << state;

        if (state == UpdateState::MergeFailed) {
            AcknowledgeMergeFailure();
        }
@@ -920,13 +922,15 @@ UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before
    }

    UpdateState state = CheckMergeState(lock.get(), before_cancel);
    LOG(INFO) << "CheckMergeState for snapshots returned: " << state;

    if (state == UpdateState::MergeCompleted) {
        // Do this inside the same lock. Failures get acknowledged without the
        // lock, because flock() might have failed.
        AcknowledgeMergeSuccess(lock.get());
    } else if (state == UpdateState::Cancelled) {
        if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
            return ReadSnapshotUpdateStatus(lock.get()).state();
        if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
            LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
        }
    }
    return state;
@@ -968,13 +972,23 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
        return UpdateState::MergeFailed;
    }

    auto other_suffix = device_->GetOtherSlotSuffix();

    bool cancelled = false;
    bool failed = false;
    bool merging = false;
    bool needs_reboot = false;
    bool wrong_phase = false;
    for (const auto& snapshot : snapshots) {
        if (android::base::EndsWith(snapshot, other_suffix)) {
            // This will have triggered an error message in InitiateMerge already.
            LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
            continue;
        }

        UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status);
        LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << snapshot_state;

        switch (snapshot_state) {
            case UpdateState::MergeFailed:
                failed = true;
@@ -1173,7 +1187,7 @@ void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
    // indicator that cleanup is needed on reboot. If a factory data reset
    // was requested, it doesn't matter, everything will get wiped anyway.
    // To make testing easier we consider a /data wipe as cleaned up.
    if (device_->IsRecovery() && !in_factory_data_reset_) {
    if (device_->IsRecovery()) {
        WriteUpdateState(lock, UpdateState::MergeCompleted);
        return;
    }
@@ -3213,10 +3227,11 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callba
    };

    in_factory_data_reset_ = true;
    bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
    UpdateState state =
            ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
    in_factory_data_reset_ = false;

    if (!ok) {
    if (state == UpdateState::MergeFailed) {
        return false;
    }

@@ -3224,6 +3239,16 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callba
    if (!UnmapAllPartitionsInRecovery()) {
        LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
    }

    if (state != UpdateState::None) {
        auto lock = LockExclusive();
        if (!lock) return false;

        // Zap the update state so the bootloader doesn't think we're still
        // merging. It's okay if this fails, it's informative only at this
        // point.
        WriteUpdateState(lock.get(), UpdateState::None);
    }
    return true;
}

@@ -3258,7 +3283,7 @@ bool SnapshotManager::FinishMergeInRecovery() {
    return true;
}

bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
                                                          const std::function<bool()>& callback) {
    auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
    UpdateState state = ProcessUpdateState(callback);
@@ -3266,7 +3291,7 @@ bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
    switch (state) {
        case UpdateState::MergeFailed:
            LOG(ERROR) << "Unrecoverable merge failure detected.";
            return false;
            return state;
        case UpdateState::Unverified: {
            // If an OTA was just applied but has not yet started merging:
            //
@@ -3286,8 +3311,12 @@ bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
                if (allow_forward_merge &&
                    access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
                    LOG(INFO) << "Forward merge allowed, initiating merge now.";
                    return InitiateMerge() &&
                           ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);

                    if (!InitiateMerge()) {
                        LOG(ERROR) << "Failed to initiate merge on data wipe.";
                        return UpdateState::MergeFailed;
                    }
                    return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
                }

                LOG(ERROR) << "Reverting to old slot since update will be deleted.";
@@ -3305,7 +3334,7 @@ bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
        default:
            break;
    }
    return true;
    return state;
}

bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
+9 −5
Original line number Diff line number Diff line
@@ -636,8 +636,8 @@ TEST_F(SnapshotTest, FlashSuperDuringMerge) {

    // Because the status is Merging, we must call ProcessUpdateState, which should
    // detect a cancelled update.
    ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::Cancelled);
    ASSERT_EQ(sm->GetUpdateState(), UpdateState::None);
    ASSERT_EQ(init->ProcessUpdateState(), UpdateState::Cancelled);
    ASSERT_EQ(init->GetUpdateState(), UpdateState::None);
}

TEST_F(SnapshotTest, UpdateBootControlHal) {
@@ -1767,7 +1767,7 @@ TEST_F(SnapshotUpdateTest, DataWipeRollbackInRecovery) {
    ASSERT_TRUE(new_sm->HandleImminentDataWipe());
    // Manually mount metadata so that we can call GetUpdateState() below.
    MountMetadata();
    EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
    EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None);
    EXPECT_TRUE(test_device->IsSlotUnbootable(1));
    EXPECT_FALSE(test_device->IsSlotUnbootable(0));
}
@@ -2105,8 +2105,12 @@ TEST_P(FlashAfterUpdateTest, FlashSlotAfterUpdate) {

    // There should be no snapshot to merge.
    auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, flashed_slot_suffix));
    // update_enigne calls ProcessUpdateState first -- should see Cancelled.
    if (flashed_slot == 0 && after_merge) {
        ASSERT_EQ(UpdateState::MergeCompleted, new_sm->ProcessUpdateState());
    } else {
        // update_engine calls ProcessUpdateState first -- should see Cancelled.
        ASSERT_EQ(UpdateState::Cancelled, new_sm->ProcessUpdateState());
    }

    // Next OTA calls CancelUpdate no matter what.
    ASSERT_TRUE(new_sm->CancelUpdate());