Loading fs_mgr/libsnapshot/android/snapshot/snapshot.proto +25 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,28 @@ enum UpdateState { Cancelled = 7; }; // Next: 7 // Next 14: // // To understand the source of each failure, read snapshot.cpp. To handle new // sources of failure, avoid reusing an existing code; add a new code instead. enum MergeFailureCode { Ok = 0; ReadStatus = 1; GetTableInfo = 2; UnknownTable = 3; GetTableParams = 4; ActivateNewTable = 5; AcquireLock = 6; ListSnapshots = 7; WriteStatus = 8; UnknownTargetType = 9; QuerySnapshotStatus = 10; ExpectedMergeTarget = 11; UnmergedSectorsAfterCompletion = 12; UnexpectedMergeState = 13; }; // Next: 8 message SnapshotUpdateStatus { UpdateState state = 1; Loading @@ -160,6 +181,9 @@ message SnapshotUpdateStatus { // Merge phase (if state == MERGING). MergePhase merge_phase = 6; // Merge failure code, filled if state == MergeFailed. MergeFailureCode merge_failure_code = 7; } // Next: 9 Loading fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +18 −8 Original line number Diff line number Diff line Loading @@ -381,6 +381,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotTest, MapPartialSnapshot); FRIEND_TEST(SnapshotTest, MapSnapshot); FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, MergeFailureCode); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); FRIEND_TEST(SnapshotUpdateTest, DaemonTransition); Loading Loading @@ -532,7 +533,8 @@ class SnapshotManager final : public ISnapshotManager { // Interact with /metadata/ota/state. UpdateState ReadUpdateState(LockedFile* file); SnapshotUpdateStatus ReadSnapshotUpdateStatus(LockedFile* file); bool WriteUpdateState(LockedFile* file, UpdateState state); bool WriteUpdateState(LockedFile* file, UpdateState state, MergeFailureCode failure_code = MergeFailureCode::Ok); bool WriteSnapshotUpdateStatus(LockedFile* file, const SnapshotUpdateStatus& status); std::string GetStateFilePath() const; Loading @@ -541,12 +543,12 @@ class SnapshotManager final : public ISnapshotManager { std::string GetMergeStateFilePath() const; // Helpers for merging. bool MergeSecondPhaseSnapshots(LockedFile* lock); bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name); bool RewriteSnapshotDeviceTable(const std::string& dm_name); MergeFailureCode MergeSecondPhaseSnapshots(LockedFile* lock); MergeFailureCode SwitchSnapshotToMerge(LockedFile* lock, const std::string& name); MergeFailureCode RewriteSnapshotDeviceTable(const std::string& dm_name); bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name); void AcknowledgeMergeSuccess(LockedFile* lock); void AcknowledgeMergeFailure(); void AcknowledgeMergeFailure(MergeFailureCode failure_code); MergePhase DecideMergePhase(const SnapshotStatus& status); std::unique_ptr<LpMetadata> ReadCurrentMetadata(); Loading @@ -573,14 +575,22 @@ class SnapshotManager final : public ISnapshotManager { const SnapshotStatus& status); bool CollapseSnapshotDevice(const std::string& name, const SnapshotStatus& status); struct MergeResult { explicit MergeResult(UpdateState state, MergeFailureCode failure_code = MergeFailureCode::Ok) : state(state), failure_code(failure_code) {} UpdateState state; MergeFailureCode failure_code; }; // Only the following UpdateStates are used here: // UpdateState::Merging // UpdateState::MergeCompleted // UpdateState::MergeFailed // UpdateState::MergeNeedsReboot UpdateState CheckMergeState(const std::function<bool()>& before_cancel); UpdateState CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel); UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name, MergeResult CheckMergeState(const std::function<bool()>& before_cancel); MergeResult CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel); MergeResult CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status); // Interact with status files under /metadata/ota/snapshots. Loading fs_mgr/libsnapshot/snapshot.cpp +97 −78 Original line number Diff line number Diff line Loading @@ -746,32 +746,35 @@ bool SnapshotManager::InitiateMerge() { return false; } bool rewrote_all = true; auto reported_code = MergeFailureCode::Ok; for (const auto& snapshot : *merge_group) { // If this fails, we have no choice but to continue. Everything must // be merged. This is not an ideal state to be in, but it is safe, // because we the next boot will try again. if (!SwitchSnapshotToMerge(lock.get(), snapshot)) { auto code = SwitchSnapshotToMerge(lock.get(), snapshot); if (code != MergeFailureCode::Ok) { LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot; rewrote_all = false; if (reported_code == MergeFailureCode::Ok) { reported_code = code; } } } // If we couldn't switch everything to a merge target, pre-emptively mark // this merge as failed. It will get acknowledged when WaitForMerge() is // called. if (!rewrote_all) { WriteUpdateState(lock.get(), UpdateState::MergeFailed); if (reported_code != MergeFailureCode::Ok) { WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code); } // Return true no matter what, because a merge was initiated. return true; } bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) { MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) { SnapshotStatus status; if (!ReadSnapshotStatus(lock, name, &status)) { return false; return MergeFailureCode::ReadStatus; } if (status.state() != SnapshotState::CREATED) { LOG(WARNING) << "Snapshot " << name Loading @@ -780,8 +783,8 @@ bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& // After this, we return true because we technically did switch to a merge // target. Everything else we do here is just informational. if (!RewriteSnapshotDeviceTable(name)) { return false; if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) { return code; } status.set_state(SnapshotState::MERGING); Loading @@ -795,26 +798,26 @@ bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& if (!WriteSnapshotStatus(lock, status)) { LOG(ERROR) << "Could not update status file for snapshot: " << name; } return true; return MergeFailureCode::Ok; } bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { auto& dm = DeviceMapper::Instance(); std::vector<DeviceMapper::TargetInfo> old_targets; if (!dm.GetTableInfo(name, &old_targets)) { LOG(ERROR) << "Could not read snapshot device table: " << name; return false; return MergeFailureCode::GetTableInfo; } if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") { LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name; return false; return MergeFailureCode::UnknownTable; } std::string base_device, cow_device; if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) { LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name; return false; return MergeFailureCode::GetTableParams; } DmTable table; Loading @@ -822,10 +825,10 @@ bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { SnapshotStorageMode::Merge, kSnapshotChunkSize); if (!dm.LoadTableAndActivate(name, table)) { LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name; return false; return MergeFailureCode::ActivateNewTable; } LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name; return true; return MergeFailureCode::Ok; } enum class TableQuery { Loading Loading @@ -897,20 +900,20 @@ bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::strin UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback, const std::function<bool()>& before_cancel) { while (true) { UpdateState state = CheckMergeState(before_cancel); LOG(INFO) << "ProcessUpdateState handling state: " << state; auto result = CheckMergeState(before_cancel); LOG(INFO) << "ProcessUpdateState handling state: " << result.state; if (state == UpdateState::MergeFailed) { AcknowledgeMergeFailure(); if (result.state == UpdateState::MergeFailed) { AcknowledgeMergeFailure(result.failure_code); } if (state != UpdateState::Merging) { if (result.state != UpdateState::Merging) { // Either there is no merge, or the merge was finished, so no need // to keep waiting. return state; return result.state; } if (callback && !callback()) { return state; return result.state; } // This wait is not super time sensitive, so we have a relatively Loading @@ -919,36 +922,36 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& cal } } UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) { auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult { auto lock = LockExclusive(); if (!lock) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock); } UpdateState state = CheckMergeState(lock.get(), before_cancel); LOG(INFO) << "CheckMergeState for snapshots returned: " << state; auto result = CheckMergeState(lock.get(), before_cancel); LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state; if (state == UpdateState::MergeCompleted) { if (result.state == UpdateState::MergeCompleted) { // Do this inside the same lock. Failures get acknowledged without the // lock, because flock() might have failed. AcknowledgeMergeSuccess(lock.get()); } else if (state == UpdateState::Cancelled) { } else if (result.state == UpdateState::Cancelled) { if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) { LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update."; } } return state; return result; } UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel) { auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel) -> MergeResult { SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); switch (update_status.state()) { case UpdateState::None: case UpdateState::MergeCompleted: // Harmless races are allowed between two callers of WaitForMerge, // so in both of these cases we just propagate the state. return update_status.state(); return MergeResult(update_status.state()); case UpdateState::Merging: case UpdateState::MergeNeedsReboot: Loading @@ -963,26 +966,26 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // via the merge poll below, but if we never started a merge, we // need to also check here. if (HandleCancelledUpdate(lock, before_cancel)) { return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } return update_status.state(); return MergeResult(update_status.state()); default: return update_status.state(); return MergeResult(update_status.state()); } std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots); } auto other_suffix = device_->GetOtherSlotSuffix(); bool cancelled = false; bool failed = false; bool merging = false; bool needs_reboot = false; bool wrong_phase = false; MergeFailureCode failure_code = MergeFailureCode::Ok; for (const auto& snapshot : snapshots) { if (android::base::EndsWith(snapshot, other_suffix)) { // This will have triggered an error message in InitiateMerge already. Loading @@ -990,12 +993,15 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, continue; } UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status); LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << snapshot_state; auto result = CheckTargetMergeState(lock, snapshot, update_status); LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state; switch (snapshot_state) { switch (result.state) { case UpdateState::MergeFailed: failed = true; // Take the first failure code in case other failures compound. if (failure_code == MergeFailureCode::Ok) { failure_code = result.failure_code; } break; case UpdateState::Merging: merging = true; Loading @@ -1013,8 +1019,10 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, break; default: LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": " << "\"" << snapshot_state << "\""; failed = true; << "\"" << result.state << "\""; if (failure_code == MergeFailureCode::Ok) { failure_code = MergeFailureCode::UnexpectedMergeState; } break; } } Loading @@ -1023,24 +1031,25 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // Note that we handle "Merging" before we handle anything else. We // want to poll until *nothing* is merging if we can, so everything has // a chance to get marked as completed or failed. return UpdateState::Merging; return MergeResult(UpdateState::Merging); } if (failed) { if (failure_code != MergeFailureCode::Ok) { // Note: since there are many drop-out cases for failure, we acknowledge // it in WaitForMerge rather than here and elsewhere. return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, failure_code); } if (wrong_phase) { // If we got here, no other partitions are being merged, and nothing // failed to merge. It's safe to move to the next merge phase. if (!MergeSecondPhaseSnapshots(lock)) { return UpdateState::MergeFailed; auto code = MergeSecondPhaseSnapshots(lock); if (code != MergeFailureCode::Ok) { return MergeResult(UpdateState::MergeFailed, code); } return UpdateState::Merging; return MergeResult(UpdateState::Merging); } if (needs_reboot) { WriteUpdateState(lock, UpdateState::MergeNeedsReboot); return UpdateState::MergeNeedsReboot; return MergeResult(UpdateState::MergeNeedsReboot); } if (cancelled) { // This is an edge case, that we handle as correctly as we sensibly can. Loading @@ -1048,16 +1057,17 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // removed the snapshot as a result. The exact state of the update is // undefined now, but this can only happen on an unlocked device where // partitions can be flashed without wiping userdata. return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted); } UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status) { auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status) -> MergeResult { SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, name, &snapshot_status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus); } std::unique_ptr<LpMetadata> current_metadata; Loading @@ -1070,7 +1080,7 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: if (!current_metadata || GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) { DeleteSnapshot(lock, name); return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } // During a check, we decided the merge was complete, but we were unable to Loading @@ -1081,11 +1091,11 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) { // NB: It's okay if this fails now, we gave cleanup our best effort. OnSnapshotMergeComplete(lock, name, snapshot_status); return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted); } LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType); } // This check is expensive so it is only enabled for debugging. Loading @@ -1095,29 +1105,30 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: std::string target_type; DmTargetSnapshot::Status status; if (!QuerySnapshotStatus(name, &target_type, &status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus); } if (target_type == "snapshot" && DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE && update_status.merge_phase() == MergePhase::FIRST_PHASE) { // The snapshot is not being merged because it's in the wrong phase. return UpdateState::None; return MergeResult(UpdateState::None); } if (target_type != "snapshot-merge") { // We can get here if we failed to rewrite the target type in // InitiateMerge(). If we failed to create the target in first-stage // init, boot would not succeed. LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget); } // These two values are equal when merging is complete. if (status.sectors_allocated != status.metadata_sectors) { if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) { LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete."; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnmergedSectorsAfterCompletion); } return UpdateState::Merging; return MergeResult(UpdateState::Merging); } // Merging is done. First, update the status file to indicate the merge Loading @@ -1130,18 +1141,18 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: // snapshot device for this partition. snapshot_status.set_state(SnapshotState::MERGE_COMPLETED); if (!WriteSnapshotStatus(lock, snapshot_status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus); } if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) { return UpdateState::MergeNeedsReboot; return MergeResult(UpdateState::MergeNeedsReboot); } return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok); } bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { return UpdateState::MergeFailed; return MergeFailureCode::ListSnapshots; } SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); Loading @@ -1150,24 +1161,27 @@ bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { update_status.set_merge_phase(MergePhase::SECOND_PHASE); if (!WriteSnapshotUpdateStatus(lock, update_status)) { return false; return MergeFailureCode::WriteStatus; } bool rewrote_all = true; MergeFailureCode result = MergeFailureCode::Ok; for (const auto& snapshot : snapshots) { SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) { return UpdateState::MergeFailed; return MergeFailureCode::ReadStatus; } if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) { continue; } if (!SwitchSnapshotToMerge(lock, snapshot)) { auto code = SwitchSnapshotToMerge(lock, snapshot); if (code != MergeFailureCode::Ok) { LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot; rewrote_all = false; if (result == MergeFailureCode::Ok) { result = code; } } } return rewrote_all; return result; } std::string SnapshotManager::GetSnapshotBootIndicatorPath() { Loading Loading @@ -1199,7 +1213,7 @@ void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) { RemoveAllUpdateState(lock); } void SnapshotManager::AcknowledgeMergeFailure() { void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) { // Log first, so worst case, we always have a record of why the calls below // were being made. LOG(ERROR) << "Merge could not be completed and will be marked as failed."; Loading @@ -1216,7 +1230,7 @@ void SnapshotManager::AcknowledgeMergeFailure() { return; } WriteUpdateState(lock.get(), UpdateState::MergeFailed); WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code); } bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name, Loading Loading @@ -2414,10 +2428,15 @@ SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) return status; } bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) { bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state, MergeFailureCode failure_code) { SnapshotUpdateStatus status; status.set_state(state); if (state == UpdateState::MergeFailed) { status.set_merge_failure_code(failure_code); } // If we're transitioning between two valid states (eg, we're not beginning // or ending an OTA), then make sure to propagate the compression bit. if (!(state == UpdateState::Initiated || state == UpdateState::None)) { Loading fs_mgr/libsnapshot/snapshot_test.cpp +12 −0 Original line number Diff line number Diff line Loading @@ -676,6 +676,18 @@ TEST_F(SnapshotTest, UpdateBootControlHal) { ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING); } TEST_F(SnapshotTest, MergeFailureCode) { ASSERT_TRUE(AcquireLock()); ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::MergeFailed, MergeFailureCode::ListSnapshots)); ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING); SnapshotUpdateStatus status = sm->ReadSnapshotUpdateStatus(lock_.get()); ASSERT_EQ(status.state(), UpdateState::MergeFailed); ASSERT_EQ(status.merge_failure_code(), MergeFailureCode::ListSnapshots); } enum class Request { UNKNOWN, LOCK_SHARED, LOCK_EXCLUSIVE, UNLOCK, EXIT }; std::ostream& operator<<(std::ostream& os, Request request) { switch (request) { Loading Loading
fs_mgr/libsnapshot/android/snapshot/snapshot.proto +25 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,28 @@ enum UpdateState { Cancelled = 7; }; // Next: 7 // Next 14: // // To understand the source of each failure, read snapshot.cpp. To handle new // sources of failure, avoid reusing an existing code; add a new code instead. enum MergeFailureCode { Ok = 0; ReadStatus = 1; GetTableInfo = 2; UnknownTable = 3; GetTableParams = 4; ActivateNewTable = 5; AcquireLock = 6; ListSnapshots = 7; WriteStatus = 8; UnknownTargetType = 9; QuerySnapshotStatus = 10; ExpectedMergeTarget = 11; UnmergedSectorsAfterCompletion = 12; UnexpectedMergeState = 13; }; // Next: 8 message SnapshotUpdateStatus { UpdateState state = 1; Loading @@ -160,6 +181,9 @@ message SnapshotUpdateStatus { // Merge phase (if state == MERGING). MergePhase merge_phase = 6; // Merge failure code, filled if state == MergeFailed. MergeFailureCode merge_failure_code = 7; } // Next: 9 Loading
fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +18 −8 Original line number Diff line number Diff line Loading @@ -381,6 +381,7 @@ class SnapshotManager final : public ISnapshotManager { FRIEND_TEST(SnapshotTest, MapPartialSnapshot); FRIEND_TEST(SnapshotTest, MapSnapshot); FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, MergeFailureCode); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); FRIEND_TEST(SnapshotUpdateTest, DaemonTransition); Loading Loading @@ -532,7 +533,8 @@ class SnapshotManager final : public ISnapshotManager { // Interact with /metadata/ota/state. UpdateState ReadUpdateState(LockedFile* file); SnapshotUpdateStatus ReadSnapshotUpdateStatus(LockedFile* file); bool WriteUpdateState(LockedFile* file, UpdateState state); bool WriteUpdateState(LockedFile* file, UpdateState state, MergeFailureCode failure_code = MergeFailureCode::Ok); bool WriteSnapshotUpdateStatus(LockedFile* file, const SnapshotUpdateStatus& status); std::string GetStateFilePath() const; Loading @@ -541,12 +543,12 @@ class SnapshotManager final : public ISnapshotManager { std::string GetMergeStateFilePath() const; // Helpers for merging. bool MergeSecondPhaseSnapshots(LockedFile* lock); bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name); bool RewriteSnapshotDeviceTable(const std::string& dm_name); MergeFailureCode MergeSecondPhaseSnapshots(LockedFile* lock); MergeFailureCode SwitchSnapshotToMerge(LockedFile* lock, const std::string& name); MergeFailureCode RewriteSnapshotDeviceTable(const std::string& dm_name); bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name); void AcknowledgeMergeSuccess(LockedFile* lock); void AcknowledgeMergeFailure(); void AcknowledgeMergeFailure(MergeFailureCode failure_code); MergePhase DecideMergePhase(const SnapshotStatus& status); std::unique_ptr<LpMetadata> ReadCurrentMetadata(); Loading @@ -573,14 +575,22 @@ class SnapshotManager final : public ISnapshotManager { const SnapshotStatus& status); bool CollapseSnapshotDevice(const std::string& name, const SnapshotStatus& status); struct MergeResult { explicit MergeResult(UpdateState state, MergeFailureCode failure_code = MergeFailureCode::Ok) : state(state), failure_code(failure_code) {} UpdateState state; MergeFailureCode failure_code; }; // Only the following UpdateStates are used here: // UpdateState::Merging // UpdateState::MergeCompleted // UpdateState::MergeFailed // UpdateState::MergeNeedsReboot UpdateState CheckMergeState(const std::function<bool()>& before_cancel); UpdateState CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel); UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name, MergeResult CheckMergeState(const std::function<bool()>& before_cancel); MergeResult CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel); MergeResult CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status); // Interact with status files under /metadata/ota/snapshots. Loading
fs_mgr/libsnapshot/snapshot.cpp +97 −78 Original line number Diff line number Diff line Loading @@ -746,32 +746,35 @@ bool SnapshotManager::InitiateMerge() { return false; } bool rewrote_all = true; auto reported_code = MergeFailureCode::Ok; for (const auto& snapshot : *merge_group) { // If this fails, we have no choice but to continue. Everything must // be merged. This is not an ideal state to be in, but it is safe, // because we the next boot will try again. if (!SwitchSnapshotToMerge(lock.get(), snapshot)) { auto code = SwitchSnapshotToMerge(lock.get(), snapshot); if (code != MergeFailureCode::Ok) { LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot; rewrote_all = false; if (reported_code == MergeFailureCode::Ok) { reported_code = code; } } } // If we couldn't switch everything to a merge target, pre-emptively mark // this merge as failed. It will get acknowledged when WaitForMerge() is // called. if (!rewrote_all) { WriteUpdateState(lock.get(), UpdateState::MergeFailed); if (reported_code != MergeFailureCode::Ok) { WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code); } // Return true no matter what, because a merge was initiated. return true; } bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) { MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) { SnapshotStatus status; if (!ReadSnapshotStatus(lock, name, &status)) { return false; return MergeFailureCode::ReadStatus; } if (status.state() != SnapshotState::CREATED) { LOG(WARNING) << "Snapshot " << name Loading @@ -780,8 +783,8 @@ bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& // After this, we return true because we technically did switch to a merge // target. Everything else we do here is just informational. if (!RewriteSnapshotDeviceTable(name)) { return false; if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) { return code; } status.set_state(SnapshotState::MERGING); Loading @@ -795,26 +798,26 @@ bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& if (!WriteSnapshotStatus(lock, status)) { LOG(ERROR) << "Could not update status file for snapshot: " << name; } return true; return MergeFailureCode::Ok; } bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { auto& dm = DeviceMapper::Instance(); std::vector<DeviceMapper::TargetInfo> old_targets; if (!dm.GetTableInfo(name, &old_targets)) { LOG(ERROR) << "Could not read snapshot device table: " << name; return false; return MergeFailureCode::GetTableInfo; } if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") { LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name; return false; return MergeFailureCode::UnknownTable; } std::string base_device, cow_device; if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) { LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name; return false; return MergeFailureCode::GetTableParams; } DmTable table; Loading @@ -822,10 +825,10 @@ bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) { SnapshotStorageMode::Merge, kSnapshotChunkSize); if (!dm.LoadTableAndActivate(name, table)) { LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name; return false; return MergeFailureCode::ActivateNewTable; } LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name; return true; return MergeFailureCode::Ok; } enum class TableQuery { Loading Loading @@ -897,20 +900,20 @@ bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::strin UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback, const std::function<bool()>& before_cancel) { while (true) { UpdateState state = CheckMergeState(before_cancel); LOG(INFO) << "ProcessUpdateState handling state: " << state; auto result = CheckMergeState(before_cancel); LOG(INFO) << "ProcessUpdateState handling state: " << result.state; if (state == UpdateState::MergeFailed) { AcknowledgeMergeFailure(); if (result.state == UpdateState::MergeFailed) { AcknowledgeMergeFailure(result.failure_code); } if (state != UpdateState::Merging) { if (result.state != UpdateState::Merging) { // Either there is no merge, or the merge was finished, so no need // to keep waiting. return state; return result.state; } if (callback && !callback()) { return state; return result.state; } // This wait is not super time sensitive, so we have a relatively Loading @@ -919,36 +922,36 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& cal } } UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) { auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult { auto lock = LockExclusive(); if (!lock) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock); } UpdateState state = CheckMergeState(lock.get(), before_cancel); LOG(INFO) << "CheckMergeState for snapshots returned: " << state; auto result = CheckMergeState(lock.get(), before_cancel); LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state; if (state == UpdateState::MergeCompleted) { if (result.state == UpdateState::MergeCompleted) { // Do this inside the same lock. Failures get acknowledged without the // lock, because flock() might have failed. AcknowledgeMergeSuccess(lock.get()); } else if (state == UpdateState::Cancelled) { } else if (result.state == UpdateState::Cancelled) { if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) { LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update."; } } return state; return result; } UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel) { auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel) -> MergeResult { SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); switch (update_status.state()) { case UpdateState::None: case UpdateState::MergeCompleted: // Harmless races are allowed between two callers of WaitForMerge, // so in both of these cases we just propagate the state. return update_status.state(); return MergeResult(update_status.state()); case UpdateState::Merging: case UpdateState::MergeNeedsReboot: Loading @@ -963,26 +966,26 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // via the merge poll below, but if we never started a merge, we // need to also check here. if (HandleCancelledUpdate(lock, before_cancel)) { return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } return update_status.state(); return MergeResult(update_status.state()); default: return update_status.state(); return MergeResult(update_status.state()); } std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots); } auto other_suffix = device_->GetOtherSlotSuffix(); bool cancelled = false; bool failed = false; bool merging = false; bool needs_reboot = false; bool wrong_phase = false; MergeFailureCode failure_code = MergeFailureCode::Ok; for (const auto& snapshot : snapshots) { if (android::base::EndsWith(snapshot, other_suffix)) { // This will have triggered an error message in InitiateMerge already. Loading @@ -990,12 +993,15 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, continue; } UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status); LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << snapshot_state; auto result = CheckTargetMergeState(lock, snapshot, update_status); LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state; switch (snapshot_state) { switch (result.state) { case UpdateState::MergeFailed: failed = true; // Take the first failure code in case other failures compound. if (failure_code == MergeFailureCode::Ok) { failure_code = result.failure_code; } break; case UpdateState::Merging: merging = true; Loading @@ -1013,8 +1019,10 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, break; default: LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": " << "\"" << snapshot_state << "\""; failed = true; << "\"" << result.state << "\""; if (failure_code == MergeFailureCode::Ok) { failure_code = MergeFailureCode::UnexpectedMergeState; } break; } } Loading @@ -1023,24 +1031,25 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // Note that we handle "Merging" before we handle anything else. We // want to poll until *nothing* is merging if we can, so everything has // a chance to get marked as completed or failed. return UpdateState::Merging; return MergeResult(UpdateState::Merging); } if (failed) { if (failure_code != MergeFailureCode::Ok) { // Note: since there are many drop-out cases for failure, we acknowledge // it in WaitForMerge rather than here and elsewhere. return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, failure_code); } if (wrong_phase) { // If we got here, no other partitions are being merged, and nothing // failed to merge. It's safe to move to the next merge phase. if (!MergeSecondPhaseSnapshots(lock)) { return UpdateState::MergeFailed; auto code = MergeSecondPhaseSnapshots(lock); if (code != MergeFailureCode::Ok) { return MergeResult(UpdateState::MergeFailed, code); } return UpdateState::Merging; return MergeResult(UpdateState::Merging); } if (needs_reboot) { WriteUpdateState(lock, UpdateState::MergeNeedsReboot); return UpdateState::MergeNeedsReboot; return MergeResult(UpdateState::MergeNeedsReboot); } if (cancelled) { // This is an edge case, that we handle as correctly as we sensibly can. Loading @@ -1048,16 +1057,17 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock, // removed the snapshot as a result. The exact state of the update is // undefined now, but this can only happen on an unlocked device where // partitions can be flashed without wiping userdata. return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted); } UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status) { auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name, const SnapshotUpdateStatus& update_status) -> MergeResult { SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, name, &snapshot_status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus); } std::unique_ptr<LpMetadata> current_metadata; Loading @@ -1070,7 +1080,7 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: if (!current_metadata || GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) { DeleteSnapshot(lock, name); return UpdateState::Cancelled; return MergeResult(UpdateState::Cancelled); } // During a check, we decided the merge was complete, but we were unable to Loading @@ -1081,11 +1091,11 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) { // NB: It's okay if this fails now, we gave cleanup our best effort. OnSnapshotMergeComplete(lock, name, snapshot_status); return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted); } LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType); } // This check is expensive so it is only enabled for debugging. Loading @@ -1095,29 +1105,30 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: std::string target_type; DmTargetSnapshot::Status status; if (!QuerySnapshotStatus(name, &target_type, &status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus); } if (target_type == "snapshot" && DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE && update_status.merge_phase() == MergePhase::FIRST_PHASE) { // The snapshot is not being merged because it's in the wrong phase. return UpdateState::None; return MergeResult(UpdateState::None); } if (target_type != "snapshot-merge") { // We can get here if we failed to rewrite the target type in // InitiateMerge(). If we failed to create the target in first-stage // init, boot would not succeed. LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget); } // These two values are equal when merging is complete. if (status.sectors_allocated != status.metadata_sectors) { if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) { LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete."; return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnmergedSectorsAfterCompletion); } return UpdateState::Merging; return MergeResult(UpdateState::Merging); } // Merging is done. First, update the status file to indicate the merge Loading @@ -1130,18 +1141,18 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: // snapshot device for this partition. snapshot_status.set_state(SnapshotState::MERGE_COMPLETED); if (!WriteSnapshotStatus(lock, snapshot_status)) { return UpdateState::MergeFailed; return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus); } if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) { return UpdateState::MergeNeedsReboot; return MergeResult(UpdateState::MergeNeedsReboot); } return UpdateState::MergeCompleted; return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok); } bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { return UpdateState::MergeFailed; return MergeFailureCode::ListSnapshots; } SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock); Loading @@ -1150,24 +1161,27 @@ bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) { update_status.set_merge_phase(MergePhase::SECOND_PHASE); if (!WriteSnapshotUpdateStatus(lock, update_status)) { return false; return MergeFailureCode::WriteStatus; } bool rewrote_all = true; MergeFailureCode result = MergeFailureCode::Ok; for (const auto& snapshot : snapshots) { SnapshotStatus snapshot_status; if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) { return UpdateState::MergeFailed; return MergeFailureCode::ReadStatus; } if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) { continue; } if (!SwitchSnapshotToMerge(lock, snapshot)) { auto code = SwitchSnapshotToMerge(lock, snapshot); if (code != MergeFailureCode::Ok) { LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot; rewrote_all = false; if (result == MergeFailureCode::Ok) { result = code; } } } return rewrote_all; return result; } std::string SnapshotManager::GetSnapshotBootIndicatorPath() { Loading Loading @@ -1199,7 +1213,7 @@ void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) { RemoveAllUpdateState(lock); } void SnapshotManager::AcknowledgeMergeFailure() { void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) { // Log first, so worst case, we always have a record of why the calls below // were being made. LOG(ERROR) << "Merge could not be completed and will be marked as failed."; Loading @@ -1216,7 +1230,7 @@ void SnapshotManager::AcknowledgeMergeFailure() { return; } WriteUpdateState(lock.get(), UpdateState::MergeFailed); WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code); } bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name, Loading Loading @@ -2414,10 +2428,15 @@ SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) return status; } bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) { bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state, MergeFailureCode failure_code) { SnapshotUpdateStatus status; status.set_state(state); if (state == UpdateState::MergeFailed) { status.set_merge_failure_code(failure_code); } // If we're transitioning between two valid states (eg, we're not beginning // or ending an OTA), then make sure to propagate the compression bit. if (!(state == UpdateState::Initiated || state == UpdateState::None)) { Loading
fs_mgr/libsnapshot/snapshot_test.cpp +12 −0 Original line number Diff line number Diff line Loading @@ -676,6 +676,18 @@ TEST_F(SnapshotTest, UpdateBootControlHal) { ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING); } TEST_F(SnapshotTest, MergeFailureCode) { ASSERT_TRUE(AcquireLock()); ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::MergeFailed, MergeFailureCode::ListSnapshots)); ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING); SnapshotUpdateStatus status = sm->ReadSnapshotUpdateStatus(lock_.get()); ASSERT_EQ(status.state(), UpdateState::MergeFailed); ASSERT_EQ(status.merge_failure_code(), MergeFailureCode::ListSnapshots); } enum class Request { UNKNOWN, LOCK_SHARED, LOCK_EXCLUSIVE, UNLOCK, EXIT }; std::ostream& operator<<(std::ostream& os, Request request) { switch (request) { Loading