Loading fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +28 −2 Original line number Diff line number Diff line Loading @@ -154,6 +154,7 @@ class SnapshotManager final { // rebooting or after rolling back), or merge the OTA. bool FinishedSnapshotWrites(); private: // Initiate a merge on all snapshot devices. This should only be used after an // update has been marked successful after booting. bool InitiateMerge(); Loading Loading @@ -181,6 +182,15 @@ class SnapshotManager final { // GetUpdateState will return None, and a new update can begin. UpdateState ProcessUpdateState(); public: // Initiate the merge if necessary, then wait for the merge to finish. // See InitiateMerge() and ProcessUpdateState() for details. // Returns: // - None if no merge to initiate // - MergeCompleted if merge is completed // - other states indicating an error has occurred UpdateState InitiateMergeAndWait(); // Find the status of the current update, if any. // // |progress| depends on the returned status: Loading Loading @@ -238,12 +248,13 @@ class SnapshotManager final { FRIEND_TEST(SnapshotTest, MapPartialSnapshot); FRIEND_TEST(SnapshotTest, MapSnapshot); FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, MergeCannotRemoveCow); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); FRIEND_TEST(SnapshotUpdateTest, MergeCannotRemoveCow); FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow); friend class SnapshotTest; friend class SnapshotUpdateTest; friend class FlashAfterUpdateTest; friend struct AutoDeleteCowImage; friend struct AutoDeleteSnapshot; friend struct PartitionCowCreator; Loading Loading @@ -341,6 +352,9 @@ class SnapshotManager final { // condition was detected and handled. bool HandleCancelledUpdate(LockedFile* lock); // Helper for HandleCancelledUpdate. Assumes booting from new slot. bool HandleCancelledUpdateOnNewSlot(LockedFile* lock); // Remove artifacts created by the update process, such as snapshots, and // set the update state to None. bool RemoveAllUpdateState(LockedFile* lock); Loading @@ -359,7 +373,19 @@ class SnapshotManager final { bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name); void AcknowledgeMergeSuccess(LockedFile* lock); void AcknowledgeMergeFailure(); bool IsCancelledSnapshot(const std::string& snapshot_name); std::unique_ptr<LpMetadata> ReadCurrentMetadata(); enum class MetadataPartitionState { // Partition does not exist. None, // Partition is flashed. Flashed, // Partition is created by OTA client. Updated, }; // Helper function to check the state of a partition as described in metadata. MetadataPartitionState GetMetadataPartitionState(const LpMetadata& metadata, const std::string& name); // Note that these require the name of the device containing the snapshot, // which may be the "inner" device. Use GetsnapshotDeviecName(). Loading fs_mgr/libsnapshot/snapshot.cpp +111 −8 Original line number Diff line number Diff line Loading @@ -568,6 +568,27 @@ bool SnapshotManager::InitiateMerge() { } } auto metadata = ReadCurrentMetadata(); for (auto it = snapshots.begin(); it != snapshots.end();) { switch (GetMetadataPartitionState(*metadata, *it)) { case MetadataPartitionState::Flashed: LOG(WARNING) << "Detected re-flashing for partition " << *it << ". Skip merging it."; [[fallthrough]]; case MetadataPartitionState::None: { LOG(WARNING) << "Deleting snapshot for partition " << *it; if (!DeleteSnapshot(lock.get(), *it)) { LOG(WARNING) << "Cannot delete snapshot for partition " << *it << ". Skip merging it anyways."; } it = snapshots.erase(it); } break; case MetadataPartitionState::Updated: { ++it; } break; } } // Point of no return - mark that we're starting a merge. From now on every // snapshot must be a merge target. if (!WriteUpdateState(lock.get(), UpdateState::Merging)) { Loading Loading @@ -855,8 +876,15 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: std::string dm_name = GetSnapshotDeviceName(name, snapshot_status); std::unique_ptr<LpMetadata> current_metadata; if (!IsSnapshotDevice(dm_name)) { if (IsCancelledSnapshot(name)) { if (!current_metadata) { current_metadata = ReadCurrentMetadata(); } if (!current_metadata || GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) { DeleteSnapshot(lock, name); return UpdateState::Cancelled; } Loading @@ -877,7 +905,8 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: } // This check is expensive so it is only enabled for debugging. DCHECK(!IsCancelledSnapshot(name)); DCHECK((current_metadata = ReadCurrentMetadata()) && GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated); std::string target_type; DmTargetSnapshot::Status status; Loading Loading @@ -1106,13 +1135,17 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { if (device_->GetSlotSuffix() != old_slot) { // We're booted into the target slot, which means we just rebooted // after applying the update. if (!HandleCancelledUpdateOnNewSlot(lock)) { return false; } } // The only way we can get here is if: // (1) The device rolled back to the previous slot. // (2) This function was called prematurely before rebooting the device. // (3) fastboot set_active was used. // (4) The device updates to the new slot but re-flashed *all* partitions // in the new slot. // // In any case, delete the snapshots. It may be worth using the boot_control // HAL to differentiate case (2). Loading @@ -1120,18 +1153,66 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { return true; } bool SnapshotManager::IsCancelledSnapshot(const std::string& snapshot_name) { std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() { const auto& opener = device_->GetPartitionOpener(); uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); auto super_device = device_->GetSuperDevice(slot); auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot); if (!metadata) { LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device; return false; return nullptr; } return metadata; } SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState( const LpMetadata& metadata, const std::string& name) { auto partition = android::fs_mgr::FindPartition(metadata, name); if (!partition) return MetadataPartitionState::None; if (partition->attributes & LP_PARTITION_ATTR_UPDATED) { return MetadataPartitionState::Updated; } return MetadataPartitionState::Flashed; } bool SnapshotManager::HandleCancelledUpdateOnNewSlot(LockedFile* lock) { std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed " << "after applying an update. Assuming no snapshots."; // Let HandleCancelledUpdate resets UpdateState. return true; } auto partition = android::fs_mgr::FindPartition(*metadata.get(), snapshot_name); if (!partition) return false; return (partition->attributes & LP_PARTITION_ATTR_UPDATED) == 0; // Attempt to detect re-flashing on each partition. // - If all partitions are re-flashed, we can proceed to cancel the whole update. // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are // deleted. Caller is responsible for merging the rest of the snapshots. // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots. auto metadata = ReadCurrentMetadata(); if (!metadata) return false; bool all_snapshot_cancelled = true; for (const auto& snapshot_name : snapshots) { if (GetMetadataPartitionState(*metadata, snapshot_name) == MetadataPartitionState::Updated) { LOG(WARNING) << "Cannot cancel update because snapshot" << snapshot_name << " is in use."; all_snapshot_cancelled = false; continue; } // Delete snapshots for partitions that are re-flashed after the update. LOG(INFO) << "Detected re-flashing of partition " << snapshot_name << "."; if (!DeleteSnapshot(lock, snapshot_name)) { // This is an error, but it is okay to leave the snapshot in the short term. // However, if all_snapshot_cancelled == false after exiting the loop, caller may // initiate merge for this unused snapshot, which is likely to fail. LOG(WARNING) << "Failed to delete snapshot for re-flashed partition " << snapshot_name; } } if (!all_snapshot_cancelled) return false; LOG(INFO) << "All partitions are re-flashed after update, removing all update states."; return true; } bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) { Loading Loading @@ -2089,5 +2170,27 @@ std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() { return AutoUnmountDevice::New(device_->GetMetadataDir()); } UpdateState SnapshotManager::InitiateMergeAndWait() { LOG(INFO) << "Waiting for any previous merge request to complete. " << "This can take up to several minutes."; auto state = ProcessUpdateState(); if (state == UpdateState::None) { LOG(INFO) << "Can't find any snapshot to merge."; return state; } if (state == UpdateState::Unverified) { if (!InitiateMerge()) { LOG(ERROR) << "Failed to initiate merge."; return state; } // All other states can be handled by ProcessUpdateState. LOG(INFO) << "Waiting for merge to complete. This can take up to several minutes."; state = ProcessUpdateState(); } LOG(INFO) << "Merge finished with state \"" << state << "\"."; return state; } } // namespace snapshot } // namespace android fs_mgr/libsnapshot/snapshot_test.cpp +188 −63 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include <iostream> #include <android-base/file.h> #include <android-base/logging.h> #include <android-base/properties.h> #include <android-base/strings.h> #include <android-base/unique_fd.h> Loading Loading @@ -56,6 +57,7 @@ using android::fs_mgr::GetPartitionGroupName; using android::fs_mgr::GetPartitionName; using android::fs_mgr::Interval; using android::fs_mgr::MetadataBuilder; using android::fs_mgr::SlotSuffixForSlotNumber; using chromeos_update_engine::DeltaArchiveManifest; using chromeos_update_engine::DynamicPartitionGroup; using chromeos_update_engine::PartitionUpdate; Loading Loading @@ -445,61 +447,6 @@ TEST_F(SnapshotTest, Merge) { ASSERT_EQ(test_string, buffer); } TEST_F(SnapshotTest, MergeCannotRemoveCow) { ASSERT_TRUE(AcquireLock()); static const uint64_t kDeviceSize = 1024 * 1024; SnapshotStatus status; status.set_name("test-snapshot"); status.set_device_size(kDeviceSize); status.set_snapshot_size(kDeviceSize); status.set_cow_file_size(kDeviceSize); ASSERT_TRUE(sm->CreateSnapshot(lock_.get(), &status)); ASSERT_TRUE(CreateCowImage("test-snapshot")); std::string base_device, cow_device, snap_device; ASSERT_TRUE(CreatePartition("base-device", kDeviceSize, &base_device)); ASSERT_TRUE(MapCowImage("test-snapshot", 10s, &cow_device)); ASSERT_TRUE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, cow_device, 10s, &snap_device)); // Keep an open handle to the cow device. This should cause the merge to // be incomplete. auto cow_path = android::base::GetProperty("gsid.mapped_image.test-snapshot-cow-img", ""); unique_fd fd(open(cow_path.c_str(), O_RDONLY | O_CLOEXEC)); ASSERT_GE(fd, 0); // Release the lock. lock_ = nullptr; ASSERT_TRUE(sm->FinishedSnapshotWrites()); test_device->set_slot_suffix("_b"); ASSERT_TRUE(sm->InitiateMerge()); // COW cannot be removed due to open fd, so expect a soft failure. ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeNeedsReboot); // Release the handle to the COW device to fake a reboot. fd.reset(); // Wait 1s, otherwise DeleteSnapshotDevice may fail with EBUSY. sleep(1); // Forcefully delete the snapshot device, so it looks like we just rebooted. ASSERT_TRUE(DeleteSnapshotDevice("test-snapshot")); // Map snapshot should fail now, because we're in a merge-complete state. ASSERT_TRUE(AcquireLock()); ASSERT_TRUE(MapCowImage("test-snapshot", 10s, &cow_device)); ASSERT_FALSE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, cow_device, 10s, &snap_device)); // Release everything and now the merge should complete. fd = {}; lock_ = nullptr; ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeCompleted); } TEST_F(SnapshotTest, FirstStageMountAndMerge) { ASSERT_TRUE(AcquireLock()); Loading Loading @@ -680,9 +627,9 @@ class SnapshotUpdateTest : public SnapshotTest { // Initialize source partition metadata using |manifest_|. src_ = MetadataBuilder::New(*opener_, "super", 0); ASSERT_TRUE(FillFakeMetadata(src_.get(), manifest_, "_a")); ASSERT_NE(nullptr, src_); // Add sys_b which is like system_other. auto partition = src_->AddPartition("sys_b", 0); ASSERT_TRUE(src_->AddGroup("group_b", kGroupSize)); auto partition = src_->AddPartition("sys_b", "group_b", 0); ASSERT_NE(nullptr, partition); ASSERT_TRUE(src_->ResizePartition(partition, 1_MiB)); auto metadata = src_->Export(); Loading Loading @@ -731,8 +678,12 @@ class SnapshotUpdateTest : public SnapshotTest { if (!hash.has_value()) { return AssertionFailure() << "Cannot read partition " << name << ": " << path; } if (hashes_[name] != *hash) { return AssertionFailure() << "Content of " << name << " has changed after the merge"; auto it = hashes_.find(name); if (it == hashes_.end()) { return AssertionFailure() << "No existing hash for " << name << ". Bad test code?"; } if (it->second != *hash) { return AssertionFailure() << "Content of " << name << " has changed"; } return AssertionSuccess(); } Loading Loading @@ -847,8 +798,7 @@ TEST_F(SnapshotUpdateTest, FullUpdateFlow) { } // Initiate the merge and wait for it to be completed. ASSERT_TRUE(init->InitiateMerge()); ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState()); ASSERT_EQ(UpdateState::MergeCompleted, init->InitiateMergeAndWait()); // Check that the target partitions have the same content after the merge. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { Loading Loading @@ -1052,8 +1002,7 @@ TEST_F(SnapshotUpdateTest, ReclaimCow) { // Initiate the merge and wait for it to be completed. auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); ASSERT_TRUE(new_sm->InitiateMerge()); ASSERT_EQ(UpdateState::MergeCompleted, new_sm->ProcessUpdateState()); ASSERT_EQ(UpdateState::MergeCompleted, new_sm->InitiateMergeAndWait()); // Execute the second update. ASSERT_TRUE(new_sm->BeginUpdate()); Loading Loading @@ -1162,6 +1111,67 @@ TEST_F(SnapshotUpdateTest, RetrofitAfterRegularAb) { ASSERT_TRUE(sm->FinishedSnapshotWrites()); } TEST_F(SnapshotUpdateTest, MergeCannotRemoveCow) { // Make source partitions as big as possible to force COW image to be created. SetSize(sys_, 5_MiB); SetSize(vnd_, 5_MiB); SetSize(prd_, 5_MiB); src_ = MetadataBuilder::New(*opener_, "super", 0); src_->RemoveGroupAndPartitions(group_->name() + "_a"); src_->RemoveGroupAndPartitions(group_->name() + "_b"); ASSERT_TRUE(FillFakeMetadata(src_.get(), manifest_, "_a")); auto metadata = src_->Export(); ASSERT_NE(nullptr, metadata); ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *metadata.get(), 0)); // OTA client blindly unmaps all partitions that are possibly mapped. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { ASSERT_TRUE(sm->UnmapUpdateSnapshot(name)); } // Add operations for sys. The whole device is written. auto e = sys_->add_operations()->add_dst_extents(); e->set_start_block(0); e->set_num_blocks(GetSize(sys_) / manifest_.block_size()); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); ASSERT_TRUE(sm->FinishedSnapshotWrites()); // Simulate shutting down the device. ASSERT_TRUE(UnmapAll()); // After reboot, init does first stage mount. // Normally we should use NewForFirstStageMount, but if so, "gsid.mapped_image.sys_b-cow-img" // won't be set. auto init = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); ASSERT_NE(init, nullptr); ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); // Keep an open handle to the cow device. This should cause the merge to // be incomplete. auto cow_path = android::base::GetProperty("gsid.mapped_image.sys_b-cow-img", ""); unique_fd fd(open(cow_path.c_str(), O_RDONLY | O_CLOEXEC)); ASSERT_GE(fd, 0); // COW cannot be removed due to open fd, so expect a soft failure. ASSERT_EQ(UpdateState::MergeNeedsReboot, init->InitiateMergeAndWait()); // Simulate shutting down the device. fd.reset(); ASSERT_TRUE(UnmapAll()); // init does first stage mount again. ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); // sys_b should be mapped as a dm-linear device directly. ASSERT_FALSE(sm->IsSnapshotDevice("sys_b", nullptr)); // Merge should be able to complete now. ASSERT_EQ(UpdateState::MergeCompleted, init->InitiateMergeAndWait()); } class MetadataMountedTest : public SnapshotUpdateTest { public: void SetUp() override { Loading Loading @@ -1220,6 +1230,121 @@ TEST_F(MetadataMountedTest, Recovery) { EXPECT_FALSE(IsMetadataMounted()); } class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface<std::tuple<uint32_t, bool>> { public: AssertionResult InitiateMerge(const std::string& slot_suffix) { auto sm = SnapshotManager::New(new TestDeviceInfo(fake_super, slot_suffix)); if (!sm->CreateLogicalAndSnapshotPartitions("super")) { return AssertionFailure() << "Cannot CreateLogicalAndSnapshotPartitions"; } if (!sm->InitiateMerge()) { return AssertionFailure() << "Cannot initiate merge"; } return AssertionSuccess(); } }; TEST_P(FlashAfterUpdateTest, FlashSlotAfterUpdate) { // OTA client blindly unmaps all partitions that are possibly mapped. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { ASSERT_TRUE(sm->UnmapUpdateSnapshot(name)); } // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); ASSERT_TRUE(sm->FinishedSnapshotWrites()); // Simulate shutting down the device. ASSERT_TRUE(UnmapAll()); if (std::get<1>(GetParam()) /* merge */) { ASSERT_TRUE(InitiateMerge("_b")); // Simulate shutting down the device after merge has initiated. ASSERT_TRUE(UnmapAll()); } auto flashed_slot = std::get<0>(GetParam()); auto flashed_slot_suffix = SlotSuffixForSlotNumber(flashed_slot); // Simulate flashing |flashed_slot|. This clears the UPDATED flag. auto flashed_builder = MetadataBuilder::New(*opener_, "super", flashed_slot); flashed_builder->RemoveGroupAndPartitions(group_->name() + flashed_slot_suffix); flashed_builder->RemoveGroupAndPartitions(kCowGroupName); ASSERT_TRUE(FillFakeMetadata(flashed_builder.get(), manifest_, flashed_slot_suffix)); // Deliberately remove a partition from this build so that // InitiateMerge do not switch state to "merging". This is possible in // practice because the list of dynamic partitions may change. ASSERT_NE(nullptr, flashed_builder->FindPartition("prd" + flashed_slot_suffix)); flashed_builder->RemovePartition("prd" + flashed_slot_suffix); auto flashed_metadata = flashed_builder->Export(); ASSERT_NE(nullptr, flashed_metadata); ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *flashed_metadata, flashed_slot)); std::string path; for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(CreateLogicalPartition( CreateLogicalPartitionParams{ .block_device = fake_super, .metadata_slot = flashed_slot, .partition_name = name + flashed_slot_suffix, .timeout_ms = 1s, .partition_opener = opener_.get(), }, &path)); ASSERT_TRUE(WriteRandomData(path)); auto hash = GetHash(path); ASSERT_TRUE(hash.has_value()); hashes_[name + flashed_slot_suffix] = *hash; } // Simulate shutting down the device after flash. ASSERT_TRUE(UnmapAll()); // Simulate reboot. After reboot, init does first stage mount. auto init = SnapshotManager::NewForFirstStageMount( new TestDeviceInfo(fake_super, flashed_slot_suffix)); ASSERT_NE(init, nullptr); if (init->NeedSnapshotsInFirstStageMount()) { ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); } else { for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(CreateLogicalPartition( CreateLogicalPartitionParams{ .block_device = fake_super, .metadata_slot = flashed_slot, .partition_name = name + flashed_slot_suffix, .timeout_ms = 1s, .partition_opener = opener_.get(), }, &path)); } } // Check that the target partitions have the same content. for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(IsPartitionUnchanged(name + flashed_slot_suffix)); } // There should be no snapshot to merge. auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, flashed_slot_suffix)); ASSERT_EQ(UpdateState::Cancelled, new_sm->InitiateMergeAndWait()); // Next OTA calls CancelUpdate no matter what. ASSERT_TRUE(new_sm->CancelUpdate()); } INSTANTIATE_TEST_SUITE_P(, FlashAfterUpdateTest, Combine(Values(0, 1), Bool()), [](const TestParamInfo<FlashAfterUpdateTest::ParamType>& info) { return "Flash"s + (std::get<0>(info.param) ? "New"s : "Old"s) + "Slot"s + (std::get<1>(info.param) ? "After"s : "Before"s) + "Merge"s; }); } // namespace snapshot } // namespace android Loading fs_mgr/libsnapshot/snapshotctl.cpp +1 −14 Original line number Diff line number Diff line Loading @@ -60,24 +60,11 @@ bool MergeCmdHandler(int argc, char** argv) { android::base::InitLogging(argv, &android::base::StdioLogger); } auto sm = SnapshotManager::New(); auto state = SnapshotManager::New()->InitiateMergeAndWait(); auto state = sm->GetUpdateState(); if (state == UpdateState::None) { LOG(INFO) << "Can't find any snapshot to merge."; return true; } if (state == UpdateState::Unverified) { if (!sm->InitiateMerge()) { LOG(ERROR) << "Failed to initiate merge."; return false; } } // All other states can be handled by ProcessUpdateState. LOG(INFO) << "Waiting for any merge to complete. This can take up to 1 minute."; state = SnapshotManager::New()->ProcessUpdateState(); if (state == UpdateState::MergeCompleted) { auto end = std::chrono::steady_clock::now(); auto passed = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(); Loading Loading
fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +28 −2 Original line number Diff line number Diff line Loading @@ -154,6 +154,7 @@ class SnapshotManager final { // rebooting or after rolling back), or merge the OTA. bool FinishedSnapshotWrites(); private: // Initiate a merge on all snapshot devices. This should only be used after an // update has been marked successful after booting. bool InitiateMerge(); Loading Loading @@ -181,6 +182,15 @@ class SnapshotManager final { // GetUpdateState will return None, and a new update can begin. UpdateState ProcessUpdateState(); public: // Initiate the merge if necessary, then wait for the merge to finish. // See InitiateMerge() and ProcessUpdateState() for details. // Returns: // - None if no merge to initiate // - MergeCompleted if merge is completed // - other states indicating an error has occurred UpdateState InitiateMergeAndWait(); // Find the status of the current update, if any. // // |progress| depends on the returned status: Loading Loading @@ -238,12 +248,13 @@ class SnapshotManager final { FRIEND_TEST(SnapshotTest, MapPartialSnapshot); FRIEND_TEST(SnapshotTest, MapSnapshot); FRIEND_TEST(SnapshotTest, Merge); FRIEND_TEST(SnapshotTest, MergeCannotRemoveCow); FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot); FRIEND_TEST(SnapshotTest, UpdateBootControlHal); FRIEND_TEST(SnapshotUpdateTest, MergeCannotRemoveCow); FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow); friend class SnapshotTest; friend class SnapshotUpdateTest; friend class FlashAfterUpdateTest; friend struct AutoDeleteCowImage; friend struct AutoDeleteSnapshot; friend struct PartitionCowCreator; Loading Loading @@ -341,6 +352,9 @@ class SnapshotManager final { // condition was detected and handled. bool HandleCancelledUpdate(LockedFile* lock); // Helper for HandleCancelledUpdate. Assumes booting from new slot. bool HandleCancelledUpdateOnNewSlot(LockedFile* lock); // Remove artifacts created by the update process, such as snapshots, and // set the update state to None. bool RemoveAllUpdateState(LockedFile* lock); Loading @@ -359,7 +373,19 @@ class SnapshotManager final { bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name); void AcknowledgeMergeSuccess(LockedFile* lock); void AcknowledgeMergeFailure(); bool IsCancelledSnapshot(const std::string& snapshot_name); std::unique_ptr<LpMetadata> ReadCurrentMetadata(); enum class MetadataPartitionState { // Partition does not exist. None, // Partition is flashed. Flashed, // Partition is created by OTA client. Updated, }; // Helper function to check the state of a partition as described in metadata. MetadataPartitionState GetMetadataPartitionState(const LpMetadata& metadata, const std::string& name); // Note that these require the name of the device containing the snapshot, // which may be the "inner" device. Use GetsnapshotDeviecName(). Loading
fs_mgr/libsnapshot/snapshot.cpp +111 −8 Original line number Diff line number Diff line Loading @@ -568,6 +568,27 @@ bool SnapshotManager::InitiateMerge() { } } auto metadata = ReadCurrentMetadata(); for (auto it = snapshots.begin(); it != snapshots.end();) { switch (GetMetadataPartitionState(*metadata, *it)) { case MetadataPartitionState::Flashed: LOG(WARNING) << "Detected re-flashing for partition " << *it << ". Skip merging it."; [[fallthrough]]; case MetadataPartitionState::None: { LOG(WARNING) << "Deleting snapshot for partition " << *it; if (!DeleteSnapshot(lock.get(), *it)) { LOG(WARNING) << "Cannot delete snapshot for partition " << *it << ". Skip merging it anyways."; } it = snapshots.erase(it); } break; case MetadataPartitionState::Updated: { ++it; } break; } } // Point of no return - mark that we're starting a merge. From now on every // snapshot must be a merge target. if (!WriteUpdateState(lock.get(), UpdateState::Merging)) { Loading Loading @@ -855,8 +876,15 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: std::string dm_name = GetSnapshotDeviceName(name, snapshot_status); std::unique_ptr<LpMetadata> current_metadata; if (!IsSnapshotDevice(dm_name)) { if (IsCancelledSnapshot(name)) { if (!current_metadata) { current_metadata = ReadCurrentMetadata(); } if (!current_metadata || GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) { DeleteSnapshot(lock, name); return UpdateState::Cancelled; } Loading @@ -877,7 +905,8 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std:: } // This check is expensive so it is only enabled for debugging. DCHECK(!IsCancelledSnapshot(name)); DCHECK((current_metadata = ReadCurrentMetadata()) && GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated); std::string target_type; DmTargetSnapshot::Status status; Loading Loading @@ -1106,13 +1135,17 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { if (device_->GetSlotSuffix() != old_slot) { // We're booted into the target slot, which means we just rebooted // after applying the update. if (!HandleCancelledUpdateOnNewSlot(lock)) { return false; } } // The only way we can get here is if: // (1) The device rolled back to the previous slot. // (2) This function was called prematurely before rebooting the device. // (3) fastboot set_active was used. // (4) The device updates to the new slot but re-flashed *all* partitions // in the new slot. // // In any case, delete the snapshots. It may be worth using the boot_control // HAL to differentiate case (2). Loading @@ -1120,18 +1153,66 @@ bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { return true; } bool SnapshotManager::IsCancelledSnapshot(const std::string& snapshot_name) { std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() { const auto& opener = device_->GetPartitionOpener(); uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix()); auto super_device = device_->GetSuperDevice(slot); auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot); if (!metadata) { LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device; return false; return nullptr; } return metadata; } SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState( const LpMetadata& metadata, const std::string& name) { auto partition = android::fs_mgr::FindPartition(metadata, name); if (!partition) return MetadataPartitionState::None; if (partition->attributes & LP_PARTITION_ATTR_UPDATED) { return MetadataPartitionState::Updated; } return MetadataPartitionState::Flashed; } bool SnapshotManager::HandleCancelledUpdateOnNewSlot(LockedFile* lock) { std::vector<std::string> snapshots; if (!ListSnapshots(lock, &snapshots)) { LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed " << "after applying an update. Assuming no snapshots."; // Let HandleCancelledUpdate resets UpdateState. return true; } auto partition = android::fs_mgr::FindPartition(*metadata.get(), snapshot_name); if (!partition) return false; return (partition->attributes & LP_PARTITION_ATTR_UPDATED) == 0; // Attempt to detect re-flashing on each partition. // - If all partitions are re-flashed, we can proceed to cancel the whole update. // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are // deleted. Caller is responsible for merging the rest of the snapshots. // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots. auto metadata = ReadCurrentMetadata(); if (!metadata) return false; bool all_snapshot_cancelled = true; for (const auto& snapshot_name : snapshots) { if (GetMetadataPartitionState(*metadata, snapshot_name) == MetadataPartitionState::Updated) { LOG(WARNING) << "Cannot cancel update because snapshot" << snapshot_name << " is in use."; all_snapshot_cancelled = false; continue; } // Delete snapshots for partitions that are re-flashed after the update. LOG(INFO) << "Detected re-flashing of partition " << snapshot_name << "."; if (!DeleteSnapshot(lock, snapshot_name)) { // This is an error, but it is okay to leave the snapshot in the short term. // However, if all_snapshot_cancelled == false after exiting the loop, caller may // initiate merge for this unused snapshot, which is likely to fail. LOG(WARNING) << "Failed to delete snapshot for re-flashed partition " << snapshot_name; } } if (!all_snapshot_cancelled) return false; LOG(INFO) << "All partitions are re-flashed after update, removing all update states."; return true; } bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) { Loading Loading @@ -2089,5 +2170,27 @@ std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() { return AutoUnmountDevice::New(device_->GetMetadataDir()); } UpdateState SnapshotManager::InitiateMergeAndWait() { LOG(INFO) << "Waiting for any previous merge request to complete. " << "This can take up to several minutes."; auto state = ProcessUpdateState(); if (state == UpdateState::None) { LOG(INFO) << "Can't find any snapshot to merge."; return state; } if (state == UpdateState::Unverified) { if (!InitiateMerge()) { LOG(ERROR) << "Failed to initiate merge."; return state; } // All other states can be handled by ProcessUpdateState. LOG(INFO) << "Waiting for merge to complete. This can take up to several minutes."; state = ProcessUpdateState(); } LOG(INFO) << "Merge finished with state \"" << state << "\"."; return state; } } // namespace snapshot } // namespace android
fs_mgr/libsnapshot/snapshot_test.cpp +188 −63 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include <iostream> #include <android-base/file.h> #include <android-base/logging.h> #include <android-base/properties.h> #include <android-base/strings.h> #include <android-base/unique_fd.h> Loading Loading @@ -56,6 +57,7 @@ using android::fs_mgr::GetPartitionGroupName; using android::fs_mgr::GetPartitionName; using android::fs_mgr::Interval; using android::fs_mgr::MetadataBuilder; using android::fs_mgr::SlotSuffixForSlotNumber; using chromeos_update_engine::DeltaArchiveManifest; using chromeos_update_engine::DynamicPartitionGroup; using chromeos_update_engine::PartitionUpdate; Loading Loading @@ -445,61 +447,6 @@ TEST_F(SnapshotTest, Merge) { ASSERT_EQ(test_string, buffer); } TEST_F(SnapshotTest, MergeCannotRemoveCow) { ASSERT_TRUE(AcquireLock()); static const uint64_t kDeviceSize = 1024 * 1024; SnapshotStatus status; status.set_name("test-snapshot"); status.set_device_size(kDeviceSize); status.set_snapshot_size(kDeviceSize); status.set_cow_file_size(kDeviceSize); ASSERT_TRUE(sm->CreateSnapshot(lock_.get(), &status)); ASSERT_TRUE(CreateCowImage("test-snapshot")); std::string base_device, cow_device, snap_device; ASSERT_TRUE(CreatePartition("base-device", kDeviceSize, &base_device)); ASSERT_TRUE(MapCowImage("test-snapshot", 10s, &cow_device)); ASSERT_TRUE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, cow_device, 10s, &snap_device)); // Keep an open handle to the cow device. This should cause the merge to // be incomplete. auto cow_path = android::base::GetProperty("gsid.mapped_image.test-snapshot-cow-img", ""); unique_fd fd(open(cow_path.c_str(), O_RDONLY | O_CLOEXEC)); ASSERT_GE(fd, 0); // Release the lock. lock_ = nullptr; ASSERT_TRUE(sm->FinishedSnapshotWrites()); test_device->set_slot_suffix("_b"); ASSERT_TRUE(sm->InitiateMerge()); // COW cannot be removed due to open fd, so expect a soft failure. ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeNeedsReboot); // Release the handle to the COW device to fake a reboot. fd.reset(); // Wait 1s, otherwise DeleteSnapshotDevice may fail with EBUSY. sleep(1); // Forcefully delete the snapshot device, so it looks like we just rebooted. ASSERT_TRUE(DeleteSnapshotDevice("test-snapshot")); // Map snapshot should fail now, because we're in a merge-complete state. ASSERT_TRUE(AcquireLock()); ASSERT_TRUE(MapCowImage("test-snapshot", 10s, &cow_device)); ASSERT_FALSE(sm->MapSnapshot(lock_.get(), "test-snapshot", base_device, cow_device, 10s, &snap_device)); // Release everything and now the merge should complete. fd = {}; lock_ = nullptr; ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeCompleted); } TEST_F(SnapshotTest, FirstStageMountAndMerge) { ASSERT_TRUE(AcquireLock()); Loading Loading @@ -680,9 +627,9 @@ class SnapshotUpdateTest : public SnapshotTest { // Initialize source partition metadata using |manifest_|. src_ = MetadataBuilder::New(*opener_, "super", 0); ASSERT_TRUE(FillFakeMetadata(src_.get(), manifest_, "_a")); ASSERT_NE(nullptr, src_); // Add sys_b which is like system_other. auto partition = src_->AddPartition("sys_b", 0); ASSERT_TRUE(src_->AddGroup("group_b", kGroupSize)); auto partition = src_->AddPartition("sys_b", "group_b", 0); ASSERT_NE(nullptr, partition); ASSERT_TRUE(src_->ResizePartition(partition, 1_MiB)); auto metadata = src_->Export(); Loading Loading @@ -731,8 +678,12 @@ class SnapshotUpdateTest : public SnapshotTest { if (!hash.has_value()) { return AssertionFailure() << "Cannot read partition " << name << ": " << path; } if (hashes_[name] != *hash) { return AssertionFailure() << "Content of " << name << " has changed after the merge"; auto it = hashes_.find(name); if (it == hashes_.end()) { return AssertionFailure() << "No existing hash for " << name << ". Bad test code?"; } if (it->second != *hash) { return AssertionFailure() << "Content of " << name << " has changed"; } return AssertionSuccess(); } Loading Loading @@ -847,8 +798,7 @@ TEST_F(SnapshotUpdateTest, FullUpdateFlow) { } // Initiate the merge and wait for it to be completed. ASSERT_TRUE(init->InitiateMerge()); ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState()); ASSERT_EQ(UpdateState::MergeCompleted, init->InitiateMergeAndWait()); // Check that the target partitions have the same content after the merge. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { Loading Loading @@ -1052,8 +1002,7 @@ TEST_F(SnapshotUpdateTest, ReclaimCow) { // Initiate the merge and wait for it to be completed. auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); ASSERT_TRUE(new_sm->InitiateMerge()); ASSERT_EQ(UpdateState::MergeCompleted, new_sm->ProcessUpdateState()); ASSERT_EQ(UpdateState::MergeCompleted, new_sm->InitiateMergeAndWait()); // Execute the second update. ASSERT_TRUE(new_sm->BeginUpdate()); Loading Loading @@ -1162,6 +1111,67 @@ TEST_F(SnapshotUpdateTest, RetrofitAfterRegularAb) { ASSERT_TRUE(sm->FinishedSnapshotWrites()); } TEST_F(SnapshotUpdateTest, MergeCannotRemoveCow) { // Make source partitions as big as possible to force COW image to be created. SetSize(sys_, 5_MiB); SetSize(vnd_, 5_MiB); SetSize(prd_, 5_MiB); src_ = MetadataBuilder::New(*opener_, "super", 0); src_->RemoveGroupAndPartitions(group_->name() + "_a"); src_->RemoveGroupAndPartitions(group_->name() + "_b"); ASSERT_TRUE(FillFakeMetadata(src_.get(), manifest_, "_a")); auto metadata = src_->Export(); ASSERT_NE(nullptr, metadata); ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *metadata.get(), 0)); // OTA client blindly unmaps all partitions that are possibly mapped. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { ASSERT_TRUE(sm->UnmapUpdateSnapshot(name)); } // Add operations for sys. The whole device is written. auto e = sys_->add_operations()->add_dst_extents(); e->set_start_block(0); e->set_num_blocks(GetSize(sys_) / manifest_.block_size()); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); ASSERT_TRUE(sm->FinishedSnapshotWrites()); // Simulate shutting down the device. ASSERT_TRUE(UnmapAll()); // After reboot, init does first stage mount. // Normally we should use NewForFirstStageMount, but if so, "gsid.mapped_image.sys_b-cow-img" // won't be set. auto init = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); ASSERT_NE(init, nullptr); ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); // Keep an open handle to the cow device. This should cause the merge to // be incomplete. auto cow_path = android::base::GetProperty("gsid.mapped_image.sys_b-cow-img", ""); unique_fd fd(open(cow_path.c_str(), O_RDONLY | O_CLOEXEC)); ASSERT_GE(fd, 0); // COW cannot be removed due to open fd, so expect a soft failure. ASSERT_EQ(UpdateState::MergeNeedsReboot, init->InitiateMergeAndWait()); // Simulate shutting down the device. fd.reset(); ASSERT_TRUE(UnmapAll()); // init does first stage mount again. ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); // sys_b should be mapped as a dm-linear device directly. ASSERT_FALSE(sm->IsSnapshotDevice("sys_b", nullptr)); // Merge should be able to complete now. ASSERT_EQ(UpdateState::MergeCompleted, init->InitiateMergeAndWait()); } class MetadataMountedTest : public SnapshotUpdateTest { public: void SetUp() override { Loading Loading @@ -1220,6 +1230,121 @@ TEST_F(MetadataMountedTest, Recovery) { EXPECT_FALSE(IsMetadataMounted()); } class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface<std::tuple<uint32_t, bool>> { public: AssertionResult InitiateMerge(const std::string& slot_suffix) { auto sm = SnapshotManager::New(new TestDeviceInfo(fake_super, slot_suffix)); if (!sm->CreateLogicalAndSnapshotPartitions("super")) { return AssertionFailure() << "Cannot CreateLogicalAndSnapshotPartitions"; } if (!sm->InitiateMerge()) { return AssertionFailure() << "Cannot initiate merge"; } return AssertionSuccess(); } }; TEST_P(FlashAfterUpdateTest, FlashSlotAfterUpdate) { // OTA client blindly unmaps all partitions that are possibly mapped. for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { ASSERT_TRUE(sm->UnmapUpdateSnapshot(name)); } // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); ASSERT_TRUE(sm->FinishedSnapshotWrites()); // Simulate shutting down the device. ASSERT_TRUE(UnmapAll()); if (std::get<1>(GetParam()) /* merge */) { ASSERT_TRUE(InitiateMerge("_b")); // Simulate shutting down the device after merge has initiated. ASSERT_TRUE(UnmapAll()); } auto flashed_slot = std::get<0>(GetParam()); auto flashed_slot_suffix = SlotSuffixForSlotNumber(flashed_slot); // Simulate flashing |flashed_slot|. This clears the UPDATED flag. auto flashed_builder = MetadataBuilder::New(*opener_, "super", flashed_slot); flashed_builder->RemoveGroupAndPartitions(group_->name() + flashed_slot_suffix); flashed_builder->RemoveGroupAndPartitions(kCowGroupName); ASSERT_TRUE(FillFakeMetadata(flashed_builder.get(), manifest_, flashed_slot_suffix)); // Deliberately remove a partition from this build so that // InitiateMerge do not switch state to "merging". This is possible in // practice because the list of dynamic partitions may change. ASSERT_NE(nullptr, flashed_builder->FindPartition("prd" + flashed_slot_suffix)); flashed_builder->RemovePartition("prd" + flashed_slot_suffix); auto flashed_metadata = flashed_builder->Export(); ASSERT_NE(nullptr, flashed_metadata); ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *flashed_metadata, flashed_slot)); std::string path; for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(CreateLogicalPartition( CreateLogicalPartitionParams{ .block_device = fake_super, .metadata_slot = flashed_slot, .partition_name = name + flashed_slot_suffix, .timeout_ms = 1s, .partition_opener = opener_.get(), }, &path)); ASSERT_TRUE(WriteRandomData(path)); auto hash = GetHash(path); ASSERT_TRUE(hash.has_value()); hashes_[name + flashed_slot_suffix] = *hash; } // Simulate shutting down the device after flash. ASSERT_TRUE(UnmapAll()); // Simulate reboot. After reboot, init does first stage mount. auto init = SnapshotManager::NewForFirstStageMount( new TestDeviceInfo(fake_super, flashed_slot_suffix)); ASSERT_NE(init, nullptr); if (init->NeedSnapshotsInFirstStageMount()) { ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); } else { for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(CreateLogicalPartition( CreateLogicalPartitionParams{ .block_device = fake_super, .metadata_slot = flashed_slot, .partition_name = name + flashed_slot_suffix, .timeout_ms = 1s, .partition_opener = opener_.get(), }, &path)); } } // Check that the target partitions have the same content. for (const auto& name : {"sys", "vnd"}) { ASSERT_TRUE(IsPartitionUnchanged(name + flashed_slot_suffix)); } // There should be no snapshot to merge. auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, flashed_slot_suffix)); ASSERT_EQ(UpdateState::Cancelled, new_sm->InitiateMergeAndWait()); // Next OTA calls CancelUpdate no matter what. ASSERT_TRUE(new_sm->CancelUpdate()); } INSTANTIATE_TEST_SUITE_P(, FlashAfterUpdateTest, Combine(Values(0, 1), Bool()), [](const TestParamInfo<FlashAfterUpdateTest::ParamType>& info) { return "Flash"s + (std::get<0>(info.param) ? "New"s : "Old"s) + "Slot"s + (std::get<1>(info.param) ? "After"s : "Before"s) + "Merge"s; }); } // namespace snapshot } // namespace android Loading
fs_mgr/libsnapshot/snapshotctl.cpp +1 −14 Original line number Diff line number Diff line Loading @@ -60,24 +60,11 @@ bool MergeCmdHandler(int argc, char** argv) { android::base::InitLogging(argv, &android::base::StdioLogger); } auto sm = SnapshotManager::New(); auto state = SnapshotManager::New()->InitiateMergeAndWait(); auto state = sm->GetUpdateState(); if (state == UpdateState::None) { LOG(INFO) << "Can't find any snapshot to merge."; return true; } if (state == UpdateState::Unverified) { if (!sm->InitiateMerge()) { LOG(ERROR) << "Failed to initiate merge."; return false; } } // All other states can be handled by ProcessUpdateState. LOG(INFO) << "Waiting for any merge to complete. This can take up to 1 minute."; state = SnapshotManager::New()->ProcessUpdateState(); if (state == UpdateState::MergeCompleted) { auto end = std::chrono::steady_clock::now(); auto passed = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(); Loading