fs_mgr/libsnapshot/cow_snapuserd_test.cpp  +2 −6

@@ -239,16 +239,12 @@ void CowSnapuserdTest::CreateCowDevice() {
     ASSERT_TRUE(rnd_fd > 0);

     std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
-    std::unique_ptr<uint8_t[]> random_buffer_2_ = std::make_unique<uint8_t[]>(size_);

     // Fill random data
     for (size_t j = 0; j < (size_ / 1_MiB); j++) {
         ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
                   true);
-        ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_2_.get() + offset, 1_MiB, 0),
-                  true);
-
         offset += 1_MiB;
     }

@@ -280,7 +276,7 @@ void CowSnapuserdTest::CreateCowDevice() {
     size_t blk_random2_replace_start = blk_zero_copy_end;

-    ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_2_.get(), size_));
+    ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_1_.get(), size_));

     // Flush operations
     ASSERT_TRUE(writer.Finalize());

@@ -292,7 +288,7 @@ void CowSnapuserdTest::CreateCowDevice() {
     ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), size_, size_), true);
     memcpy((char*)orig_buffer_.get() + size_, random_buffer_1_.get(), size_);
     memcpy((char*)orig_buffer_.get() + (size_ * 2), (void*)zero_buffer.c_str(), size_);
-    memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_2_.get(), size_);
+    memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_1_.get(), size_);
 }

 void CowSnapuserdTest::InitCowDevice() {

fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h  +2 −0

@@ -50,6 +50,8 @@ static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1);
 static constexpr uint32_t BLOCK_SIZE = 4096;
 static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SIZE) - 1);

+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
+
 // This structure represents the kernel COW header.
 // All the below fields should be in Little Endian format.
 struct disk_header {
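As a quick illustration of the rounding-up division the new DIV_ROUND_UP macro performs (the rewritten ReadData below uses it to size its per-block loop), here is a small self-contained C++ sketch; kBlockSize is a local stand-in for BLOCK_SIZE, not part of the change:

    #include <cassert>
    #include <cstdint>

    // Same rounding-up integer division as the DIV_ROUND_UP macro added above.
    #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))

    int main() {
        constexpr uint32_t kBlockSize = 4096;  // local stand-in for BLOCK_SIZE
        // A 4096-byte request maps to exactly one 4k COW op...
        assert(DIV_ROUND_UP(4096u, kBlockSize) == 1);
        // ...while a 4097-byte request needs a second op for the tail.
        assert(DIV_ROUND_UP(4097u, kBlockSize) == 2);
        // A sub-block 512-byte request still touches one op.
        assert(DIV_ROUND_UP(512u, kBlockSize) == 1);
        return 0;
    }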
fs_mgr/libsnapshot/snapuserd.cpp  +230 −180

@@ -129,88 +129,126 @@ bool Snapuserd::ProcessZeroOp() {
     return true;
 }

-/*
- * Read the data of size bytes from a given chunk.
- *
- * Kernel can potentially merge the blocks if the
- * successive chunks are contiguous. For chunk size of 8,
- * there can be 256 disk exceptions; and if
- * all 256 disk exceptions are contiguous, kernel can merge
- * them into a single IO.
- *
- * Since each chunk in the disk exception
- * mapping represents a 4k block, kernel can potentially
- * issue 256*4k = 1M IO in one shot.
- *
- * Even though kernel assumes that the blocks are
- * contiguous, we need to split the 1M IO into 4k chunks
- * as each operation represents 4k and it can either be:
- *
- * 1: Replace operation
- * 2: Copy operation
- * 3: Zero operation
- */
-bool Snapuserd::ReadData(chunk_t chunk, size_t size) {
-    size_t read_size = size;
-    bool ret = true;
-    chunk_t chunk_key = chunk;
-
-    if (!((read_size & (BLOCK_SIZE - 1)) == 0)) {
-        SNAP_LOG(ERROR) << "ReadData - unaligned read_size: " << read_size;
-        return false;
-    }
-
-    while (read_size > 0) {
-        const CowOperation* cow_op = chunk_map_[chunk_key];
-        CHECK(cow_op != nullptr);
-
-        switch (cow_op->type) {
-            case kCowReplaceOp: {
-                ret = ProcessReplaceOp(cow_op);
-                break;
-            }
-
-            case kCowZeroOp: {
-                ret = ProcessZeroOp();
-                break;
-            }
-
-            case kCowCopyOp: {
-                ret = ProcessCopyOp(cow_op);
-                break;
-            }
-
-            default: {
-                SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
-                ret = false;
-                break;
-            }
-        }
-
-        if (!ret) {
-            SNAP_LOG(ERROR) << "ReadData failed for operation: " << cow_op->type;
-            return false;
-        }
-
-        // Update the buffer offset
-        bufsink_.UpdateBufferOffset(BLOCK_SIZE);
-
-        read_size -= BLOCK_SIZE;
-
-        // Iterate the chunks incrementally; while constructing
-        // the metadata, we know that the chunk IDs are contiguous
-        chunk_key += 1;
-
-        if (cow_op->type == kCowCopyOp) {
-            CHECK(read_size == 0);
-        }
-    }
-
-    // Reset the buffer offset
-    bufsink_.ResetBufferOffset();
-    return ret;
-}
+bool Snapuserd::ProcessCowOp(const CowOperation* cow_op) {
+    CHECK(cow_op != nullptr);
+
+    switch (cow_op->type) {
+        case kCowReplaceOp: {
+            return ProcessReplaceOp(cow_op);
+        }
+
+        case kCowZeroOp: {
+            return ProcessZeroOp();
+        }
+
+        case kCowCopyOp: {
+            return ProcessCopyOp(cow_op);
+        }
+
+        default: {
+            SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+        }
+    }
+    return false;
+}
+
+int Snapuserd::ReadUnalignedSector(sector_t sector, size_t size,
+                                   std::map<sector_t, const CowOperation*>::iterator& it) {
+    size_t skip_sector_size = 0;
+
+    SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
+                    << " Aligned sector: " << it->second;
+
+    if (!ProcessCowOp(it->second)) {
+        SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed";
+        return -1;
+    }
+
+    int num_sectors_skip = sector - it->first;
+
+    if (num_sectors_skip > 0) {
+        skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
+        char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
+        struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
+
+        memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
+                (BLOCK_SIZE - skip_sector_size));
+    }
+
+    bufsink_.ResetBufferOffset();
+    return std::min(size, (BLOCK_SIZE - skip_sector_size));
+}
+
+/*
+ * Read the data for a given COW Operation.
+ *
+ * Kernel can issue IO at a sector granularity.
+ * Hence, an IO may end up reading partial
+ * data from a COW operation, or the request may be
+ * interspersed between two COW operations.
+ */
+int Snapuserd::ReadData(sector_t sector, size_t size) {
+    /*
+     * chunk_map stores COW operations at 4k granularity.
+     * If the requested IO sector falls on the 4k
+     * boundary, then we can read the COW op directly without
+     * any issue.
+     *
+     * However, if the requested sector is not 4k aligned,
+     * then we have to find the nearest COW operation
+     * and chop the 4k block to fetch the requested sector.
+     */
+    std::map<sector_t, const CowOperation*>::iterator it = chunk_map_.find(sector);
+    if (it == chunk_map_.end()) {
+        it = chunk_map_.lower_bound(sector);
+        if (it != chunk_map_.begin()) {
+            --it;
+        }
+
+        /*
+         * If the IO spans two COW operations, split
+         * the IO into two parts:
+         *
+         * 1: Read the first part from the first COW op.
+         * 2: Read the second part from the next COW op.
+         *
+         * Ex: Let's say we have a 1024-byte IO request.
+         *
+         * 0       COW OP-1  4096     COW OP-2  8192
+         * |******************|*******************|
+         *               |*****|*****|
+         *             3584         4608
+         *               <--- 1024B --->
+         *
+         * We have two COW operations which are 4k blocks.
+         * The IO is requested for 1024 bytes spanning
+         * both COW operations. We split this IO
+         * into two parts:
+         *
+         * 1: IO of size 512B from offset 3584 bytes (COW OP-1)
+         * 2: IO of size 512B from offset 4096 bytes (COW OP-2)
+         */
+        return ReadUnalignedSector(sector, size, it);
+    }
+
+    int num_ops = DIV_ROUND_UP(size, BLOCK_SIZE);
+    while (num_ops) {
+        if (!ProcessCowOp(it->second)) {
+            return -1;
+        }
+        num_ops -= 1;
+        it++;
+
+        // Update the buffer offset
+        bufsink_.UpdateBufferOffset(BLOCK_SIZE);
+
+        SNAP_LOG(DEBUG) << "ReadData at sector: " << sector << " size: " << size;
+    }
+
+    // Reset the buffer offset
+    bufsink_.ResetBufferOffset();
+    return size;
+}
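The lower_bound-then-step-back idiom above is what locates the covering COW op for an unaligned sector. A minimal standalone sketch of that lookup, with plain ints standing in for the CowOperation pointers and 8 sectors per 4k block assumed:

    #include <cassert>
    #include <cstdint>
    #include <map>

    using sector_t = uint64_t;

    int main() {
        constexpr uint32_t kSectorsPerBlock = 8;  // 4096 / 512
        // Stand-in for chunk_map_: mapped values are block labels
        // instead of CowOperation pointers.
        std::map<sector_t, int> chunk_map;
        chunk_map[0] = 1;                 // COW OP-1 covers sectors 0..7
        chunk_map[kSectorsPerBlock] = 2;  // COW OP-2 covers sectors 8..15

        // Sector 7 (byte offset 3584) is not a key, so find() misses...
        sector_t sector = 7;
        auto it = chunk_map.find(sector);
        assert(it == chunk_map.end());

        // ...and the covering op at or below it is found the same way
        // ReadData does: lower_bound, then step back one entry.
        it = chunk_map.lower_bound(sector);
        if (it != chunk_map.begin()) {
            --it;
        }
        assert(it->first == 0 && it->second == 1);  // COW OP-1
        return 0;
    }

std::map keeps its keys ordered, which is what makes lower_bound meaningful here; the unordered_map this change replaces (see the snapuserd.h diff below) offers no such nearest-key query.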
@@ -261,9 +299,7 @@ bool Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
     if (divresult.quot < vec_.size()) {
         size = exceptions_per_area_ * sizeof(struct disk_exception);

-        if (read_size > size) {
-            return false;
-        }
+        CHECK(read_size == size);

         void* buffer = bufsink_.GetPayloadBuffer(size);
         CHECK(buffer != nullptr);

@@ -329,7 +365,7 @@ int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer,
         if (cow_de->new_chunk != 0) {
             merged_ops_cur_iter += 1;
             offset += sizeof(struct disk_exception);
-            const CowOperation* cow_op = chunk_map_[cow_de->new_chunk];
+            const CowOperation* cow_op = chunk_map_[ChunkToSector(cow_de->new_chunk)];
             CHECK(cow_op != nullptr);
             CHECK(cow_op->new_block == cow_de->old_chunk);
             if (cow_op->type == kCowCopyOp) {

@@ -563,7 +599,7 @@ bool Snapuserd::ReadMetadata() {
         SNAP_LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;

         // Store operation pointer.
-        chunk_map_[data_chunk_id] = cow_op;
+        chunk_map_[ChunkToSector(data_chunk_id)] = cow_op;
         num_ops += 1;
         offset += sizeof(struct disk_exception);
         cowop_riter_->Next();
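Both call sites now key chunk_map_ by the chunk's starting sector rather than by the chunk ID itself. A small sketch of the shift arithmetic involved, assuming CHUNK_SIZE = 8 sectors so CHUNK_SHIFT = 3, matching the constants in snapuserd_kernel.h:

    #include <cassert>
    #include <cstdint>

    using chunk_t = uint64_t;
    using sector_t = uint64_t;

    // Local copies of the inline helpers from snapuserd.h; a chunk is
    // 8 sectors (8 * 512 = 4k), so the shift is 3.
    constexpr uint32_t kChunkShift = 3;

    sector_t ChunkToSector(chunk_t chunk) { return chunk << kChunkShift; }
    chunk_t SectorToChunk(sector_t sector) { return sector >> kChunkShift; }

    int main() {
        // Chunk 1 begins at sector 8; chunk 2 at sector 16.
        assert(ChunkToSector(1) == 8);
        assert(ChunkToSector(2) == 16);
        // The reverse conversion truncates: every sector inside a
        // chunk maps back to that chunk.
        assert(SectorToChunk(8) == 1);
        assert(SectorToChunk(15) == 1);
        return 0;
    }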
@@ -680,29 +716,77 @@ bool Snapuserd::InitBackingAndControlDevice() {
     return true;
 }

-bool Snapuserd::Run() {
+bool Snapuserd::DmuserWriteRequest() {
     struct dm_user_header* header = bufsink_.GetHeaderPtr();

-    bufsink_.Clear();
-
-    if (!ReadDmUserHeader()) {
-        SNAP_LOG(ERROR) << "ReadDmUserHeader failed";
-        return false;
-    }
-
-    SNAP_LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
-    SNAP_LOG(DEBUG) << "msg->type: " << std::hex << header->type;
-    SNAP_LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
-    SNAP_LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
-    SNAP_LOG(DEBUG) << "msg->len: " << std::hex << header->len;
-
-    switch (header->type) {
-        case DM_USER_REQ_MAP_READ: {
-            size_t remaining_size = header->len;
-            loff_t offset = 0;
-            do {
-                size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
-
-                header->type = DM_USER_RESP_SUCCESS;
-                chunk_t chunk = SectorToChunk(header->sector);
+    // device mapper has the capability to allow
+    // targets to flush the cache when writes are completed. This
+    // is controlled by each target by a flag "flush_supported".
+    // This flag is set by dm-user. When flush is supported,
+    // a number of zero-length bio's will be submitted to
+    // the target for the purpose of flushing the cache. It is the
+    // responsibility of the target driver - which is dm-user in this
+    // case - to remap these bio's to the underlying device. Since
+    // there is no underlying device for dm-user, these zero-length
+    // bio's get routed to the daemon.
+    //
+    // Flush operations are generated post merge by dm-snap by having
+    // the REQ_PREFLUSH flag set. The snapuserd daemon doesn't have anything
+    // to flush per se; hence, just respond back with a success message.
+    if (header->sector == 0) {
+        CHECK(header->len == 0);
+        header->type = DM_USER_RESP_SUCCESS;
+        if (!WriteDmUserPayload(0)) {
+            return false;
+        }
+        return true;
+    }
+
+    size_t remaining_size = header->len;
+    size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+    CHECK(read_size == BLOCK_SIZE);
+
+    CHECK(header->sector > 0);
+    chunk_t chunk = SectorToChunk(header->sector);
+    CHECK(chunk_map_.find(header->sector) == chunk_map_.end());
+
+    void* buffer = bufsink_.GetPayloadBuffer(read_size);
+    CHECK(buffer != nullptr);
+    header->type = DM_USER_RESP_SUCCESS;
+
+    if (!ReadDmUserPayload(buffer, read_size)) {
+        SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
+                        << "Sector: " << header->sector;
+        header->type = DM_USER_RESP_ERROR;
+    }
+
+    if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
+        SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
+                        << "Sector: " << header->sector;
+        header->type = DM_USER_RESP_ERROR;
+    } else {
+        SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
+                        << "Sector: " << header->sector;
+    }
+
+    if (!WriteDmUserPayload(0)) {
+        return false;
+    }
+
+    return true;
+}
+
+bool Snapuserd::DmuserReadRequest() {
+    struct dm_user_header* header = bufsink_.GetHeaderPtr();
+    size_t remaining_size = header->len;
+    loff_t offset = 0;
+    sector_t sector = header->sector;
+    do {
+        size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+        int ret = read_size;
+        header->type = DM_USER_RESP_SUCCESS;
+        chunk_t chunk = SectorToChunk(header->sector);

         // Request to sector 0 is always for kernel
         // representation of COW header. This IO should be only

@@ -715,14 +799,8 @@ bool Snapuserd::Run() {
             ConstructKernelCowHeader();
             SNAP_LOG(DEBUG) << "Kernel header constructed";
         } else {
-            // Convert the sector number to a chunk ID.
-            //
-            // Check if the chunk ID represents a metadata
-            // page. If the chunk ID is not found in the
-            // vector, then it points to a metadata page.
-            chunk_t chunk = SectorToChunk(header->sector);
-            if (chunk_map_.find(chunk) == chunk_map_.end()) {
+            if (!offset && (read_size == BLOCK_SIZE) &&
+                chunk_map_.find(header->sector) == chunk_map_.end()) {
                 if (!ReadDiskExceptions(chunk, read_size)) {
                     SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk
                                     << "Sector: " << header->sector;

@@ -732,10 +810,9 @@ bool Snapuserd::Run() {
                                     << "Sector: " << header->sector;
                 }
             } else {
-                SNAP_LOG(DEBUG) << "ReadData: chunk: " << chunk << " len: " << header->len
-                                << " read_size: " << read_size << " offset: " << offset;
-                chunk_t num_chunks_read = (offset >> BLOCK_SHIFT);
-                if (!ReadData(chunk + num_chunks_read, read_size)) {
+                chunk_t num_sectors_read = (offset >> SECTOR_SHIFT);
+                ret = ReadData(sector + num_sectors_read, read_size);
+                if (ret < 0) {
                     SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk
                                     << "Sector: " << header->sector;
                     header->type = DM_USER_RESP_ERROR;
@@ -748,72 +825,45 @@ bool Snapuserd::Run() {

         // Daemon will not be terminated if there is any error. We will
         // just send the error back to dm-user.
-        if (!WriteDmUserPayload(read_size)) {
+        if (!WriteDmUserPayload(ret)) {
             return false;
         }

-        remaining_size -= read_size;
-        offset += read_size;
-    } while (remaining_size);
-
-    break;
-    }
-
-    case DM_USER_REQ_MAP_WRITE: {
-        // device mapper has the capability to allow
-        // targets to flush the cache when writes are completed. This
-        // is controlled by each target by a flag "flush_supported".
-        // This flag is set by dm-user. When flush is supported,
-        // a number of zero-length bio's will be submitted to
-        // the target for the purpose of flushing cache. It is the
-        // responsibility of the target driver - which is dm-user in this
-        // case, to remap these bio's to the underlying device. Since,
-        // there is no underlying device for dm-user, this zero length
-        // bio's gets routed to daemon.
-        //
-        // Flush operations are generated post merge by dm-snap by having
-        // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything
-        // to flush per se; hence, just respond back with a success message.
-        if (header->sector == 0) {
-            CHECK(header->len == 0);
-            header->type = DM_USER_RESP_SUCCESS;
-            if (!WriteDmUserPayload(0)) {
-                return false;
-            }
-            break;
-        }
-
-        size_t remaining_size = header->len;
-        size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
-        CHECK(read_size == BLOCK_SIZE);
-
-        CHECK(header->sector > 0);
-        chunk_t chunk = SectorToChunk(header->sector);
-        CHECK(chunk_map_.find(chunk) == chunk_map_.end());
-
-        void* buffer = bufsink_.GetPayloadBuffer(read_size);
-        CHECK(buffer != nullptr);
-        header->type = DM_USER_RESP_SUCCESS;
-
-        if (!ReadDmUserPayload(buffer, read_size)) {
-            SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
-                            << "Sector: " << header->sector;
-            header->type = DM_USER_RESP_ERROR;
-        }
-
-        if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
-            SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
-                            << "Sector: " << header->sector;
-            header->type = DM_USER_RESP_ERROR;
-        } else {
-            SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
-                            << "Sector: " << header->sector;
-        }
-
-        if (!WriteDmUserPayload(0)) {
-            return false;
-        }
-        break;
-    }
-    }
+        remaining_size -= ret;
+        offset += ret;
+    } while (remaining_size > 0);
+
+    return true;
+}
+
+bool Snapuserd::Run() {
+    struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+    bufsink_.Clear();
+
+    if (!ReadDmUserHeader()) {
+        SNAP_LOG(ERROR) << "ReadDmUserHeader failed";
+        return false;
+    }
+
+    SNAP_LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
+    SNAP_LOG(DEBUG) << "msg->type: " << std::hex << header->type;
+    SNAP_LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
+    SNAP_LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
+    SNAP_LOG(DEBUG) << "msg->len: " << std::hex << header->len;
+
+    switch (header->type) {
+        case DM_USER_REQ_MAP_READ: {
+            if (!DmuserReadRequest()) {
+                return false;
+            }
+            break;
+        }
+
+        case DM_USER_REQ_MAP_WRITE: {
+            if (!DmuserWriteRequest()) {
+                return false;
+            }
+            break;
+        }
+    }

     return true;
 }
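The switch from advancing by chunks (offset >> BLOCK_SHIFT) to advancing by sectors (offset >> SECTOR_SHIFT) is what lets a partially served request resume at the right position. A hedged sketch of the arithmetic, assuming SECTOR_SHIFT = 9 and BLOCK_SHIFT = 12 per the 512-byte-sector and 4k-block constants in snapuserd_kernel.h:

    #include <cassert>
    #include <cstdint>

    // Local stand-ins for the header constants (assumptions, see lead-in).
    constexpr uint32_t kSectorShift = 9;   // 512-byte sectors
    constexpr uint32_t kBlockShift = 12;   // 4096-byte blocks

    int main() {
        // Suppose the first pass through DmuserReadRequest's loop served
        // 4096 bytes: the next pass must start 8 sectors further in.
        uint64_t offset = 4096;
        assert((offset >> kSectorShift) == 8);

        // A 512-byte partial result advances by exactly one sector,
        // whereas the old chunk-based arithmetic would round that
        // sub-block progress down to zero.
        offset = 512;
        assert((offset >> kSectorShift) == 1);
        assert((offset >> kBlockShift) == 0);
        return 0;
    }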
fs_mgr/libsnapshot/snapuserd.h  +17 −4

@@ -22,9 +22,9 @@
 #include <cstring>
 #include <iostream>
 #include <limits>
+#include <map>
 #include <string>
 #include <thread>
-#include <unordered_map>
 #include <vector>

 #include <android-base/file.h>

@@ -72,6 +72,9 @@ class Snapuserd final {
     bool IsAttached() const { return ctrl_fd_ >= 0; }

   private:
+    bool DmuserReadRequest();
+    bool DmuserWriteRequest();
+
     bool ReadDmUserHeader();
     bool ReadDmUserPayload(void* buffer, size_t size);
     bool WriteDmUserPayload(size_t size);

@@ -79,10 +82,13 @@ class Snapuserd final {
     bool ReadMetadata();
     bool ZerofillDiskExceptions(size_t read_size);
     bool ReadDiskExceptions(chunk_t chunk, size_t size);
-    bool ReadData(chunk_t chunk, size_t size);
+    int ReadUnalignedSector(sector_t sector, size_t size,
+                            std::map<sector_t, const CowOperation*>::iterator& it);
+    int ReadData(sector_t sector, size_t size);
     bool IsChunkIdMetadata(chunk_t chunk);
     chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
+    bool ProcessCowOp(const CowOperation* cow_op);
     bool ProcessReplaceOp(const CowOperation* cow_op);
     bool ProcessCopyOp(const CowOperation* cow_op);
     bool ProcessZeroOp();

@@ -94,6 +100,7 @@ class Snapuserd final {
     bool ProcessMergeComplete(chunk_t chunk, void* buffer);
     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
+    bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SIZE - 1)) == 0); }

     std::string cow_device_;
     std::string backing_store_device_;

@@ -116,9 +123,15 @@ class Snapuserd final {
     // mapping of old-chunk to new-chunk
     std::vector<std::unique_ptr<uint8_t[]>> vec_;

-    // Key - Chunk ID
+    // Key - Sector
     // Value - cow operation
-    std::unordered_map<chunk_t, const CowOperation*> chunk_map_;
+    //
+    // chunk_map stores the pseudo mapping of sector
+    // to COW operations. Each COW op is 4k; however,
+    // we can get a read request which is as small
+    // as 512 bytes. Hence, we need to binary search
+    // in the chunk_map to find the nearest COW op.
+    std::map<sector_t, const CowOperation*> chunk_map_;

     bool metadata_read_done_ = false;
     BufferSink bufsink_;
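The new IsBlockAligned() helper relies on BLOCK_SIZE being a power of two, so a single mask test detects 4k alignment. A standalone sketch with a local kBlockSize standing in for the header constant:

    #include <cassert>
    #include <cstdint>

    // Same power-of-two alignment test as IsBlockAligned(): for a 4096-byte
    // block, (size & 4095) is zero exactly for multiples of 4096.
    constexpr uint32_t kBlockSize = 4096;  // stand-in for BLOCK_SIZE

    bool IsBlockAligned(int read_size) {
        return ((read_size & (kBlockSize - 1)) == 0);
    }

    int main() {
        assert(IsBlockAligned(4096));
        assert(IsBlockAligned(8192));
        assert(!IsBlockAligned(512));   // a sector-sized request
        assert(!IsBlockAligned(4097));
        return 0;
    }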