Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0819569 authored by Akilesh Kailash's avatar Akilesh Kailash
Browse files

libsnapshot:snapuserd: Read the cow_operation in reverse order



This is required for the merge IO path. During the merge process,
the kernel scans the metadata page backwards when merge
is initiated. Since merge ordering should follow our internal
COW format, read the COW operations backwards and populate the metadata.

Additionally, Kernel can merge successive operations if the two chunk IDs
are contiguous. This can be problematic when there is a crash
during merge; specifically when the merge operation has dependency.
These dependencies can only happen during copy operations of the
overlapping regions. To avoid this problem, we make sure that
copy operations are merged individually.

Test: cow_snapuserd_test
Bug: 168311203

Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: Ic9c52b323ce58af897cfa343bf9277c8f1f022e3
parent d021544d
Loading
Loading
Loading
Loading
+36 −0
Original line number Diff line number Diff line
@@ -246,10 +246,46 @@ const CowOperation& CowOpIter::Get() {
    return (*op_iter_);
}

class CowOpReverseIter final : public ICowOpReverseIter {
  public:
    explicit CowOpReverseIter(std::shared_ptr<std::vector<CowOperation>> ops);

    bool Done() override;
    const CowOperation& Get() override;
    void Next() override;

  private:
    std::shared_ptr<std::vector<CowOperation>> ops_;
    std::vector<CowOperation>::reverse_iterator op_riter_;
};

// Takes a shared reference to the operation list and positions the iterator
// on its last element (or at rend() if the list is empty).
CowOpReverseIter::CowOpReverseIter(std::shared_ptr<std::vector<CowOperation>> ops)
    : ops_(ops) {
    // Assigned in the body so this does not depend on member declaration order.
    op_riter_ = ops_->rbegin();
}

// Returns true when the iterator has walked past the first operation,
// i.e. there is nothing left to Get().
bool CowOpReverseIter::Done() {
    const auto finished = ops_->rend();
    return op_riter_ == finished;
}

// Moves one operation closer to the front of the list.
// CHECK-fails if the iterator is already exhausted.
void CowOpReverseIter::Next() {
    CHECK(!Done());
    ++op_riter_;
}

// Returns a reference to the operation at the current position.
// CHECK-fails if the iterator is already exhausted.
const CowOperation& CowOpReverseIter::Get() {
    CHECK(!Done());
    const CowOperation& current = *op_riter_;
    return current;
}

// Creates a forward iterator over the operations held in ops_.
std::unique_ptr<ICowOpIter> CowReader::GetOpIter() {
    std::unique_ptr<ICowOpIter> iter = std::make_unique<CowOpIter>(ops_);
    return iter;
}

// Creates a reverse iterator over the operations held in ops_; the first
// operation it yields is the last one in the list.
std::unique_ptr<ICowOpReverseIter> CowReader::GetRevOpIter() {
    std::unique_ptr<ICowOpReverseIter> iter = std::make_unique<CowOpReverseIter>(ops_);
    return iter;
}

bool CowReader::GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read) {
    // Validate the offset, taking care to acknowledge possible overflow of offset+len.
    if (offset < sizeof(header_) || offset >= fd_size_ - sizeof(footer_) || len >= fd_size_ ||
+23 −1
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ namespace android {
namespace snapshot {

class ICowOpIter;
class ICowOpReverseIter;

// A ByteSink object handles requests for a buffer of a specific size. It
// always owns the underlying buffer. It's designed to minimize potential
@@ -73,6 +74,9 @@ class ICowReader {
    // Return an iterator for retrieving CowOperation entries.
    virtual std::unique_ptr<ICowOpIter> GetOpIter() = 0;

    // Return a reverse iterator for retrieving CowOperation entries.
    virtual std::unique_ptr<ICowOpReverseIter> GetRevOpIter() = 0;

    // Get decoded bytes from the data section, handling any decompression.
    // All retrieved data is passed to the sink.
    virtual bool ReadData(const CowOperation& op, IByteSink* sink) = 0;
@@ -93,6 +97,21 @@ class ICowOpIter {
    virtual void Next() = 0;
};

// Iterate over a sequence of COW operations in reverse order, i.e. starting
// from the last operation and moving towards the first.
//
// Typical use:
//     for (auto it = reader->GetRevOpIter(); !it->Done(); it->Next()) {
//         const CowOperation& op = it->Get();
//     }
class ICowOpReverseIter {
  public:
    virtual ~ICowOpReverseIter() = default;

    // True if there are no more items to read (the iterator is exhausted),
    // false while Get() and Next() may still be called.
    virtual bool Done() = 0;

    // Read the current operation. Must not be called once Done() is true.
    virtual const CowOperation& Get() = 0;

    // Advance to the next item, which is the preceding operation in the
    // underlying sequence. Must not be called once Done() is true.
    virtual void Next() = 0;
};

class CowReader : public ICowReader {
  public:
    CowReader();
@@ -107,8 +126,11 @@ class CowReader : public ICowReader {

    // Create a CowOpIter object which contains footer_.num_ops
    // CowOperation objects. Get() returns a unique CowOperation object
    // whose lifetime depends on the CowOpIter object
    // whose lifetime depends on the CowOpIter object; the return
    // value of these will never be null.
    std::unique_ptr<ICowOpIter> GetOpIter() override;
    std::unique_ptr<ICowOpReverseIter> GetRevOpIter() override;

    bool ReadData(const CowOperation& op, IByteSink* sink) override;

    bool GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read);
+9 −5
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include <limits>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include <android-base/file.h>
@@ -69,6 +70,9 @@ class Snapuserd final {

    bool Init();
    int Run();
    const std::string& GetControlDevicePath() { return control_device_; }

  private:
    int ReadDmUserHeader();
    int WriteDmUserPayload(size_t size);
    int ConstructKernelCowHeader();
@@ -76,10 +80,9 @@ class Snapuserd final {
    int ZerofillDiskExceptions(size_t read_size);
    int ReadDiskExceptions(chunk_t chunk, size_t size);
    int ReadData(chunk_t chunk, size_t size);
    bool IsChunkIdMetadata(chunk_t chunk);
    chunk_t GetNextAllocatableChunkId(chunk_t chunk);

    const std::string& GetControlDevicePath() { return control_device_; }

  private:
    int ProcessReplaceOp(const CowOperation* cow_op);
    int ProcessCopyOp(const CowOperation* cow_op);
    int ProcessZeroOp();
@@ -95,15 +98,16 @@ class Snapuserd final {
    uint32_t exceptions_per_area_;

    std::unique_ptr<ICowOpIter> cowop_iter_;
    std::unique_ptr<ICowOpReverseIter> cowop_riter_;
    std::unique_ptr<CowReader> reader_;

    // Vector of disk exception which is a
    // mapping of old-chunk to new-chunk
    std::vector<std::unique_ptr<uint8_t[]>> vec_;

    // Index - Chunk ID
    // Key - Chunk ID
    // Value - cow operation
    std::vector<const CowOperation*> chunk_vec_;
    std::unordered_map<chunk_t, const CowOperation*> chunk_map_;

    bool metadata_read_done_;
    BufferSink bufsink_;
+65 −58
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ int Snapuserd::ReadData(chunk_t chunk, size_t size) {
    CHECK((read_size & (BLOCK_SIZE - 1)) == 0);

    while (read_size > 0) {
        const CowOperation* cow_op = chunk_vec_[chunk_key];
        const CowOperation* cow_op = chunk_map_[chunk_key];
        CHECK(cow_op != nullptr);
        int result;

@@ -202,6 +202,8 @@ int Snapuserd::ReadData(chunk_t chunk, size_t size) {
        // are contiguous
        chunk_key += 1;

        if (cow_op->type == kCowCopyOp) CHECK(read_size == 0);

        // This is similar to the way when chunk IDs were assigned
        // in ReadMetadata().
        //
@@ -287,6 +289,24 @@ int Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
    return size;
}

// Returns true if |chunk| is the chunk-id of a metadata page rather than a
// data page. Chunk-ids repeat with a period of (exceptions_per_area_ + 1),
// and the id whose remainder within that period equals
// NUM_SNAPSHOT_HDR_CHUNKS is the metadata page of the area.
bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
    const uint32_t chunks_per_area = exceptions_per_area_ + 1;
    return lldiv(chunk, chunks_per_area).rem == NUM_SNAPSHOT_HDR_CHUNKS;
}

// Returns the chunk-id that follows |chunk| and can hold data. The immediate
// successor is used unless it is a metadata page, in which case that id is
// skipped and the one after it is returned.
chunk_t Snapuserd::GetNextAllocatableChunkId(chunk_t chunk) {
    const chunk_t candidate = chunk + 1;
    return IsChunkIdMetadata(candidate) ? candidate + 1 : candidate;
}

/*
 * Read the metadata from COW device and
 * construct the metadata as required by the kernel.
@@ -304,12 +324,26 @@ int Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
 *    This represents the old_chunk in the kernel COW format
 * 4: We need to assign new_chunk for a corresponding old_chunk
 * 5: The algorithm is similar to how kernel assigns chunk number
 *    while creating exceptions.
 *    while creating exceptions. However, there are a few cases
 *    which need to be addressed here:
 *      a: During the merge process, the kernel scans the metadata page
 *      backwards when merge is initiated. Since we need
 *      to make sure that the merge ordering follows our COW format,
 *      we read the COW operations backwards and populate the
 *      metadata so that when the kernel starts merging from the end,
 *      those ops correspond to the beginning of our COW format.
 *      b: Kernel can merge successive operations if the two chunk IDs
 *      are contiguous. This can be problematic when there is a crash
 *      during merge; specifically when the merge operation has dependency.
 *      These dependencies can only happen during copy operations.
 *
 *      To avoid this problem, we make sure that no two copy-operations
 *      have contiguous chunk IDs. Additionally, we make sure
 *      that each copy operation is merged individually.
 * 6: Use a monotonically increasing chunk number to assign the
 *    new_chunk
 * 7: Each chunk-id represents either a: Metadata page or b: Data page
 * 8: Chunk-id representing a data page is stored in a vector. Index is the
 *    chunk-id and value is the pointer to the CowOperation
 * 8: Chunk-id representing a data page is stored in a map.
 * 9: Chunk-id representing a metadata page is converted into a vector
 *    index. We store this in vector as kernel requests metadata during
 *    two stage:
@@ -327,7 +361,10 @@ int Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
int Snapuserd::ReadMetadata() {
    reader_ = std::make_unique<CowReader>();
    CowHeader header;
    CowFooter footer;
    CowOptions options;
    bool prev_copy_op = false;

    LOG(DEBUG) << "ReadMetadata Start...";

    if (!reader_->Parse(cow_fd_)) {
        LOG(ERROR) << "Failed to parse";
@@ -339,48 +376,33 @@ int Snapuserd::ReadMetadata() {
        return 1;
    }

    if (!reader_->GetFooter(&footer)) {
        LOG(ERROR) << "Failed to get footer";
        return 1;
    }

    CHECK(header.block_size == BLOCK_SIZE);

    LOG(DEBUG) << "Num-ops: " << std::hex << footer.op.num_ops;
    LOG(DEBUG) << "ops-size: " << std::hex << footer.op.ops_size;

    cowop_iter_ = reader_->GetOpIter();

    if (cowop_iter_ == nullptr) {
        LOG(ERROR) << "Failed to get cowop_iter";
        return 1;
    }
    cowop_riter_ = reader_->GetRevOpIter();

    exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);

    // Start from chunk number 2. Chunk 0 represents header and chunk 1
    // represents first metadata page.
    chunk_t next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1;
    chunk_vec_.push_back(nullptr);
    chunk_vec_.push_back(nullptr);

    loff_t offset = 0;
    std::unique_ptr<uint8_t[]> de_ptr =
            std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));

    // This memset is important. Kernel will stop issuing IO when new-chunk ID
    // is 0. When Area is not filled completely will all 256 exceptions,
    // is 0. When Area is not filled completely with all 256 exceptions,
    // this memset will ensure that metadata read is completed.
    memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
    size_t num_ops = 0;

    while (!cowop_iter_->Done()) {
        const CowOperation* cow_op = &cowop_iter_->Get();
    while (!cowop_riter_->Done()) {
        const CowOperation* cow_op = &cowop_riter_->Get();
        struct disk_exception* de =
                reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);

        if (cow_op->type == kCowFooterOp || cow_op->type == kCowLabelOp) {
            cowop_iter_->Next();
            cowop_riter_->Next();
            continue;
        }

@@ -390,61 +412,52 @@ int Snapuserd::ReadMetadata() {
            return 1;
        }

        if ((cow_op->type == kCowCopyOp || prev_copy_op)) {
            next_free = GetNextAllocatableChunkId(next_free);
        }

        prev_copy_op = (cow_op->type == kCowCopyOp);

        // Construct the disk-exception
        de->old_chunk = cow_op->new_block;
        de->new_chunk = next_free;

        LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;

        // Store operation pointer. Note, new-chunk ID is the index
        chunk_vec_.push_back(cow_op);
        CHECK(next_free == (chunk_vec_.size() - 1));
        // Store operation pointer.
        chunk_map_[next_free] = cow_op;
        num_ops += 1;

        offset += sizeof(struct disk_exception);

        cowop_iter_->Next();
        cowop_riter_->Next();

        // Find the next free chunk-id to be assigned. Check if the next free
        // chunk-id represents a metadata page. If so, skip it.
        next_free += 1;
        uint32_t stride = exceptions_per_area_ + 1;
        lldiv_t divresult = lldiv(next_free, stride);
        num_ops += 1;

        if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
            CHECK(num_ops == exceptions_per_area_);
        if (num_ops == exceptions_per_area_) {
            // Store it in vector at the right index. This maps the chunk-id to
            // vector index.
            vec_.push_back(std::move(de_ptr));
            offset = 0;
            num_ops = 0;

            chunk_t metadata_chunk = (next_free - exceptions_per_area_ - NUM_SNAPSHOT_HDR_CHUNKS);

            LOG(DEBUG) << "Area: " << vec_.size() - 1;
            LOG(DEBUG) << "Metadata-chunk: " << metadata_chunk;
            LOG(DEBUG) << "Sector number of Metadata-chunk: " << (metadata_chunk << CHUNK_SHIFT);

            // Create buffer for next area
            de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
                                                 sizeof(struct disk_exception));
            memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));

            // Since this is a metadata, store at this index
            chunk_vec_.push_back(nullptr);

            // Find the next free chunk-id
            next_free += 1;
            if (cowop_iter_->Done()) {
            if (cowop_riter_->Done()) {
                vec_.push_back(std::move(de_ptr));
                LOG(DEBUG) << "ReadMetadata() completed; Number of Areas: " << vec_.size();
            }
        }

        next_free = GetNextAllocatableChunkId(next_free);
    }

    // Partially filled area
    if (num_ops) {
        LOG(DEBUG) << "Partially filled area num_ops: " << num_ops;
        vec_.push_back(std::move(de_ptr));
        LOG(DEBUG) << "ReadMetadata() completed. Partially filled area num_ops: " << num_ops
                   << "Areas : " << vec_.size();
    }

    return 0;
@@ -569,13 +582,7 @@ int Snapuserd::Run() {
                    // vector, then it points to a metadata page.
                    chunk_t chunk = (header->sector >> CHUNK_SHIFT);

                    if (chunk >= chunk_vec_.size()) {
                        ret = ZerofillDiskExceptions(read_size);
                        if (ret < 0) {
                            LOG(ERROR) << "ZerofillDiskExceptions failed";
                            return ret;
                        }
                    } else if (chunk_vec_[chunk] == nullptr) {
                    if (chunk_map_.find(chunk) == chunk_map_.end()) {
                        ret = ReadDiskExceptions(chunk, read_size);
                        if (ret < 0) {
                            LOG(ERROR) << "ReadDiskExceptions failed";