Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9e7fc7f authored by Tianjie Xu's avatar Tianjie Xu
Browse files

Add implementation of SortedRangeSet

This is useful in imgdiff to maintain the block ranges of
the split source image.

Bug: 34220646
Test: mma && unit tests pass
Change-Id: I6427f2ea50f0e3b0aa3dd01880ec0206679b7429
parent 231c627a
Loading
Loading
Loading
Loading
+47 −0
Original line number Diff line number Diff line
@@ -110,3 +110,50 @@ TEST(RangeSetTest, iterators) {
  }
  ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
}

// Serializing a parsed RangeSet must reproduce the exact text it was parsed from.
TEST(RangeSetTest, tostring) {
  const std::vector<std::string> round_trip_inputs = {
    "2,1,6",
    "4,1,5,8,10",
    "6,1,3,4,6,15,22",
  };
  for (const auto& text : round_trip_inputs) {
    ASSERT_EQ(text, RangeSet::Parse(text).ToString());
  }
}

// Exercises Insert() with single ranges and with whole sets, checking both the merged ranges and
// the resulting block count after every step.
TEST(SortedRangeSetTest, insertion) {
  SortedRangeSet merged({ { 2, 3 }, { 4, 6 }, { 8, 14 } });

  // A range that ends where an existing one starts is fused with it.
  merged.Insert({ 1, 2 });
  ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), merged);
  ASSERT_EQ(size_t{ 10 }, merged.blocks());

  // A range bridging two existing ones collapses them into one.
  merged.Insert({ 3, 5 });
  ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), merged);
  ASSERT_EQ(size_t{ 11 }, merged.blocks());

  // Disjoint ranges supplied out of order are appended in sorted order.
  SortedRangeSet disjoint_extra({ { 20, 22 }, { 15, 18 } });
  merged.Insert(disjoint_extra);
  ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), merged);
  ASSERT_EQ(size_t{ 16 }, merged.blocks());

  // Ranges overlapping the existing ones (and each other) are trimmed and merged.
  SortedRangeSet overlapping_extra({ { 2, 7 }, { 15, 21 }, { 20, 25 } });
  merged.Insert(overlapping_extra);
  ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), merged);
  ASSERT_EQ(size_t{ 22 }, merged.blocks());
}

// Exercises the byte-offset based API: Insert(start, len), Overlaps(start, len) and
// GetOffsetInRangeSet(offset), all of which map file bytes onto 4096-byte blocks.
TEST(SortedRangeSetTest, file_range) {
  constexpr size_t kBlock = 4096;

  SortedRangeSet covered;
  // Exactly one block's worth of bytes starting at block 1.
  covered.Insert(kBlock, kBlock);
  ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), covered);

  // Starts on the last byte of block 2 and ends inside block 9: blocks 2-9.
  covered.Insert(3 * kBlock - 1, 7 * kBlock);
  ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), covered);

  // Starts one byte into block 15 and ends on the first byte of block 19: blocks 15-19.
  covered.Insert(15 * kBlock + 1, 4 * kBlock);
  ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), covered);

  // A 10-byte region straddling the boundary between blocks 1 and 2 hits the set.
  ASSERT_TRUE(covered.Overlaps(2 * kBlock - 1, 10));
  // Blocks 10-14 fall in the gap between the two ranges.
  ASSERT_FALSE(covered.Overlaps(10 * kBlock, 5 * kBlock));

  // Block 1 is the first block of the set, so only the in-block offset remains.
  ASSERT_EQ(size_t{ 10 }, covered.GetOffsetInRangeSet(kBlock + 10));
  // Block 16 is preceded by ten blocks of the set (1-9 and 15).
  ASSERT_EQ(size_t{ 40970 }, covered.GetOffsetInRangeSet(16 * kBlock + 10));
  // Block 10 is not part of the set, so the lookup aborts.
  ASSERT_EXIT(covered.GetOffsetInRangeSet(10 * kBlock + 10), ::testing::KilledBySignal(SIGABRT),
              "");
}
 No newline at end of file
+115 −1
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@

#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>

using Range = std::pair<size_t, size_t>;
@@ -74,6 +75,18 @@ class RangeSet {
    return RangeSet(std::move(pairs));
  }

  // Serializes the set as "<count>,<first>,<second>,...", where <count> is the number of
  // integers that follow (two per range). An empty set serializes to the empty string.
  std::string ToString() const {
    std::string text;
    if (!ranges_.empty()) {
      text = std::to_string(ranges_.size() * 2);
      for (const auto& range : ranges_) {
        text += ',';
        text += std::to_string(range.first);
        text += ',';
        text += std::to_string(range.second);
      }
    }
    return text;
  }

  // Get the block number for the i-th (starting from 0) block in the RangeSet.
  size_t GetBlockNumber(size_t idx) const {
    CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
@@ -157,8 +170,109 @@ class RangeSet {
    return ranges_ != other.ranges_;
  }

 private:
 protected:
  // Actual limit for each value and the total number are both INT_MAX.
  std::vector<Range> ranges_;
  size_t blocks_;
};

// Size of one block in bytes. SortedRangeSet divides file byte offsets by this value to obtain
// block indices.
static constexpr size_t kBlockSize = 4096;

// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
// files when we're handling large zip files. Specifically, we can treat the input file as a
// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
// several smaller chunks based on the zip entries.

// For example, [source: 0-99] can be split into
// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
// Here "10-29" simply means the 10th through the 29th block of the original input file.
// Also, note that the split sources should be mutually exclusive, but they don't need to cover
// every block in the original source.
class SortedRangeSet : public RangeSet {
 public:
  SortedRangeSet() {}

  // Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
  explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
    std::sort(ranges_.begin(), ranges_.end());
  }

  void Insert(const Range& to_insert) {
    SortedRangeSet rs({ to_insert });
    Insert(rs);
  }

  // Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
  void Insert(const SortedRangeSet& rs) {
    if (rs.size() == 0) {
      return;
    }
    // Merge and sort the two RangeSets.
    std::vector<Range> temp = std::move(ranges_);
    std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
    std::sort(temp.begin(), temp.end());

    Clear();
    // Trim overlaps and insert the result back to ranges_.
    Range to_insert = temp.front();
    for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
      if (it->first <= to_insert.second) {
        to_insert.second = std::max(to_insert.second, it->second);
      } else {
        ranges_.push_back(to_insert);
        blocks_ += (to_insert.second - to_insert.first);
        to_insert = *it;
      }
    }
    ranges_.push_back(to_insert);
    blocks_ += (to_insert.second - to_insert.first);
  }

  void Clear() {
    blocks_ = 0;
    ranges_.clear();
  }

  using RangeSet::Overlaps;
  bool Overlaps(size_t start, size_t len) const {
    RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
    return Overlaps(rs);
  }

  // Compute the block range the file occupies, and insert that range.
  void Insert(size_t start, size_t len) {
    Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
    Insert(to_insert);
  }

  // Given an offset of the file, checks if the corresponding block (by considering the file as
  // 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
  // within this SortedRangeSet.
  //
  // For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
  // The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
  //
  // An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
  // item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
  // + 10) in a range represented by this SortedRangeSet.
  size_t GetOffsetInRangeSet(size_t old_offset) const {
    size_t old_block_start = old_offset / kBlockSize;
    size_t new_block_start = 0;
    for (const auto& range : ranges_) {
      // Find the index of old_block_start.
      if (old_block_start >= range.second) {
        new_block_start += (range.second - range.first);
      } else if (old_block_start >= range.first) {
        new_block_start += (old_block_start - range.first);
        return (new_block_start * kBlockSize + old_offset % kBlockSize);
      } else {
        CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: "
                     << this->ToString();
        return 0;
      }
    }
    CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: "
                 << this->ToString();
    return 0;
  }
};
 No newline at end of file