Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 95708eea, authored by Tao Bao and committed by Gerrit Code Review
Browse files

Merge "Split large files for BBOTA v3."

parents 36d91b53 9a5caf2c
Loading
Loading
Loading
Loading
+68 −17
Original line number Diff line number Diff line
@@ -297,7 +297,6 @@ class BlockImageDiff(object):
    out = []

    total = 0
    performs_read = False

    stashes = {}
    stashed_blocks = 0
@@ -415,7 +414,6 @@ class BlockImageDiff(object):
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        performs_read = True
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
@@ -440,7 +438,6 @@ class BlockImageDiff(object):
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        performs_read = True
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
@@ -551,6 +548,7 @@ class BlockImageDiff(object):
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, it deletes the
@@ -566,8 +564,7 @@ class BlockImageDiff(object):
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("  %s replaced due to an explicit stash of %d blocks." % (
              use_cmd, sr.size()))
          print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

@@ -582,8 +579,7 @@ class BlockImageDiff(object):
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("  %s replaced due to an implicit stash of %d blocks." % (
                xf, xf.src_ranges.size()))
            print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
@@ -593,9 +589,13 @@ class BlockImageDiff(object):
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print("  Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))

  def ComputePatches(self, prefix):
    print("Reticulating splines...")
    diff_q = []
@@ -951,6 +951,57 @@ class BlockImageDiff(object):
          a.goes_after[b] = size

  def FindTransfers(self):
    """Parse the file_map to generate all the transfers."""

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Create one Transfer(), or several when a large diff has to be split.

      For BBOTA v3, source blocks need to be stashed to support the resumable
      feature. As files grow and the cache partition shrinks, the source
      blocks of a single file can become too large to stash. When splitting
      is requested for a "diff" transfer and either side occupies more than
      MAX_BLOCKS_PER_DIFF_TRANSFER blocks, the file is cut into pieces that
      are registered as separate Transfer()s named "<name>-<piece index>".

      The downside is that after splitting, we can no longer use imgdiff but
      only bsdiff.

      Args:
        tgt_name: Name of the target file.
        src_name: Name of the source file.
        tgt_ranges: Blocks occupied by the target file.
        src_ranges: Blocks occupied by the source file.
        style: Transfer style; only "diff" transfers are ever split.
        by_id: Passed through to Transfer() unchanged.
        split: Whether oversized "diff" transfers may be split.
      """

      MAX_BLOCKS_PER_DIFF_TRANSFER = 1024

      # Splitting applies only to "diff" transfers, only when requested, and
      # only when at least one side exceeds the limit. Everything else is
      # registered as a single, unmodified transfer.
      needs_split = (
          style == "diff" and split and
          (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER or
           src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER))
      if not needs_split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      def AddPiece(index, tgt_piece, src_piece):
        """Register piece number 'index' under index-suffixed names."""
        Transfer("%s-%d" % (tgt_name, index), "%s-%d" % (src_name, index),
                 tgt_piece, src_piece, style, by_id)

      # Peel off full-sized pieces for as long as both sides are still above
      # the limit.
      index = 0
      while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
             src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
        tgt_head = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
        src_head = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
        AddPiece(index, tgt_head, src_head)

        tgt_ranges = tgt_ranges.subtract(tgt_head)
        src_ranges = src_ranges.subtract(src_head)
        index += 1

      # Whatever is left over becomes the final piece.
      tgt_left = tgt_ranges.size()
      src_left = src_ranges.size()
      if tgt_left or src_left:
        # A leftover on one side must be matched by a leftover on the other.
        assert tgt_left and src_left
        AddPiece(index, tgt_ranges, src_ranges)

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
@@ -958,28 +1009,28 @@ class BlockImageDiff(object):
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                 "diff", self.transfers)
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                 "diff", self.transfers)
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
@@ -989,11 +1040,11 @@ class BlockImageDiff(object):
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                 "diff", self.transfers)
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

  def AbbreviateSourceNames(self):
    for k in self.src.file_map.keys():
+33 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ class RangeSet(object):
  lots of runs."""

  def __init__(self, data=None):
    # TODO(tbao): monotonic is broken when passing in a tuple.
    self.monotonic = False
    if isinstance(data, str):
      self._parse_internal(data)
@@ -260,6 +261,38 @@ class RangeSet(object):
      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
    return out

  def first(self, n):
    """Return a RangeSet holding at most the first 'n' integers of this one.

    If this set has 'n' or fewer integers, the set itself is returned (not
    a copy). Otherwise ranges are taken in iteration order and the last one
    taken is cut short so that exactly 'n' integers are kept.

    >>> RangeSet("0-9").first(1)
    <RangeSet("0")>
    >>> RangeSet("10-19").first(5)
    <RangeSet("10-14")>
    >>> RangeSet("10-19").first(15)
    <RangeSet("10-19")>
    >>> RangeSet("10-19 30-39").first(3)
    <RangeSet("10-12")>
    >>> RangeSet("10-19 30-39").first(15)
    <RangeSet("10-19 30-34")>
    >>> RangeSet("10-19 30-39").first(30)
    <RangeSet("10-19 30-39")>
    >>> RangeSet("0-9").first(0)
    <RangeSet("")>
    """

    # Nothing to trim when the whole set already fits within 'n'.
    if n >= self.size():
      return self

    remaining = n
    pairs = []
    for start, end in self:
      length = end - start
      if length < remaining:
        # The whole range fits; keep it and carry on with what is left.
        pairs.extend((start, end))
        remaining -= length
        continue
      # This range covers the rest of the quota; truncate it and stop.
      pairs.extend((start, start + remaining))
      break
    return RangeSet(data=pairs)


if __name__ == "__main__":
  import doctest