Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fcf2d8b8 authored by Tianjie Xu's avatar Tianjie Xu Committed by Gerrit Code Review
Browse files

Merge "Support parsing of data descriptor"

parents 8890d429 0ec0eaa2
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -77,6 +77,10 @@ struct ZipEntry {
  // footer.
  uint32_t uncompressed_length;

  // True if the uncompressed and compressed lengths are stored in the zip64
  // extended info of the extra field.
  bool zip64_format_size{false};

  // The offset to the start of data for this ZipEntry.
  off64_t offset;

+24 −4
Original line number Diff line number Diff line
@@ -25,14 +25,18 @@ import zipfile
import time

class Zip64Test(unittest.TestCase):
  @staticmethod
  def _WriteFile(path, size_in_kib):
    contents = os.path.basename(path)[0] * 1024
    with open(path, 'w') as f:
      for it in range(0, size_in_kib):
        f.write(contents)

  @staticmethod
  def _AddEntriesToZip(output_zip, entries_dict=None):
    """Adds one generated entry to `output_zip` per item in `entries_dict`.

    Each entry's payload is produced by Zip64Test._WriteFile in a temporary
    file, which is then stored in the archive under the entry's name.

    Args:
      output_zip: an open, writable zipfile.ZipFile.
      entries_dict: maps entry name -> size in KiB; None or empty adds nothing.
    """
    for name, size in (entries_dict or {}).items():
      # The temporary file is only needed until its bytes are copied into the
      # archive, so release it as soon as the entry is written.
      with tempfile.NamedTemporaryFile() as file_path:
        Zip64Test._WriteFile(file_path.name, size)
        output_zip.write(file_path.name, arcname=name)

  def _getEntryNames(self, zip_name):
@@ -93,6 +97,22 @@ class Zip64Test(unittest.TestCase):
    self._ExtractEntries(zip_path.name)


  def test_forceDataDescriptor(self):
    """Checks that an entry written with a forced data descriptor is readable.

    Uses the external `zip` tool to add a 1 MiB file to an empty archive with
    the -fd option, then verifies the entry can be listed and extracted.
    """
    file_path = tempfile.NamedTemporaryFile(suffix='.txt')
    # TODO create the entry > 4GiB.
    self._WriteFile(file_path.name, 1024)

    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    # Create an empty archive for the zip tool to append to.
    with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
      pass
    # The -fd option forces zip to write a data descriptor.
    cmd = ['zip', '-fd', zip_path.name, file_path.name]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output, _ = proc.communicate()
    # Surface the tool's own output if it failed, instead of an opaque
    # entry-name mismatch further down.
    self.assertEqual(0, proc.returncode,
                     'zip failed: ' + output.decode(errors='replace'))
    read_names = self._getEntryNames(zip_path.name)
    # The expected name drops the first character of the absolute path
    # (presumably because zip strips the leading '/').
    self.assertEqual([file_path.name[1:]], read_names)
    self._ExtractEntries(zip_path.name)

if __name__ == '__main__':
  testsuite = unittest.TestLoader().discover(
      os.path.dirname(os.path.realpath(__file__)))
+200 B

File added.

No diff preview for this file type.

+58 −57
Original line number Diff line number Diff line
@@ -57,8 +57,6 @@
#include "zip_archive_common.h"
#include "zip_archive_private.h"

using android::base::get_unaligned;

// Used to turn on crc checks - verify that the content CRC matches the values
// specified in the local file header and the central directory.
static const bool kCrcChecksEnabled = false;
@@ -221,7 +219,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive
  for (; i >= 0; i--) {
    if (scan_buffer[i] == 0x50) {
      uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
      if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
      if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
        ALOGV("+++ Found EOCD at buf+%d", i);
        break;
      }
@@ -360,8 +358,9 @@ static ZipError ParseZip64ExtendedInfoInExtraField(
  // Data Size - 2 bytes
  uint16_t offset = 0;
  while (offset < extraFieldLength - 4) {
    auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
    auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
    auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
    auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
    auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);

    offset += 4;
    if (dataSize > extraFieldLength - offset) {
@@ -376,55 +375,45 @@ static ZipError ParseZip64ExtendedInfoInExtraField(
      continue;
    }

    uint16_t expectedDataSize = 0;
    // We expect the extended field to include both uncompressed and compressed size.
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      expectedDataSize += 16;
    std::optional<uint64_t> uncompressedFileSize;
    std::optional<uint64_t> compressedFileSize;
    std::optional<uint64_t> localHeaderOffset;
    if (zip32UncompressedSize == UINT32_MAX) {
      uncompressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
    }
    if (zip32CompressedSize == UINT32_MAX) {
      compressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
    }
    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      expectedDataSize += 8;
      localHeaderOffset = ConsumeUnaligned<uint64_t>(&readPtr);
    }

    if (expectedDataSize == 0) {
    // calculate how many bytes we read after the data size field.
    size_t bytesRead = readPtr - (extraFieldStart + offset);
    if (bytesRead == 0) {
      ALOGW("Zip: Data size should not be 0 in zip64 extended field");
      return kInvalidFile;
    }

    if (dataSize != expectedDataSize) {
    if (dataSize != bytesRead) {
      auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
                                   ? std::to_string(zip32LocalFileHeaderOffset.value())
                                   : "missing";
      ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
      ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
            ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
            expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
            bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
            localOffsetString.c_str());
      return kInvalidFile;
    }

    std::optional<uint64_t> uncompressedFileSize;
    std::optional<uint64_t> compressedFileSize;
    std::optional<uint64_t> localHeaderOffset;
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
      compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
      offset += 16;

    // TODO(xunchang) Support handling files larger than UINT32_MAX. It's theoretically
    // possible for libz to (de)compress files larger than UINT32_MAX, but we should use
    // our own byte counter to replace stream.total_out.
      if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
        ALOGW(
            "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
            ", compressed size %" PRIu64,
            uncompressedFileSize.value(), compressedFileSize.value());
    if ((uncompressedFileSize.has_value() && uncompressedFileSize.value() > UINT32_MAX) ||
        (compressedFileSize.has_value() && compressedFileSize.value() > UINT32_MAX)) {
      ALOGW("Zip: File size larger than UINT32_MAX isn't supported yet");
      return kInvalidFile;
    }
    }

    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
      offset += 8;
    }

    zip64Info->uncompressed_file_size = uncompressedFileSize;
    zip64Info->compressed_file_size = compressedFileSize;
@@ -625,7 +614,8 @@ void CloseArchive(ZipArchiveHandle archive) {
}

static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
  uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
  // Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
  uint8_t ddBuf[24];
  off64_t offset = entry->offset;
  if (entry->method != kCompressStored) {
    offset += entry->compressed_length;
@@ -638,18 +628,26 @@ static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry
  }

  const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
  const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
  const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
  uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddBuf + 4 : ddBuf;
  DataDescriptor descriptor{};
  descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
  if (entry->zip64_format_size) {
    descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
    descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
  } else {
    descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
    descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
  }

  // Validate that the values in the data descriptor match those in the central
  // directory.
  if (entry->compressed_length != descriptor->compressed_size ||
      entry->uncompressed_length != descriptor->uncompressed_size ||
      entry->crc32 != descriptor->crc32) {
  if (entry->compressed_length != descriptor.compressed_size ||
      entry->uncompressed_length != descriptor.uncompressed_size ||
      entry->crc32 != descriptor.crc32) {
    ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
          "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
          "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
          entry->compressed_length, entry->uncompressed_length, entry->crc32,
          descriptor->compressed_size, descriptor->uncompressed_size, descriptor->crc32);
          descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
    return kInconsistentInformation;
  }

@@ -706,18 +704,14 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
      return status;
    }

    if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
      CHECK(zip64_info.uncompressed_file_size.has_value());
      CHECK(zip64_info.compressed_file_size.has_value());
    // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
      data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
      data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
    }

    if (local_header_offset == UINT32_MAX) {
      CHECK(zip64_info.local_header_offset.has_value());
      local_header_offset = zip64_info.local_header_offset.value();
    }
    data->uncompressed_length =
        static_cast<uint32_t>(zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size));
    data->compressed_length =
        static_cast<uint32_t>(zip64_info.compressed_file_size.value_or(cdr->compressed_size));
    local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
    data->zip64_format_size =
        cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
  }

  if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -766,6 +760,13 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
  uint64_t lfh_compressed_size = lfh->compressed_size;
  if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
    if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
      ALOGW(
          "Zip: The zip64 extended field in the local header MUST include BOTH original and "
          "compressed file size fields.");
      return kInvalidFile;
    }

    const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
    const uint16_t lfh_extra_field_size = lfh->extra_field_length;
    if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
+14 −5
Original line number Diff line number Diff line
@@ -165,15 +165,24 @@ struct DataDescriptor {

  // CRC-32 checksum of the entry.
  uint32_t crc32;
  // Compressed size of the entry.
  uint32_t compressed_size;
  // Uncompressed size of the entry.
  uint32_t uncompressed_size;

  // For ZIP64 format archives, the compressed and uncompressed sizes are 8
  // bytes each. Also, the ZIP64 format MAY be used regardless of the size
  // of a file.  When extracting, if the zip64 extended information extra field
  // is present for the file the compressed and uncompressed sizes will be 8
  // byte values.

  // Compressed size of the entry, the field can be either 4 bytes or 8 bytes
  // in the zip file.
  uint64_t compressed_size;
  // Uncompressed size of the entry, the field can be either 4 bytes or 8 bytes
  // in the zip file.
  uint64_t uncompressed_size;

 private:
  DataDescriptor() = default;
  DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
} __attribute__((packed));
};

// The zip64 end of central directory locator helps to find the zip64 EOCD.
struct Zip64EocdLocator {
Loading