Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6ab29129 authored by Tianjie's avatar Tianjie
Browse files

Implement the functions to parse zip64 structs

Implement the logic to parse zip64 eocd and zip64 extended info
in the extra field. Also add unit tests and python tests which
create packages larger than 4GiB.

The extraction of zip entry size > 4GiB will be supported in the follow
ups.

Bug: 150900468
Test: unit tests pass
Change-Id: I4cd9ebbd9709b3d2f9cd293625d2c79024bb45a5
parent 18c00858
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -67,5 +67,11 @@
    {
      "name": "ziparchive-tests"
    }
  ],

  "postsubmit": [
    {
      "name": "ziparchive_tests_large"
    }
  ]
}
+17 −0
Original line number Diff line number Diff line
@@ -212,3 +212,20 @@ sh_test {
    data: ["cli-tests/**/*"],
    target_required: ["cli-test", "ziptool"],
}

python_test_host {
    name: "ziparchive_tests_large",
    srcs: ["test_ziparchive_large.py"],
    main: "test_ziparchive_large.py",
    version: {
        py2: {
            enabled: true,
            embedded_launcher: false,
        },
        py3: {
            enabled: false,
            embedded_launcher: false,
        },
    },
    test_suites: ["general-tests"],
}
+99 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
#
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unittests for parsing files in zip64 format"""

import os
import subprocess
import tempfile
import unittest
import zipfile
import time

class Zip64Test(unittest.TestCase):
  @staticmethod
  def _AddEntriesToZip(output_zip, entries_dict=None):
    for name, size in entries_dict.items():
      contents = name[0] * 1024
      file_path = tempfile.NamedTemporaryFile()
      with open(file_path.name, 'w') as f:
        for it in range(0, size):
          f.write(contents)
      output_zip.write(file_path.name, arcname = name)

  def _getEntryNames(self, zip_name):
    cmd = ['ziptool', 'zipinfo', '-1', zip_name]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output, _ = proc.communicate()
    self.assertEquals(0, proc.returncode)
    self.assertNotEqual(None, output)
    return output.split()

  def _ExtractEntries(self, zip_name):
    temp_dir = tempfile.mkdtemp()
    cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    proc.communicate()
    self.assertEquals(0, proc.returncode)

  def test_entriesSmallerThan2G(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
    # than 4GiB in size.
    with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
      entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
                    'd.txt': 1025 * 1024, 'e.txt': 1024}
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


  def test_largeNumberOfEntries(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    entry_dict = {}
    # Add 100k entries (more than 65535|UINT16_MAX).
    for num in range(0, 100 * 1024):
      entry_dict[str(num)] = 50

    with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


  def test_largeCompressedEntries(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
                         allowZip64=True) as output_zip:
      # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
      # sizes in the extra field. Test if our ziptool should be able to parse it.
      entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


if __name__ == '__main__':
  testsuite = unittest.TestLoader().discover(
      os.path.dirname(os.path.realpath(__file__)))
  unittest.TextTestRunner(verbosity=2).run(testsuite)
+241 −44
Original line number Diff line number Diff line
@@ -138,10 +138,61 @@ struct CentralDirectoryInfo {
  uint64_t cd_start_offset;
};

static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
  ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
                                                 off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
  if (eocdOffset <= sizeof(Zip64EocdLocator)) {
    ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
    return kInvalidFile;
  }
  // We expect to find the zip64 eocd locator immediately before the zip eocd.
  const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
  Zip64EocdLocator zip64EocdLocator{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
                                        sizeof(Zip64EocdLocator), locatorOffset)) {
    ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
    return kIoError;
  }

  if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
          locatorOffset);
    return kInvalidFile;
  }

  const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
  if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
    ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
    return kInvalidOffset;
  }

  Zip64EocdRecord zip64EocdRecord{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
                                        sizeof(Zip64EocdRecord), zip64EocdOffset)) {
    ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdLocator), static_cast<int64_t>(zip64EocdOffset), debugFileName);
    return kIoError;
  }

  if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
          zip64EocdOffset);
    return kInvalidFile;
  }

  if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
    ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
          ", zip64 eocd offset %" PRIu64,
          debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
    return kInvalidOffset;
  }

  *cdInfo = {.num_records = zip64EocdRecord.num_records,
             .cd_size = zip64EocdRecord.cd_size,
             .cd_start_offset = zip64EocdRecord.cd_start_offset};

  return kSuccess;
}

static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
                                         off64_t file_length, uint32_t read_amount,
@@ -194,7 +245,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive
  if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
    ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
          eocd->cd_size, eocd->cd_start_offset);
    return FindCentralDirectoryInfoForZip64(cdInfo);
    return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
  }

  /*
@@ -290,13 +341,104 @@ static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* arc
  return kSuccess;
}

static ZipError ParseZip64ExtendedInfoInExtraField(
    const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
    uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
    Zip64ExtendedInfo* zip64Info) {
  if (extraFieldLength <= 4) {
    ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
          extraFieldLength);
    return kInvalidFile;
  }

  // Each header MUST consist of:
  // Header ID - 2 bytes
  // Data Size - 2 bytes
  uint16_t offset = 0;
  while (offset < extraFieldLength - 4) {
    auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
    auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);

    offset += 4;
    if (dataSize > extraFieldLength - offset) {
      ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
      return kInvalidOffset;
    }

    // Skip the other types of extensible data fields. Details in
    // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
    if (headerId != Zip64ExtendedInfo::kHeaderId) {
      offset += dataSize;
      continue;
    }

    uint16_t expectedDataSize = 0;
    // We expect the extended field to include both uncompressed and compressed size.
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      expectedDataSize += 16;
    }
    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      expectedDataSize += 8;
    }

    if (expectedDataSize == 0) {
      ALOGW("Zip: Data size should not be 0 in zip64 extended field");
      return kInvalidFile;
    }

    if (dataSize != expectedDataSize) {
      auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
                                   ? std::to_string(zip32LocalFileHeaderOffset.value())
                                   : "missing";
      ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
            ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
            expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
            localOffsetString.c_str());
      return kInvalidFile;
    }

    std::optional<uint64_t> uncompressedFileSize;
    std::optional<uint64_t> compressedFileSize;
    std::optional<uint64_t> localHeaderOffset;
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
      compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
      offset += 16;

      // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
      // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
      // bytes counter to replace stream.total_out.
      if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
        ALOGW(
            "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
            ", compressed size %" PRIu64,
            uncompressedFileSize.value(), compressedFileSize.value());
        return kInvalidFile;
      }
    }

    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
      offset += 8;
    }

    zip64Info->uncompressed_file_size = uncompressedFileSize;
    zip64Info->compressed_file_size = compressedFileSize;
    zip64Info->local_header_offset = localHeaderOffset;
    return kSuccess;
  }

  ALOGW("Zip: zip64 extended info isn't found in the extra field.");
  return kInvalidFile;
}

/*
 * Parses the Zip archive's Central Directory.  Allocates and populates the
 * hash table.
 *
 * Returns 0 on success.
 */
static int32_t ParseZipArchive(ZipArchive* archive) {
static ZipError ParseZipArchive(ZipArchive* archive) {
  const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
  const size_t cd_length = archive->central_directory.GetMapLength();
  const uint64_t num_entries = archive->num_entries;
@@ -326,7 +468,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
      return kInvalidFile;
    }

    const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
      ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
      return kInvalidFile;
@@ -354,9 +496,15 @@ static int32_t ParseZipArchive(ZipArchive* archive) {

    off64_t local_header_offset = cdr->local_file_header_offset;
    if (local_header_offset == UINT32_MAX) {
      // TODO(xunchang) parse the zip64 eocd
      ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
      return kInvalidFile;
      Zip64ExtendedInfo zip64_info{};
      if (auto status = ParseZip64ExtendedInfoInExtraField(
              extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
              cdr->local_file_header_offset, &zip64_info);
          status != kSuccess) {
        return status;
      }
      CHECK(zip64_info.local_header_offset.has_value());
      local_header_offset = zip64_info.local_header_offset.value();
    }

    if (local_header_offset >= archive->directory_offset) {
@@ -404,7 +552,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {

  ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);

  return 0;
  return kSuccess;
}

static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@@ -521,7 +669,7 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
    return kInvalidOffset;
  }

  const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
  auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);

  // The offset of the start of the central directory in the zipfile.
  // We keep this lying around so that we can sanity check all our lengths
@@ -545,8 +693,27 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // the extra field.
  if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
      cdr->local_file_header_offset == UINT32_MAX) {
    ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
    return kInvalidFile;
    const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
            cdr->local_file_header_offset, &zip64_info);
        status != kSuccess) {
      return status;
    }

    if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
      CHECK(zip64_info.uncompressed_file_size.has_value());
      CHECK(zip64_info.compressed_file_size.has_value());
      // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
      data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
      data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
    }

    if (local_header_offset == UINT32_MAX) {
      CHECK(zip64_info.local_header_offset.has_value());
      local_header_offset = zip64_info.local_header_offset.value();
    }
  }

  if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -561,14 +728,68 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
    return kIoError;
  }

  const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);

  auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
  if (lfh->lfh_signature != LocalFileHeader::kSignature) {
    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
          static_cast<int64_t>(local_header_offset));
    return kInvalidOffset;
  }

  // Check that the local file header name matches the declared name in the central directory.
  CHECK_LE(entryName.size(), UINT16_MAX);
  auto nameLen = static_cast<uint16_t>(entryName.size());
  if (lfh->file_name_length != nameLen) {
    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
          std::string(entryName).c_str(), lfh->file_name_length, nameLen);
    return kInconsistentInformation;
  }
  const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
  if (name_offset > cd_offset - lfh->file_name_length) {
    ALOGW("Zip: lfh name has invalid declared length");
    return kInvalidOffset;
  }

  std::vector<uint8_t> name_buf(nameLen);
  if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
    return kIoError;
  }
  if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
    ALOGW("Zip: lfh name did not match central directory");
    return kInconsistentInformation;
  }

  uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
  uint64_t lfh_compressed_size = lfh->compressed_size;
  if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
    const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
    const uint16_t lfh_extra_field_size = lfh->extra_field_length;
    if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
      ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
      return kInvalidOffset;
    }

    std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
    if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
                                          lfh_extra_field_offset)) {
      ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
      return kIoError;
    }

    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
            lfh->compressed_size, std::nullopt, &zip64_info);
        status != kSuccess) {
      return status;
    }

    CHECK(zip64_info.uncompressed_file_size.has_value());
    CHECK(zip64_info.compressed_file_size.has_value());
    lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
    lfh_compressed_size = zip64_info.compressed_file_size.value();
  }

  // Paranoia: Match the values specified in the local file header
  // to those specified in the central directory.

@@ -594,12 +815,12 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // header agree on the crc, compressed, and uncompressed sizes of the entry.
  if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
    data->has_data_descriptor = 0;
    if (data->compressed_length != lfh->compressed_size ||
        data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
    if (data->compressed_length != lfh_compressed_size ||
        data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
            "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
            data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
            lfh->uncompressed_size, lfh->crc32);
            "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
            data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
            lfh_uncompressed_size, lfh->crc32);
      return kInconsistentInformation;
    }
  } else {
@@ -622,30 +843,6 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // Currently only needed to implement zipinfo.
  data->is_text = (cdr->internal_file_attributes & 1);

  // Check that the local file header name matches the declared
  // name in the central directory.
  CHECK_LE(entryName.size(), UINT16_MAX);
  auto nameLen = static_cast<uint16_t>(entryName.size());
  if (lfh->file_name_length != nameLen) {
    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
          std::string(entryName).c_str(), lfh->file_name_length, nameLen);
    return kInconsistentInformation;
  }
  const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
  if (name_offset + lfh->file_name_length > cd_offset) {
    ALOGW("Zip: lfh name has invalid declared length");
    return kInvalidOffset;
  }
  std::vector<uint8_t> name_buf(nameLen);
  if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
    return kIoError;
  }
  if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
    ALOGW("Zip: lfh name did not match central directory");
    return kInconsistentInformation;
  }

  const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
                              lfh->file_name_length + lfh->extra_field_length;
  if (data_offset > cd_offset) {
+292 −2

File changed.

Preview size limit exceeded, changes collapsed.