Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4747e9c0 authored by Tianjie Xu's avatar Tianjie Xu Committed by Automerger Merge Worker
Browse files

Merge "Implement the functions to parse zip64 structs" am: 19c5cc22

Change-Id: Ica883ae44d23e635004345b42f5f6fb7186a23c9
parents c19b5bd8 19c5cc22
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -67,5 +67,11 @@
    {
      "name": "ziparchive-tests"
    }
  ],

  "postsubmit": [
    {
      "name": "ziparchive_tests_large"
    }
  ]
}
+17 −0
Original line number Diff line number Diff line
@@ -212,3 +212,20 @@ sh_test {
    data: ["cli-tests/**/*"],
    target_required: ["cli-test", "ziptool"],
}

python_test_host {
    name: "ziparchive_tests_large",
    srcs: ["test_ziparchive_large.py"],
    main: "test_ziparchive_large.py",
    version: {
        py2: {
            enabled: true,
            embedded_launcher: false,
        },
        py3: {
            enabled: false,
            embedded_launcher: false,
        },
    },
    test_suites: ["general-tests"],
}
+99 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
#
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unittests for parsing files in zip64 format"""

import os
import subprocess
import tempfile
import unittest
import zipfile
import time

class Zip64Test(unittest.TestCase):
  @staticmethod
  def _AddEntriesToZip(output_zip, entries_dict=None):
    for name, size in entries_dict.items():
      contents = name[0] * 1024
      file_path = tempfile.NamedTemporaryFile()
      with open(file_path.name, 'w') as f:
        for it in range(0, size):
          f.write(contents)
      output_zip.write(file_path.name, arcname = name)

  def _getEntryNames(self, zip_name):
    cmd = ['ziptool', 'zipinfo', '-1', zip_name]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output, _ = proc.communicate()
    self.assertEquals(0, proc.returncode)
    self.assertNotEqual(None, output)
    return output.split()

  def _ExtractEntries(self, zip_name):
    temp_dir = tempfile.mkdtemp()
    cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    proc.communicate()
    self.assertEquals(0, proc.returncode)

  def test_entriesSmallerThan2G(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
    # than 4GiB in size.
    with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
      entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
                    'd.txt': 1025 * 1024, 'e.txt': 1024}
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


  def test_largeNumberOfEntries(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    entry_dict = {}
    # Add 100k entries (more than 65535|UINT16_MAX).
    for num in range(0, 100 * 1024):
      entry_dict[str(num)] = 50

    with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


  def test_largeCompressedEntries(self):
    zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
                         allowZip64=True) as output_zip:
      # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
      # sizes in the extra field. Test if our ziptool should be able to parse it.
      entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
      self._AddEntriesToZip(output_zip, entry_dict)

    read_names = self._getEntryNames(zip_path.name)
    self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
    self._ExtractEntries(zip_path.name)


if __name__ == '__main__':
  testsuite = unittest.TestLoader().discover(
      os.path.dirname(os.path.realpath(__file__)))
  unittest.TextTestRunner(verbosity=2).run(testsuite)
+241 −44
Original line number Diff line number Diff line
@@ -139,10 +139,61 @@ struct CentralDirectoryInfo {
  uint64_t cd_start_offset;
};

static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
  ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
                                                 off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
  if (eocdOffset <= sizeof(Zip64EocdLocator)) {
    ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
    return kInvalidFile;
  }
  // We expect to find the zip64 eocd locator immediately before the zip eocd.
  const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
  Zip64EocdLocator zip64EocdLocator{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
                                        sizeof(Zip64EocdLocator), locatorOffset)) {
    ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
    return kIoError;
  }

  if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
          locatorOffset);
    return kInvalidFile;
  }

  const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
  if (zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
    ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
    return kInvalidOffset;
  }

  Zip64EocdRecord zip64EocdRecord{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
                                        sizeof(Zip64EocdRecord), zip64EocdOffset)) {
    ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdLocator), static_cast<int64_t>(zip64EocdOffset), debugFileName);
    return kIoError;
  }

  if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
          zip64EocdOffset);
    return kInvalidFile;
  }

  if (zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
    ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
          ", zip64 eocd offset %" PRIu64,
          debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
    return kInvalidOffset;
  }

  *cdInfo = {.num_records = zip64EocdRecord.num_records,
             .cd_size = zip64EocdRecord.cd_size,
             .cd_start_offset = zip64EocdRecord.cd_start_offset};

  return kSuccess;
}

static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
                                         off64_t file_length, uint32_t read_amount,
@@ -195,7 +246,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive
  if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
    ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
          eocd->cd_size, eocd->cd_start_offset);
    return FindCentralDirectoryInfoForZip64(cdInfo);
    return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
  }

  /*
@@ -291,13 +342,104 @@ static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* arc
  return kSuccess;
}

static ZipError ParseZip64ExtendedInfoInExtraField(
    const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
    uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
    Zip64ExtendedInfo* zip64Info) {
  if (extraFieldLength <= 4) {
    ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
          extraFieldLength);
    return kInvalidFile;
  }

  // Each header MUST consist of:
  // Header ID - 2 bytes
  // Data Size - 2 bytes
  uint16_t offset = 0;
  while (offset < extraFieldLength - 4) {
    auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
    auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);

    offset += 4;
    if (dataSize > extraFieldLength - offset) {
      ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, dataSize);
      return kInvalidOffset;
    }

    // Skip the other types of extensible data fields. Details in
    // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
    if (headerId != Zip64ExtendedInfo::kHeaderId) {
      offset += dataSize;
      continue;
    }

    uint16_t expectedDataSize = 0;
    // We expect the extended field to include both uncompressed and compressed size.
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      expectedDataSize += 16;
    }
    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      expectedDataSize += 8;
    }

    if (expectedDataSize == 0) {
      ALOGW("Zip: Data size should not be 0 in zip64 extended field");
      return kInvalidFile;
    }

    if (dataSize != expectedDataSize) {
      auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
                                   ? std::to_string(zip32LocalFileHeaderOffset.value())
                                   : "missing";
      ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
            ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
            expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
            localOffsetString.c_str());
      return kInvalidFile;
    }

    std::optional<uint64_t> uncompressedFileSize;
    std::optional<uint64_t> compressedFileSize;
    std::optional<uint64_t> localHeaderOffset;
    if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
      uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
      compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
      offset += 16;

      // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
      // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
      // bytes counter to replace stream.total_out.
      if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
        ALOGW(
            "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
            ", compressed size %" PRIu64,
            uncompressedFileSize.value(), compressedFileSize.value());
        return kInvalidFile;
      }
    }

    if (zip32LocalFileHeaderOffset == UINT32_MAX) {
      localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
      offset += 8;
    }

    zip64Info->uncompressed_file_size = uncompressedFileSize;
    zip64Info->compressed_file_size = compressedFileSize;
    zip64Info->local_header_offset = localHeaderOffset;
    return kSuccess;
  }

  ALOGW("Zip: zip64 extended info isn't found in the extra field.");
  return kInvalidFile;
}

/*
 * Parses the Zip archive's Central Directory.  Allocates and populates the
 * hash table.
 *
 * Returns 0 on success.
 */
static int32_t ParseZipArchive(ZipArchive* archive) {
static ZipError ParseZipArchive(ZipArchive* archive) {
  const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
  const size_t cd_length = archive->central_directory.GetMapLength();
  const uint64_t num_entries = archive->num_entries;
@@ -327,7 +469,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
      return kInvalidFile;
    }

    const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
      ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
      return kInvalidFile;
@@ -355,9 +497,15 @@ static int32_t ParseZipArchive(ZipArchive* archive) {

    off64_t local_header_offset = cdr->local_file_header_offset;
    if (local_header_offset == UINT32_MAX) {
      // TODO(xunchang) parse the zip64 eocd
      ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
      return kInvalidFile;
      Zip64ExtendedInfo zip64_info{};
      if (auto status = ParseZip64ExtendedInfoInExtraField(
              extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
              cdr->local_file_header_offset, &zip64_info);
          status != kSuccess) {
        return status;
      }
      CHECK(zip64_info.local_header_offset.has_value());
      local_header_offset = zip64_info.local_header_offset.value();
    }

    if (local_header_offset >= archive->directory_offset) {
@@ -405,7 +553,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {

  ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);

  return 0;
  return kSuccess;
}

static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@@ -522,7 +670,7 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
    return kInvalidOffset;
  }

  const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
  auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);

  // The offset of the start of the central directory in the zipfile.
  // We keep this lying around so that we can sanity check all our lengths
@@ -546,8 +694,27 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // the extra field.
  if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
      cdr->local_file_header_offset == UINT32_MAX) {
    ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
    return kInvalidFile;
    const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
            cdr->local_file_header_offset, &zip64_info);
        status != kSuccess) {
      return status;
    }

    if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
      CHECK(zip64_info.uncompressed_file_size.has_value());
      CHECK(zip64_info.compressed_file_size.has_value());
      // TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
      data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
      data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
    }

    if (local_header_offset == UINT32_MAX) {
      CHECK(zip64_info.local_header_offset.has_value());
      local_header_offset = zip64_info.local_header_offset.value();
    }
  }

  if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@@ -562,14 +729,68 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
    return kIoError;
  }

  const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);

  auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
  if (lfh->lfh_signature != LocalFileHeader::kSignature) {
    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
          static_cast<int64_t>(local_header_offset));
    return kInvalidOffset;
  }

  // Check that the local file header name matches the declared name in the central directory.
  CHECK_LE(entryName.size(), UINT16_MAX);
  auto nameLen = static_cast<uint16_t>(entryName.size());
  if (lfh->file_name_length != nameLen) {
    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
          std::string(entryName).c_str(), lfh->file_name_length, nameLen);
    return kInconsistentInformation;
  }
  const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
  if (name_offset > cd_offset - lfh->file_name_length) {
    ALOGW("Zip: lfh name has invalid declared length");
    return kInvalidOffset;
  }

  std::vector<uint8_t> name_buf(nameLen);
  if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
    return kIoError;
  }
  if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
    ALOGW("Zip: lfh name did not match central directory");
    return kInconsistentInformation;
  }

  uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
  uint64_t lfh_compressed_size = lfh->compressed_size;
  if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
    const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
    const uint16_t lfh_extra_field_size = lfh->extra_field_length;
    if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
      ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
      return kInvalidOffset;
    }

    std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
    if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
                                          lfh_extra_field_offset)) {
      ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
      return kIoError;
    }

    Zip64ExtendedInfo zip64_info{};
    if (auto status = ParseZip64ExtendedInfoInExtraField(
            local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
            lfh->compressed_size, std::nullopt, &zip64_info);
        status != kSuccess) {
      return status;
    }

    CHECK(zip64_info.uncompressed_file_size.has_value());
    CHECK(zip64_info.compressed_file_size.has_value());
    lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
    lfh_compressed_size = zip64_info.compressed_file_size.value();
  }

  // Paranoia: Match the values specified in the local file header
  // to those specified in the central directory.

@@ -595,12 +816,12 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // header agree on the crc, compressed, and uncompressed sizes of the entry.
  if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
    data->has_data_descriptor = 0;
    if (data->compressed_length != lfh->compressed_size ||
        data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
    if (data->compressed_length != lfh_compressed_size ||
        data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
            "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
            data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
            lfh->uncompressed_size, lfh->crc32);
            "}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
            data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
            lfh_uncompressed_size, lfh->crc32);
      return kInconsistentInformation;
    }
  } else {
@@ -623,30 +844,6 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
  // Currently only needed to implement zipinfo.
  data->is_text = (cdr->internal_file_attributes & 1);

  // Check that the local file header name matches the declared
  // name in the central directory.
  CHECK_LE(entryName.size(), UINT16_MAX);
  auto nameLen = static_cast<uint16_t>(entryName.size());
  if (lfh->file_name_length != nameLen) {
    ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
          std::string(entryName).c_str(), lfh->file_name_length, nameLen);
    return kInconsistentInformation;
  }
  const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
  if (name_offset + lfh->file_name_length > cd_offset) {
    ALOGW("Zip: lfh name has invalid declared length");
    return kInvalidOffset;
  }
  std::vector<uint8_t> name_buf(nameLen);
  if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
    ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
    return kIoError;
  }
  if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
    ALOGW("Zip: lfh name did not match central directory");
    return kInconsistentInformation;
  }

  const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
                              lfh->file_name_length + lfh->extra_field_length;
  if (data_offset > cd_offset) {
+292 −2

File changed.

Preview size limit exceeded, changes collapsed.