Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 323c09c3 authored by Tianjie Xu's avatar Tianjie Xu
Browse files

Move the implementation of cd entry map to a separate file

Move the entry map classes to a separate file to make the hierarchy
clear.

Test: unittests pass
Change-Id: Ie01d7835359daa4f59af75a0eda204c696d5658e
parent 0ef9783c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@ cc_defaults {
    srcs: [
        "zip_archive.cc",
        "zip_archive_stream_entry.cc",
        "zip_cd_entry_map.cc",
        "zip_writer.cc",
    ],

+0 −136
Original line number Diff line number Diff line
@@ -85,142 +85,6 @@ static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
 * of the string length into the hash table entry.
 */

/*
 * Returns the smallest power of 2 that is >= |val|.
 *
 * The highest set bit of (val - 1) is smeared into every lower bit, after
 * which adding one yields the power of 2. Inputs of 0 and inputs above 2^31
 * wrap around to 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  // Shift by 1, 2, 4, 8 and 16 so that all 32 bits are covered.
  for (uint32_t shift = 1; shift < 32; shift *= 2) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

// Hashes |name| with std::hash and keeps only the low 32 bits of the result.
static uint32_t ComputeHash(std::string_view name) {
  const std::hash<std::string_view> hasher{};
  const auto full_hash = hasher(name);
  return static_cast<uint32_t>(full_hash);
}

// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* start) const {
  const uint32_t hash = ComputeHash(name);

  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
  uint32_t ent = hash & (hash_table_size_ - 1);
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      return {kSuccess, hash_table_[ent].name_offset};
    }
    ent = (ent + 1) & (hash_table_size_ - 1);
  }

  ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}

// Inserts |name| into the open-addressing hash table.
//
// |name| must point into the central directory that begins at |start|; only
// its offset from |start| and its length are stored. Returns kSuccess after
// storing the entry, or kDuplicateEntry if an equal name is already present.
ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
  // ComputeHash() returns 32 bits; holding it in a uint32_t (as
  // GetCdEntryOffset does) avoids a pointless widening followed by an implicit
  // narrowing when the slot index is computed.
  const uint32_t hash = ComputeHash(name);
  uint32_t ent = hash & (hash_table_size_ - 1);

  /*
   * We over-allocated the table, so we're guaranteed to find an empty slot.
   * Further, we guarantee that the hashtable size is not 0.
   */
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      // We've found a duplicate entry. We don't accept duplicates.
      ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
      return kDuplicateEntry;
    }
    // Linear probing: advance to the next slot, wrapping at the table end.
    ent = (ent + 1) & (hash_table_size_ - 1);
  }

  // `name` has already been validated before entry.
  const char* start_char = reinterpret_cast<const char*>(start);
  hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
  hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
  return kSuccess;
}

// Rewinds the iteration cursor to the first slot of the hash table.
void CdEntryMapZip32::ResetIteration() {
  current_position_ = uint32_t{0};
}

std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
  while (current_position_ < hash_table_size_) {
    const auto& entry = hash_table_[current_position_];
    current_position_ += 1;

    if (entry.name_offset != 0) {
      return {entry.ToStringView(cd_start), entry.name_offset};
    }
  }
  // We have reached the end of the hash table.
  return {};
}

// Allocates a zero-filled hash table sized for |num_entries| names. The
// allocation result is not checked here; hash_table_ is null on failure.
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
  /*
   * Size the table so that it is at most 75% full; rounding up to a power of
   * 2 can only lower the load further.  The extra "1 +" guarantees at least
   * one unused slot, which the probing loops rely on to terminate.
   */
  const uint32_t min_slots = 1 + (num_entries * 4) / 3;
  hash_table_size_ = RoundUpPower2(min_slots);

  auto* slots =
      reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset)));
  hash_table_ = {slots, free};
}

std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
  auto entry_map = new CdEntryMapZip32(num_entries);
  CHECK(entry_map->hash_table_ != nullptr)
      << "Zip: unable to allocate the " << entry_map->hash_table_size_
      << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
  return std::unique_ptr<CdEntryMapInterface>(entry_map);
}

// Factory for the std::map based entry map; the constructor is private.
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
  std::unique_ptr<CdEntryMapInterface> zip64_map(new CdEntryMapZip64());
  return zip64_map;
}

// Records |name| together with its byte offset from |start|. Returns
// kDuplicateEntry, leaving the map untouched, if the name is already present.
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
  const char* cd_begin = reinterpret_cast<const char*>(start);
  const bool inserted = entry_table_.insert({name, name.data() - cd_begin}).second;
  if (inserted) {
    return kSuccess;
  }

  ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
  return kDuplicateEntry;
}

// Returns {kSuccess, stored offset} for |name|, or {kEntryNotFound, 0} when
// the map has no such entry. The central directory pointer is not needed
// because the map stores the names themselves.
std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* /*cd_start*/) const {
  const auto found = entry_table_.find(name);
  if (found != entry_table_.end()) {
    return {kSuccess, found->second};
  }

  ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}

// Rewinds the iteration cursor to the first element of the map.
void CdEntryMapZip64::ResetIteration() {
  iterator_ = std::begin(entry_table_);
}

std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
  if (iterator_ == entry_table_.end()) {
    return {};
  }

  return *iterator_++;
}

#if defined(__BIONIC__)
uint64_t GetOwnerTag(const ZipArchive* archive) {
  return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
+2 −157
Original line number Diff line number Diff line
@@ -22,81 +22,14 @@
#include <stdlib.h>
#include <unistd.h>

#include <map>
#include <memory>
#include <utility>
#include <vector>

#include "android-base/macros.h"
#include "android-base/mapped_file.h"

// Human-readable descriptions of the ZipError codes. Entry i describes the
// error whose value is -i (entry 0 is kSuccess, entry 1 is kIterationEnd, ...),
// so the order here must be kept in sync with enum ZipError.
//
// The pointers themselves are const so that an entry cannot be reseated by
// accident.
static const char* const kErrorMessages[] = {
    "Success",
    "Iteration ended",
    "Zlib error",
    "Invalid file",
    "Invalid handle",
    "Duplicate entries in archive",
    "Empty archive",
    "Entry not found",
    "Invalid offset",
    "Inconsistent information",
    "Invalid entry name",
    "I/O error",
    "File mapping failed",
    "Allocation failed",
};

// Status codes for zip archive operations, stored as int32_t. kSuccess is 0
// and every other code is a distinct negative value. kLastErrorCode aliases
// the most negative code and must be updated when a new error is appended.
enum ZipError : int32_t {
  // The operation completed without error.
  kSuccess = 0,

  // Iteration has finished.
  kIterationEnd = -1,

  // We encountered a Zlib error when inflating a stream from this file.
  // Usually indicates file corruption.
  kZlibError = -2,

  // The input file cannot be processed as a zip archive. Usually because
  // it's too small, too large or does not have a valid signature.
  kInvalidFile = -3,

  // An invalid iteration / ziparchive handle was passed in as an input
  // argument.
  kInvalidHandle = -4,

  // The zip archive contained two (or possibly more) entries with the same
  // name.
  kDuplicateEntry = -5,

  // The zip archive contains no entries.
  kEmptyArchive = -6,

  // The specified entry was not found in the archive.
  kEntryNotFound = -7,

  // The zip archive contained an invalid local file header pointer.
  kInvalidOffset = -8,

  // The zip archive contained inconsistent entry information. This could
  // be because the central directory & local file header did not agree, or
  // if the actual uncompressed length or crc32 do not match their declared
  // values.
  kInconsistentInformation = -9,

  // An invalid entry name was encountered.
  kInvalidEntryName = -10,

  // An I/O related system call (read, lseek, ftruncate, map) failed.
  kIoError = -11,

  // We were not able to mmap the central directory or entry contents.
  kMmapFailed = -12,

  // An allocation failed.
  kAllocationFailed = -13,

  // Alias of the most negative error code defined above.
  kLastErrorCode = kAllocationFailed,
};
#include "zip_cd_entry_map.h"
#include "zip_error.h"

class MappedZipFile {
 public:
@@ -144,94 +77,6 @@ class CentralDirectory {
  size_t length_;
};

// Interface of the central directory (cd) entry map. The map locates a
// particular cd entry based on its filename.
class CdEntryMapInterface {
 public:
  virtual ~CdEntryMapInterface() = default;
  // Adds an entry to the map. |name| should point into the filename field of
  // a cd entry, and |start| points to the beginning of the central directory.
  // Returns 0 on success.
  virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
  // For the zip entry |name|, finds the offset of its filename field in the
  // central directory. Returns a pair of [status, offset]. The status is 0 on
  // success.
  virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                         const uint8_t* cd_start) const = 0;
  // Resets the iterator to the beginning of the map.
  virtual void ResetIteration() = 0;
  // Returns the [name, cd offset] of the current element and advances the
  // iterator to the next element. Returns an empty pair once the iterator has
  // moved past the last element.
  virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
};

/**
 * Compact reference to an entry name stored inside the memory mapped zip
 * file, used as the slot type of the ZipArchive hash table.
 *
 * A std::string_view holds a pointer plus a size_t length, i.e. 16 bytes on
 * 64 bit targets, even though only a 16 bit length is kept for each name
 * here. ZipStringOffset instead stores a 4 byte offset from a fixed location
 * in the memory mapped file and a 2 byte length, consuming 8 bytes with
 * alignment.
 */
struct ZipStringOffset {
  uint32_t name_offset;
  uint16_t name_length;

  // Rebuilds the view of the name. |start| must be the same base address the
  // offset was computed against. Returned by plain value: a const-qualified
  // by-value return type adds nothing for the caller.
  std::string_view ToStringView(const uint8_t* start) const {
    return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
  }
};

// This implementation of CdEntryMap uses an array hash table. It uses less
// memory than std::map; and it's used as the default implementation for zip
// archives without zip64 extension.
class CdEntryMapZip32 : public CdEntryMapInterface {
 public:
  static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  explicit CdEntryMapZip32(uint16_t num_entries);

  // We know how many entries are in the Zip archive, so we can have a
  // fixed-size hash table. We define a load factor of 0.75 and over
  // allocate so the maximum number entries can never be higher than
  // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
  uint32_t hash_table_size_{0};
  std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};

  // The position of element for the current iteration.
  uint32_t current_position_{0};
};

// This implementation of CdEntryMap uses a std::map keyed by entry name. The
// keys are string_views, so the memory they point into must outlive the map.
class CdEntryMapZip64 : public CdEntryMapInterface {
 public:
  static std::unique_ptr<CdEntryMapInterface> Create();

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  CdEntryMapZip64() = default;

  // Maps an entry name to the offset of that name in the central directory.
  std::map<std::string_view, uint64_t> entry_table_;

  // Cursor used by ResetIteration() / Next(). It starts at end() so that a
  // Next() issued before ResetIteration() reports "no more elements" instead
  // of comparing a singular iterator. std::map insertions do not invalidate
  // iterators, so the end() position stays valid as entries are added.
  std::map<std::string_view, uint64_t>::iterator iterator_{entry_table_.end()};
};

struct ZipArchive {
  // open Zip archive
  mutable MappedZipFile mapped_zip;
+156 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "zip_cd_entry_map.h"

#include <android-base/logging.h>
#include <log/log.h>

/*
 * Returns the smallest power of 2 that is >= |val|.
 *
 * The highest set bit of (val - 1) is smeared into every lower bit, after
 * which adding one yields the power of 2. Inputs of 0 and inputs above 2^31
 * wrap around to 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  // Shift by 1, 2, 4, 8 and 16 so that all 32 bits are covered.
  for (uint32_t shift = 1; shift < 32; shift *= 2) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

// Hashes |name| with std::hash and keeps only the low 32 bits of the result.
static uint32_t ComputeHash(std::string_view name) {
  const std::hash<std::string_view> hasher{};
  const auto full_hash = hasher(name);
  return static_cast<uint32_t>(full_hash);
}

// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* start) const {
  const uint32_t hash = ComputeHash(name);

  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
  uint32_t ent = hash & (hash_table_size_ - 1);
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      return {kSuccess, hash_table_[ent].name_offset};
    }
    ent = (ent + 1) & (hash_table_size_ - 1);
  }

  ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}

// Inserts |name| into the open-addressing hash table.
//
// |name| must point into the central directory that begins at |start|; only
// its offset from |start| and its length are stored. Returns kSuccess after
// storing the entry, or kDuplicateEntry if an equal name is already present.
ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
  // ComputeHash() returns 32 bits; holding it in a uint32_t (as
  // GetCdEntryOffset does) avoids a pointless widening followed by an implicit
  // narrowing when the slot index is computed.
  const uint32_t hash = ComputeHash(name);
  uint32_t ent = hash & (hash_table_size_ - 1);

  /*
   * We over-allocated the table, so we're guaranteed to find an empty slot.
   * Further, we guarantee that the hashtable size is not 0.
   */
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      // We've found a duplicate entry. We don't accept duplicates.
      ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
      return kDuplicateEntry;
    }
    // Linear probing: advance to the next slot, wrapping at the table end.
    ent = (ent + 1) & (hash_table_size_ - 1);
  }

  // `name` has already been validated before entry.
  const char* start_char = reinterpret_cast<const char*>(start);
  hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
  hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
  return kSuccess;
}

// Rewinds the iteration cursor to the first slot of the hash table.
void CdEntryMapZip32::ResetIteration() {
  current_position_ = uint32_t{0};
}

std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
  while (current_position_ < hash_table_size_) {
    const auto& entry = hash_table_[current_position_];
    current_position_ += 1;

    if (entry.name_offset != 0) {
      return {entry.ToStringView(cd_start), entry.name_offset};
    }
  }
  // We have reached the end of the hash table.
  return {};
}

// Allocates a zero-filled hash table sized for |num_entries| names. The
// allocation result is not checked here; hash_table_ is null on failure.
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
  /*
   * Size the table so that it is at most 75% full; rounding up to a power of
   * 2 can only lower the load further.  The extra "1 +" guarantees at least
   * one unused slot, which the probing loops rely on to terminate.
   */
  const uint32_t min_slots = 1 + (num_entries * 4) / 3;
  hash_table_size_ = RoundUpPower2(min_slots);

  auto* slots =
      reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset)));
  hash_table_ = {slots, free};
}

std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
  auto entry_map = new CdEntryMapZip32(num_entries);
  CHECK(entry_map->hash_table_ != nullptr)
      << "Zip: unable to allocate the " << entry_map->hash_table_size_
      << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
  return std::unique_ptr<CdEntryMapInterface>(entry_map);
}

// Factory for the std::map based entry map; the constructor is private.
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
  std::unique_ptr<CdEntryMapInterface> zip64_map(new CdEntryMapZip64());
  return zip64_map;
}

// Records |name| together with its byte offset from |start|. Returns
// kDuplicateEntry, leaving the map untouched, if the name is already present.
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
  const char* cd_begin = reinterpret_cast<const char*>(start);
  const bool inserted = entry_table_.insert({name, name.data() - cd_begin}).second;
  if (inserted) {
    return kSuccess;
  }

  ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
  return kDuplicateEntry;
}

// Returns {kSuccess, stored offset} for |name|, or {kEntryNotFound, 0} when
// the map has no such entry. The central directory pointer is not needed
// because the map stores the names themselves.
std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* /*cd_start*/) const {
  const auto found = entry_table_.find(name);
  if (found != entry_table_.end()) {
    return {kSuccess, found->second};
  }

  ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}

// Rewinds the iteration cursor to the first element of the map.
void CdEntryMapZip64::ResetIteration() {
  iterator_ = std::begin(entry_table_);
}

std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
  if (iterator_ == entry_table_.end()) {
    return {};
  }

  return *iterator_++;
}
+114 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stdint.h>

#include <map>
#include <memory>
#include <string_view>
#include <utility>

#include "zip_error.h"

// Interface of the central directory (cd) entry map. The map locates a
// particular cd entry based on its filename.
class CdEntryMapInterface {
 public:
  virtual ~CdEntryMapInterface() = default;
  // Adds an entry to the map. |name| should point into the filename field of
  // a cd entry, and |start| points to the beginning of the central directory.
  // Returns 0 on success.
  virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
  // For the zip entry |name|, finds the offset of its filename field in the
  // central directory. Returns a pair of [status, offset]. The status is 0 on
  // success.
  virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                         const uint8_t* cd_start) const = 0;
  // Resets the iterator to the beginning of the map.
  virtual void ResetIteration() = 0;
  // Returns the [name, cd offset] of the current element and advances the
  // iterator to the next element. Returns an empty pair once the iterator has
  // moved past the last element.
  virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
};

/**
 * Compact reference to an entry name stored inside the memory mapped zip
 * file, used as the slot type of the ZipArchive hash table.
 *
 * A std::string_view holds a pointer plus a size_t length, i.e. 16 bytes on
 * 64 bit targets, even though only a 16 bit length is kept for each name
 * here. ZipStringOffset instead stores a 4 byte offset from a fixed location
 * in the memory mapped file and a 2 byte length, consuming 8 bytes with
 * alignment.
 */
struct ZipStringOffset {
  uint32_t name_offset;
  uint16_t name_length;

  // Rebuilds the view of the name. |start| must be the same base address the
  // offset was computed against. Returned by plain value: a const-qualified
  // by-value return type adds nothing for the caller.
  std::string_view ToStringView(const uint8_t* start) const {
    return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
  }
};

// This implementation of CdEntryMap uses an array hash table. It uses less
// memory than std::map; and it's used as the default implementation for zip
// archives without zip64 extension.
class CdEntryMapZip32 : public CdEntryMapInterface {
 public:
  static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  explicit CdEntryMapZip32(uint16_t num_entries);

  // We know how many entries are in the Zip archive, so we can have a
  // fixed-size hash table. We define a load factor of 0.75 and over
  // allocate so the maximum number entries can never be higher than
  // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
  uint32_t hash_table_size_{0};
  std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};

  // The position of element for the current iteration.
  uint32_t current_position_{0};
};

// This implementation of CdEntryMap uses a std::map keyed by entry name. The
// keys are string_views, so the memory they point into must outlive the map.
class CdEntryMapZip64 : public CdEntryMapInterface {
 public:
  static std::unique_ptr<CdEntryMapInterface> Create();

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  CdEntryMapZip64() = default;

  // Maps an entry name to the offset of that name in the central directory.
  std::map<std::string_view, uint64_t> entry_table_;

  // Cursor used by ResetIteration() / Next(). It starts at end() so that a
  // Next() issued before ResetIteration() reports "no more elements" instead
  // of comparing a singular iterator. std::map insertions do not invalidate
  // iterators, so the end() position stays valid as entries are added.
  std::map<std::string_view, uint64_t>::iterator iterator_{entry_table_.end()};
};
Loading