Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 03f61135 authored by Jeremy Meyer's avatar Jeremy Meyer Committed by Android (Google) Code Review
Browse files

Merge "Move StringPool to libandroidfw"

parents 91190155 56f36e8f
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ cc_library {
        "AssetManager2.cpp",
        "AssetsProvider.cpp",
        "AttributeResolution.cpp",
        "BigBuffer.cpp",
        "ChunkIterator.cpp",
        "ConfigDescription.cpp",
        "Idmap.cpp",
@@ -73,6 +74,7 @@ cc_library {
        "ResourceTypes.cpp",
        "ResourceUtils.cpp",
        "StreamingZipInflater.cpp",
        "StringPool.cpp",
        "TypeWrappers.cpp",
        "Util.cpp",
        "ZipFileRO.cpp",
@@ -162,6 +164,7 @@ cc_test {
        "tests/AssetManager2_test.cpp",
        "tests/AttributeFinder_test.cpp",
        "tests/AttributeResolution_test.cpp",
        "tests/BigBuffer_test.cpp",
        "tests/ByteBucketArray_test.cpp",
        "tests/Config_test.cpp",
        "tests/ConfigDescription_test.cpp",
@@ -174,6 +177,7 @@ cc_test {
        "tests/ResTable_test.cpp",
        "tests/Split_test.cpp",
        "tests/StringPiece_test.cpp",
        "tests/StringPool_test.cpp",
        "tests/Theme_test.cpp",
        "tests/TypeWrappers_test.cpp",
        "tests/ZipUtils_test.cpp",
+4 −0
Original line number Diff line number Diff line
@@ -601,6 +601,10 @@ base::expected<FindEntryResult, NullOrIOError> AssetManager2::FindEntry(
    return base::unexpected(result.error());
  }

  if (type_idx == 0x1c) {
    LOG(ERROR) << base::StringPrintf("foobar first result %s", result->package_name->c_str());
  }

  bool overlaid = false;
  if (!stop_at_first_match && !ignore_configuration && !apk_assets_[result->cookie]->IsLoader()) {
    for (const auto& id_map : package_group.overlays_) {
+3 −3
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

#include "util/BigBuffer.h"
#include <androidfw/BigBuffer.h>

#include <algorithm>
#include <memory>
@@ -22,7 +22,7 @@

#include "android-base/logging.h"

namespace aapt {
namespace android {

void* BigBuffer::NextBlockImpl(size_t size) {
  if (!blocks_.empty()) {
@@ -84,4 +84,4 @@ std::string BigBuffer::to_string() const {
  return result;
}

}  // namespace aapt
}  // namespace android
+23 −26
Original line number Diff line number Diff line
@@ -14,7 +14,8 @@
 * limitations under the License.
 */

#include "StringPool.h"
#include <androidfw/BigBuffer.h>
#include <androidfw/StringPool.h>

#include <algorithm>
#include <memory>
@@ -23,15 +24,14 @@
#include "android-base/logging.h"
#include "androidfw/ResourceTypes.h"
#include "androidfw/StringPiece.h"

#include "util/BigBuffer.h"
#include "util/Util.h"
#include "androidfw/Util.h"

using ::android::StringPiece;

namespace aapt {
namespace android {

StringPool::Ref::Ref() : entry_(nullptr) {}
StringPool::Ref::Ref() : entry_(nullptr) {
}

StringPool::Ref::Ref(const StringPool::Ref& rhs) : entry_(rhs.entry_) {
  if (entry_ != nullptr) {
@@ -88,10 +88,10 @@ const StringPool::Context& StringPool::Ref::GetContext() const {
  return entry_->context;
}

StringPool::StyleRef::StyleRef() : entry_(nullptr) {}
StringPool::StyleRef::StyleRef() : entry_(nullptr) {
}

StringPool::StyleRef::StyleRef(const StringPool::StyleRef& rhs)
    : entry_(rhs.entry_) {
StringPool::StyleRef::StyleRef(const StringPool::StyleRef& rhs) : entry_(rhs.entry_) {
  if (entry_ != nullptr) {
    entry_->ref_++;
  }
@@ -210,7 +210,7 @@ StringPool::StyleRef StringPool::MakeRef(const StyleString& str, const Context&
  entry->context = context;
  entry->index_ = styles_.size();
  entry->ref_ = 0;
  for (const aapt::Span& span : str.spans) {
  for (const android::Span& span : str.spans) {
    entry->spans.emplace_back(Span{MakeRef(span.name), span.first_char, span.last_char});
  }

@@ -368,15 +368,14 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
                         IDiagnostics* diag) {
  if (utf8) {
    const std::string& encoded = util::Utf8ToModifiedUtf8(str);
    const ssize_t utf16_length = utf8_to_utf16_length(
        reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size());
    const ssize_t utf16_length =
        utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size());
    CHECK(utf16_length >= 0);

    // Make sure the lengths to be encoded do not exceed the maximum length that
    // can be encoded using chars
    if ((((size_t)encoded.size()) > EncodeLengthMax<char>())
        || (((size_t)utf16_length) > EncodeLengthMax<char>())) {

    if ((((size_t)encoded.size()) > EncodeLengthMax<char>()) ||
        (((size_t)utf16_length) > EncodeLengthMax<char>())) {
      diag->Error(DiagMessage() << "string too large to encode using UTF-8 "
                                << "written instead as '" << kStringTooLarge << "'");

@@ -384,8 +383,8 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
      return false;
    }

    const size_t total_size = EncodedLengthUnits<char>(utf16_length)
        + EncodedLengthUnits<char>(encoded.size()) + encoded.size() + 1;
    const size_t total_size = EncodedLengthUnits<char>(utf16_length) +
                              EncodedLengthUnits<char>(encoded.size()) + encoded.size() + 1;

    char* data = out->NextBlock<char>(total_size);

@@ -411,8 +410,7 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
    }

    // Total number of 16-bit words to write.
    const size_t total_size = EncodedLengthUnits<char16_t>(utf16_length)
        + encoded.size() + 1;
    const size_t total_size = EncodedLengthUnits<char16_t>(utf16_length) + encoded.size() + 1;

    char16_t* data = out->NextBlock<char16_t>(total_size);

@@ -431,8 +429,7 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
  return true;
}

bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8,
                         IDiagnostics* diag) {
bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8, IDiagnostics* diag) {
  bool no_error = true;
  const size_t start_index = out->size();
  android::ResStringPool_header* header = out->NextBlock<android::ResStringPool_header>();
@@ -490,8 +487,8 @@ bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8,
    // ResStringPool_span structure worth of 0xFFFFFFFF at the end
    // of the style block, so fill in the remaining 2 32bit words
    // with 0xFFFFFFFF.
    const size_t padding_length = sizeof(android::ResStringPool_span) -
                                  sizeof(android::ResStringPool_span::name);
    const size_t padding_length =
        sizeof(android::ResStringPool_span) - sizeof(android::ResStringPool_span::name);
    uint8_t* padding = out->NextBlock<uint8_t>(padding_length);
    memset(padding, 0xff, padding_length);
    out->Align4();
@@ -508,4 +505,4 @@ bool StringPool::FlattenUtf16(BigBuffer* out, const StringPool& pool, IDiagnosti
  return Flatten(out, pool, false, diag);
}

}  // namespace aapt
}  // namespace android
+124 −0
Original line number Diff line number Diff line
@@ -68,6 +68,107 @@ std::string Utf16ToUtf8(const StringPiece16& utf16) {
  return utf8;
}

// Converts a standard UTF-8 string into Java's Modified UTF-8.
//
// Every 4 byte UTF-8 sequence is replaced by the 3 byte encodings of its UTF-16 high and low
// surrogates (6 bytes total); all other bytes are copied through unchanged. If the input holds no
// 4 byte sequence, a copy of the input is returned as is.
//
// A 4 byte lead byte that is not followed by at least three more bytes (a truncated sequence at
// the end of the input) is copied through verbatim rather than decoded.
std::string Utf8ToModifiedUtf8(const std::string& utf8) {
  // Java uses Modified UTF-8 which only supports the 1, 2, and 3 byte formats of UTF-8. To encode
  // 4 byte UTF-8 codepoints, Modified UTF-8 allows the use of surrogate pairs in the same format
  // of CESU-8 surrogate pairs.
  const size_t size = utf8.size();

  // A lead byte of the form 0b1111xxxx announces a 4 byte sequence. Only treat it as one when all
  // four bytes are present, so the decoder is never asked to read past the end of the string.
  const auto is_four_byte_sequence = [&utf8, size](size_t i) {
    return (static_cast<uint8_t>(utf8[i]) >> 4) == 0xF && size - i >= 4;
  };

  // Calculate the size of the string with every 4 byte sequence replaced by two 3 byte
  // surrogates.
  size_t modified_size = 0;
  for (size_t i = 0; i < size; i++) {
    if (is_four_byte_sequence(i)) {
      modified_size += 6;
      i += 3;
    } else {
      modified_size++;
    }
  }

  // Early out if no 4 byte codepoints are found
  if (size == modified_size) {
    return utf8;
  }

  std::string output;
  output.reserve(modified_size);

  // Appends the 3 byte UTF-8 form (1110xxxx 10xxxxxx 10xxxxxx) of a 16-bit value.
  const auto append_three_bytes = [&output](int32_t value) {
    output.push_back(static_cast<char>(0xE0 | ((value >> 12) & 0xF)));
    output.push_back(static_cast<char>(0x80 | ((value >> 6) & 0x3F)));
    output.push_back(static_cast<char>(0x80 | (value & 0x3F)));
  };

  for (size_t i = 0; i < size; i++) {
    if (!is_four_byte_sequence(i)) {
      output.push_back(utf8[i]);
      continue;
    }

    const int32_t codepoint = utf32_from_utf8_at(utf8.data(), size, i, nullptr);

    // Calculate the high and low surrogates as UTF-16 would
    const int32_t high = ((codepoint - 0x10000) / 0x400) + 0xD800;
    const int32_t low = ((codepoint - 0x10000) % 0x400) + 0xDC00;

    // Encode each surrogate in UTF-8
    append_three_bytes(high);
    append_three_bytes(low);

    // Skip the three continuation bytes that were just consumed.
    i += 3;
  }

  return output;
}

// Converts a Modified UTF-8 (CESU-8 style) string back into standard UTF-8.
//
// Each high surrogate that is immediately followed by a low surrogate is replaced by the 4 byte
// UTF-8 encoding of the codepoint the pair represents. Everything else, including surrogates
// that are not part of a valid pair, is copied through byte for byte.
//
// Returns an empty string if a codepoint fails to decode (utf32_from_utf8_at reports a negative
// value), including the codepoint following a high surrogate, or if a decoded pair does not
// yield a 4 byte UTF-8 sequence.
std::string ModifiedUtf8ToUtf8(const std::string& modified_utf8) {
  // The UTF-8 representation will have a byte length less than or equal to the Modified UTF-8
  // representation.
  std::string output;
  output.reserve(modified_utf8.size());

  size_t index = 0;
  const size_t modified_size = modified_utf8.size();
  while (index < modified_size) {
    size_t next_index;
    const int32_t high_surrogate =
        utf32_from_utf8_at(modified_utf8.data(), modified_size, index, &next_index);
    if (high_surrogate < 0) {
      return {};
    }

    // Check that the first codepoint is within the high surrogate range. The full range
    // 0xD800-0xDBFF is accepted so that codepoints in planes 15 and 16 (high surrogates
    // 0xDB80-0xDBFF) are converted back as well.
    if (high_surrogate >= 0xD800 && high_surrogate <= 0xDBFF) {
      // Track the end of the pair separately: if the second codepoint turns out not to be a low
      // surrogate, only the first codepoint is consumed and the second is examined again on the
      // next iteration (it may itself begin a valid pair).
      size_t pair_end;
      const int32_t low_surrogate =
          utf32_from_utf8_at(modified_utf8.data(), modified_size, next_index, &pair_end);
      if (low_surrogate < 0) {
        return {};
      }

      // Check that the second codepoint is within the low surrogate range
      if (low_surrogate >= 0xDC00 && low_surrogate <= 0xDFFF) {
        char32_t codepoint = static_cast<char32_t>(((high_surrogate - 0xD800) * 0x400) +
                                                   (low_surrogate - 0xDC00) + 0x10000);

        // The decoded codepoint should represent a 4 byte, UTF-8 character
        const size_t utf8_length = static_cast<size_t>(utf32_to_utf8_length(&codepoint, 1));
        if (utf8_length != 4) {
          return {};
        }

        // Encode into a scratch buffer (4 bytes plus one spare byte, matching the
        // utf8_length + 1 capacity handed to utf32_to_utf8) and append the 4 encoded bytes.
        char encoded[5];
        utf32_to_utf8(&codepoint, 1, encoded, sizeof(encoded));
        output.append(encoded, utf8_length);

        index = pair_end;
        continue;
      }
    }

    // Append the bytes of the current codepoint unchanged
    output.append(modified_utf8, index, next_index - index);
    index = next_index;
  }
  return output;
}

static std::vector<std::string> SplitAndTransform(
    const StringPiece& str, char sep, const std::function<char(char)>& f) {
  std::vector<std::string> parts;
@@ -90,6 +191,29 @@ std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
  return SplitAndTransform(str, sep, ::tolower);
}

// Flattens every block of `buffer`, in iteration order, into one newly allocated contiguous
// array of buffer.size() bytes and returns ownership of that array.
std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
  std::unique_ptr<uint8_t[]> flattened(new uint8_t[buffer.size()]);

  // Running write position inside the flattened array.
  size_t offset = 0;
  for (const auto& chunk : buffer) {
    memcpy(flattened.get() + offset, chunk.buffer.get(), chunk.size);
    offset += chunk.size;
  }
  return flattened;
}

// Returns the result of pool.stringAt(idx) when that lookup reports ok(), and an empty
// StringPiece16 otherwise.
StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
  auto lookup = pool.stringAt(idx);
  if (!lookup.ok()) {
    return StringPiece16();
  }
  return *lookup;
}

// Returns the string at `idx` of `pool` as UTF-8. When pool.string8At(idx) reports ok(), its
// result is run through ModifiedUtf8ToUtf8; otherwise the result of GetString16 is converted
// with Utf16ToUtf8.
std::string GetString(const android::ResStringPool& pool, size_t idx) {
  auto lookup8 = pool.string8At(idx);
  if (!lookup8.ok()) {
    // The 8-bit lookup did not succeed: fall back to the 16-bit lookup.
    return Utf16ToUtf8(GetString16(pool, idx));
  }
  return ModifiedUtf8ToUtf8(lookup8->to_string());
}

} // namespace util
} // namespace android
Loading