Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 56f36e8f authored by Jeremy Meyer
Browse files

Move StringPool to libandroidfw

Test: verified affected tests pass
Bug: 232940948
Change-Id: I22089893d7e5013f759c39ce190bec07fa6435db
parent 14be35a5
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ cc_library {
        "AssetManager2.cpp",
        "AssetsProvider.cpp",
        "AttributeResolution.cpp",
        "BigBuffer.cpp",
        "ChunkIterator.cpp",
        "ConfigDescription.cpp",
        "Idmap.cpp",
@@ -73,6 +74,7 @@ cc_library {
        "ResourceTypes.cpp",
        "ResourceUtils.cpp",
        "StreamingZipInflater.cpp",
        "StringPool.cpp",
        "TypeWrappers.cpp",
        "Util.cpp",
        "ZipFileRO.cpp",
@@ -162,6 +164,7 @@ cc_test {
        "tests/AssetManager2_test.cpp",
        "tests/AttributeFinder_test.cpp",
        "tests/AttributeResolution_test.cpp",
        "tests/BigBuffer_test.cpp",
        "tests/ByteBucketArray_test.cpp",
        "tests/Config_test.cpp",
        "tests/ConfigDescription_test.cpp",
@@ -174,6 +177,7 @@ cc_test {
        "tests/ResTable_test.cpp",
        "tests/Split_test.cpp",
        "tests/StringPiece_test.cpp",
        "tests/StringPool_test.cpp",
        "tests/Theme_test.cpp",
        "tests/TypeWrappers_test.cpp",
        "tests/ZipUtils_test.cpp",
+4 −0
Original line number Diff line number Diff line
@@ -601,6 +601,10 @@ base::expected<FindEntryResult, NullOrIOError> AssetManager2::FindEntry(
    return base::unexpected(result.error());
  }

  if (type_idx == 0x1c) {
    LOG(ERROR) << base::StringPrintf("foobar first result %s", result->package_name->c_str());
  }

  bool overlaid = false;
  if (!stop_at_first_match && !ignore_configuration && !apk_assets_[result->cookie]->IsLoader()) {
    for (const auto& id_map : package_group.overlays_) {
+3 −3
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

#include "util/BigBuffer.h"
#include <androidfw/BigBuffer.h>

#include <algorithm>
#include <memory>
@@ -22,7 +22,7 @@

#include "android-base/logging.h"

namespace aapt {
namespace android {

void* BigBuffer::NextBlockImpl(size_t size) {
  if (!blocks_.empty()) {
@@ -84,4 +84,4 @@ std::string BigBuffer::to_string() const {
  return result;
}

}  // namespace aapt
}  // namespace android
+23 −26
Original line number Diff line number Diff line
@@ -14,7 +14,8 @@
 * limitations under the License.
 */

#include "StringPool.h"
#include <androidfw/BigBuffer.h>
#include <androidfw/StringPool.h>

#include <algorithm>
#include <memory>
@@ -23,15 +24,14 @@
#include "android-base/logging.h"
#include "androidfw/ResourceTypes.h"
#include "androidfw/StringPiece.h"

#include "util/BigBuffer.h"
#include "util/Util.h"
#include "androidfw/Util.h"

using ::android::StringPiece;

namespace aapt {
namespace android {

StringPool::Ref::Ref() : entry_(nullptr) {}
StringPool::Ref::Ref() : entry_(nullptr) {
}

StringPool::Ref::Ref(const StringPool::Ref& rhs) : entry_(rhs.entry_) {
  if (entry_ != nullptr) {
@@ -88,10 +88,10 @@ const StringPool::Context& StringPool::Ref::GetContext() const {
  return entry_->context;
}

StringPool::StyleRef::StyleRef() : entry_(nullptr) {}
StringPool::StyleRef::StyleRef() : entry_(nullptr) {
}

StringPool::StyleRef::StyleRef(const StringPool::StyleRef& rhs)
    : entry_(rhs.entry_) {
StringPool::StyleRef::StyleRef(const StringPool::StyleRef& rhs) : entry_(rhs.entry_) {
  if (entry_ != nullptr) {
    entry_->ref_++;
  }
@@ -210,7 +210,7 @@ StringPool::StyleRef StringPool::MakeRef(const StyleString& str, const Context&
  entry->context = context;
  entry->index_ = styles_.size();
  entry->ref_ = 0;
  for (const aapt::Span& span : str.spans) {
  for (const android::Span& span : str.spans) {
    entry->spans.emplace_back(Span{MakeRef(span.name), span.first_char, span.last_char});
  }

@@ -368,15 +368,14 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
                         IDiagnostics* diag) {
  if (utf8) {
    const std::string& encoded = util::Utf8ToModifiedUtf8(str);
    const ssize_t utf16_length = utf8_to_utf16_length(
        reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size());
    const ssize_t utf16_length =
        utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(encoded.data()), encoded.size());
    CHECK(utf16_length >= 0);

    // Make sure the lengths to be encoded do not exceed the maximum length that
    // can be encoded using chars
    if ((((size_t)encoded.size()) > EncodeLengthMax<char>())
        || (((size_t)utf16_length) > EncodeLengthMax<char>())) {

    if ((((size_t)encoded.size()) > EncodeLengthMax<char>()) ||
        (((size_t)utf16_length) > EncodeLengthMax<char>())) {
      diag->Error(DiagMessage() << "string too large to encode using UTF-8 "
                                << "written instead as '" << kStringTooLarge << "'");

@@ -384,8 +383,8 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
      return false;
    }

    const size_t total_size = EncodedLengthUnits<char>(utf16_length)
        + EncodedLengthUnits<char>(encoded.size()) + encoded.size() + 1;
    const size_t total_size = EncodedLengthUnits<char>(utf16_length) +
                              EncodedLengthUnits<char>(encoded.size()) + encoded.size() + 1;

    char* data = out->NextBlock<char>(total_size);

@@ -411,8 +410,7 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
    }

    // Total number of 16-bit words to write.
    const size_t total_size = EncodedLengthUnits<char16_t>(utf16_length)
        + encoded.size() + 1;
    const size_t total_size = EncodedLengthUnits<char16_t>(utf16_length) + encoded.size() + 1;

    char16_t* data = out->NextBlock<char16_t>(total_size);

@@ -431,8 +429,7 @@ static bool EncodeString(const std::string& str, const bool utf8, BigBuffer* out
  return true;
}

bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8,
                         IDiagnostics* diag) {
bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8, IDiagnostics* diag) {
  bool no_error = true;
  const size_t start_index = out->size();
  android::ResStringPool_header* header = out->NextBlock<android::ResStringPool_header>();
@@ -490,8 +487,8 @@ bool StringPool::Flatten(BigBuffer* out, const StringPool& pool, bool utf8,
    // ResStringPool_span structure worth of 0xFFFFFFFF at the end
    // of the style block, so fill in the remaining 2 32bit words
    // with 0xFFFFFFFF.
    const size_t padding_length = sizeof(android::ResStringPool_span) -
                                  sizeof(android::ResStringPool_span::name);
    const size_t padding_length =
        sizeof(android::ResStringPool_span) - sizeof(android::ResStringPool_span::name);
    uint8_t* padding = out->NextBlock<uint8_t>(padding_length);
    memset(padding, 0xff, padding_length);
    out->Align4();
@@ -508,4 +505,4 @@ bool StringPool::FlattenUtf16(BigBuffer* out, const StringPool& pool, IDiagnosti
  return Flatten(out, pool, false, diag);
}

}  // namespace aapt
}  // namespace android
+124 −0
Original line number Diff line number Diff line
@@ -68,6 +68,107 @@ std::string Utf16ToUtf8(const StringPiece16& utf16) {
  return utf8;
}

// Converts standard UTF-8 into the Modified UTF-8 form used by Java.
//
// Modified UTF-8 only supports the 1, 2, and 3 byte formats of UTF-8. Every 4 byte UTF-8
// codepoint is therefore rewritten as a UTF-16 surrogate pair in which each surrogate is encoded
// as its own 3 byte sequence (the same layout as CESU-8 surrogate pairs). All other bytes are
// copied through unchanged, and input that contains no 4 byte codepoint is returned as-is.
//
// A byte whose top nibble is 0xF is only treated as the start of a 4 byte codepoint when the
// three bytes that complete it are actually present; a truncated sequence at the end of the
// input is copied through verbatim instead of being decoded.
std::string Utf8ToModifiedUtf8(const std::string& utf8) {
  const size_t size = utf8.size();

  // True when a complete 4 byte sequence starts at `pos`. The length check keeps both the
  // `i += 3` skips below and the decode call inside the bounds of the string.
  const auto starts_four_byte_sequence = [&utf8, size](size_t pos) {
    return (static_cast<uint8_t>(utf8[pos]) >> 4) == 0xF && size - pos >= 4;
  };

  // Appends the 3 byte UTF-8 form (1110xxxx 10xxxxxx 10xxxxxx) of one UTF-16 surrogate.
  const auto append_surrogate = [](std::string& out, int32_t surrogate) {
    out.push_back(static_cast<char>(0xE0 | ((surrogate >> 12) & 0xF)));
    out.push_back(static_cast<char>(0x80 | ((surrogate >> 6) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | (surrogate & 0x3F)));
  };

  // Calculate the size of the string once every 4 byte codepoint has been replaced with two
  // 3 byte surrogates.
  size_t modified_size = 0;
  for (size_t i = 0; i < size; i++) {
    if (starts_four_byte_sequence(i)) {
      modified_size += 6;
      i += 3;
    } else {
      modified_size++;
    }
  }

  // Early out if no 4 byte codepoints are found
  if (size == modified_size) {
    return utf8;
  }

  std::string output;
  output.reserve(modified_size);
  for (size_t i = 0; i < size; i++) {
    if (!starts_four_byte_sequence(i)) {
      output.push_back(utf8[i]);
      continue;
    }

    const int32_t codepoint = utf32_from_utf8_at(utf8.data(), size, i, nullptr);

    // Calculate the high and low surrogates as UTF-16 would
    const int32_t high = ((codepoint - 0x10000) / 0x400) + 0xD800;
    const int32_t low = ((codepoint - 0x10000) % 0x400) + 0xDC00;

    append_surrogate(output, high);
    append_surrogate(output, low);

    // Skip the three continuation bytes that were just consumed.
    i += 3;
  }

  return output;
}

// Converts Modified UTF-8 back into standard UTF-8.
//
// The input is decoded one codepoint at a time. A high surrogate immediately followed by a low
// surrogate is merged into the single codepoint the pair represents and re-encoded as one 4 byte
// UTF-8 sequence; everything else is copied through byte for byte.
//
// Returns an empty string when the decoder reports an error (a negative codepoint) or when a
// surrogate pair does not combine into a codepoint that needs exactly 4 bytes of UTF-8.
std::string ModifiedUtf8ToUtf8(const std::string& modified_utf8) {
  const size_t modified_size = modified_utf8.size();

  // The UTF-8 representation will have a byte length less than or equal to the Modified UTF-8
  // representation.
  std::string output;
  output.reserve(modified_size);

  size_t index = 0;
  while (index < modified_size) {
    size_t next_index;
    const int32_t high_surrogate =
        utf32_from_utf8_at(modified_utf8.data(), modified_size, index, &next_index);
    if (high_surrogate < 0) {
      return {};
    }

    // Check that the first codepoint is within the high surrogate range.
    // NOTE(review): the upper bound is 0xDB7F, so a pair whose high surrogate lies in
    // 0xDB80-0xDBFF is copied through undecoded by the loop at the bottom - confirm that this
    // is intended.
    if (high_surrogate >= 0xD800 && high_surrogate <= 0xDB7F) {
      // On success this advances next_index past the second codepoint as well.
      const int32_t low_surrogate =
          utf32_from_utf8_at(modified_utf8.data(), modified_size, next_index, &next_index);
      if (low_surrogate < 0) {
        return {};
      }

      // Check that the second codepoint is within the low surrogate range
      if (low_surrogate >= 0xDC00 && low_surrogate <= 0xDFFF) {
        const char32_t codepoint = static_cast<char32_t>(
            ((high_surrogate - 0xD800) * 0x400) + (low_surrogate - 0xDC00) + 0x10000);

        // The decoded codepoint should represent a 4 byte, UTF-8 character
        const size_t utf8_length = static_cast<size_t>(utf32_to_utf8_length(&codepoint, 1));
        if (utf8_length != 4) {
          return {};
        }

        // Grow the string first and only then take the write position, so the destination
        // pointer is valid regardless of how the string manages its storage. The extra byte in
        // the length passed to the encoder is the same `+ 1` the code has always passed.
        const size_t write_offset = output.size();
        output.resize(write_offset + utf8_length);
        utf32_to_utf8(&codepoint, 1, &output[write_offset], utf8_length + 1);

        index = next_index;
        continue;
      }
    }

    // Append non-surrogate pairs to the output string. When a high surrogate was followed by
    // something other than a low surrogate, next_index already points past both codepoints and
    // both are copied here.
    for (size_t i = index; i < next_index; i++) {
      output.push_back(modified_utf8[i]);
    }
    index = next_index;
  }
  return output;
}

static std::vector<std::string> SplitAndTransform(
    const StringPiece& str, char sep, const std::function<char(char)>& f) {
  std::vector<std::string> parts;
@@ -90,6 +191,29 @@ std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
  return SplitAndTransform(str, sep, ::tolower);
}

// Flattens `buffer` into a single newly allocated array of buffer.size() bytes by copying each
// of its blocks, in iteration order, one directly after the other.
std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
  std::unique_ptr<uint8_t[]> flattened(new uint8_t[buffer.size()]);

  // Number of bytes written so far, i.e. where the next block starts in the flat array.
  size_t offset = 0;
  for (const auto& block : buffer) {
    memcpy(flattened.get() + offset, block.buffer.get(), block.size);
    offset += block.size;
  }
  return flattened;
}

// Looks up the string at `idx` in `pool` through ResStringPool::stringAt. Returns the string
// when the lookup reports ok(), and an empty StringPiece16 otherwise.
StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
  auto entry = pool.stringAt(idx);
  if (!entry.ok()) {
    return StringPiece16();
  }
  return *entry;
}

// Returns the string at `idx` in `pool` as UTF-8.
//
// The 8-bit lookup (string8At) is tried first and its result is converted from Modified UTF-8.
// When that lookup does not report ok(), the 16-bit lookup is used instead and its result is
// converted from UTF-16.
std::string GetString(const android::ResStringPool& pool, size_t idx) {
  auto utf8_entry = pool.string8At(idx);
  if (!utf8_entry.ok()) {
    return Utf16ToUtf8(GetString16(pool, idx));
  }
  return ModifiedUtf8ToUtf8(utf8_entry->to_string());
}

} // namespace util
} // namespace android
Loading