Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 549e437e authored by Adam Lesinski
Browse files

AAPT2: Iterate over UTF-8 string by codepoints

Iterating over a UTF-8 string by codepoints ensures that
unicode characters do not get sliced. Otherwise the resulting
string could contain malformed characters.

Bug: 62839202
Test: make aapt2_tests
Change-Id: Ia0c44fbceb7dcfa11e77a1a77011da0f5466e342
parent 2fa4a346
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -115,6 +115,7 @@ cc_library_host_static {
        "unflatten/ResChunkPullParser.cpp",
        "util/BigBuffer.cpp",
        "util/Files.cpp",
        "util/Utf8Iterator.cpp",
        "util/Util.cpp",
        "ConfigDescription.cpp",
        "Debug.cpp",
+5 −0
Original line number Diff line number Diff line
@@ -95,6 +95,11 @@ TEST_F(ResourceParserTest, ParseEscapedString) {
  ASSERT_THAT(str, NotNull());
  EXPECT_THAT(*str, StrValueEq("?123"));
  EXPECT_THAT(str->untranslatable_sections, IsEmpty());

  ASSERT_TRUE(TestParse(R"(<string name="bar">This isn\’t a bad string</string>)"));
  str = test::GetValue<String>(&table_, "string/bar");
  ASSERT_THAT(str, NotNull());
  EXPECT_THAT(*str, StrValueEq("This isn’t a bad string"));
}

TEST_F(ResourceParserTest, ParseFormattedString) {
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
- Fixed issue where Java classes referenced from fragments and menus were not added to
  the set of Proguard keep rules. (bug 62216174)
- Automatically version XML `<adaptive-icon>` resources to v26. (bug 62316340)
- Fixed issue where escaped unicode characters would generate malformed UTF-8. (bug 62839202)

## Version 2.17
### `aapt2 ...`
+60 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "util/Utf8Iterator.h"

#include "android-base/logging.h"
#include "utils/Unicode.h"

using ::android::StringPiece;

namespace aapt {

// Builds an iterator over `str`, starting at byte offset 0.
//
// The first code point is decoded eagerly so that HasNext() gives a meaningful
// answer before the first call to Next().
Utf8Iterator::Utf8Iterator(const StringPiece& str)
    : str_(str),
      next_pos_(0),
      current_codepoint_(0) {
  // Prime current_codepoint_ with the first decoded value (or leave it at 0
  // when nothing can be decoded).
  DoNext();
}

void Utf8Iterator::DoNext() {
  size_t next_pos = 0u;
  int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), next_pos_, &next_pos);
  if (result == -1) {
    current_codepoint_ = 0u;
  } else {
    current_codepoint_ = static_cast<char32_t>(result);
    next_pos_ = next_pos;
  }
}

// Reports whether Next() may be called.
//
// A cached code point of 0 is the "finished" sentinel, so a decoded value of 0
// is indistinguishable from having run out of input.
bool Utf8Iterator::HasNext() const {
  const bool exhausted = (current_codepoint_ == 0);
  return !exhausted;
}

// Advances past up to `amount` code points, stopping early once the iterator is
// exhausted. A zero or negative `amount` leaves the iterator untouched.
void Utf8Iterator::Skip(int amount) {
  for (int remaining = amount; remaining > 0; --remaining) {
    if (!HasNext()) {
      break;
    }
    Next();
  }
}

// Returns the code point the iterator is currently positioned on and moves on
// to the following one.
//
// Calling this when HasNext() is false trips the CHECK below.
char32_t Utf8Iterator::Next() {
  CHECK(HasNext()) << "Next() called after iterator exhausted";

  // Save the value to hand back before DoNext() overwrites the cache.
  const char32_t emitted = current_codepoint_;
  DoNext();
  return emitted;
}

}  // namespace aapt
+47 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef AAPT_UTIL_UTF8ITERATOR_H
#define AAPT_UTIL_UTF8ITERATOR_H

#include "android-base/macros.h"
#include "androidfw/StringPiece.h"

namespace aapt {

// Forward iterator that walks a UTF-8 encoded string one code point at a time
// rather than one byte at a time.
//
// The code point 0 is used internally as the "no more input" sentinel, so
// iteration also ends at a decoded value of 0.
class Utf8Iterator {
 public:
  // Starts iterating at the beginning of `str` and decodes the first code point
  // immediately.
  // NOTE(review): presumably StringPiece is a non-owning view, in which case the
  // underlying bytes must outlive this iterator -- confirm.
  explicit Utf8Iterator(const android::StringPiece& str);

  // Returns true while there is a code point available for Next().
  bool HasNext() const;

  // Advances past up to `amount` code points, stopping early if the iterator
  // runs out. Zero or negative amounts do nothing.
  void Skip(int amount);

  // Returns the current code point and advances to the following one.
  // Must only be called when HasNext() is true; otherwise a CHECK fails.
  char32_t Next();

 private:
  DISALLOW_COPY_AND_ASSIGN(Utf8Iterator);

  // Decodes the code point at next_pos_ into current_codepoint_ and moves
  // next_pos_ forward.
  void DoNext();

  // The string being iterated.
  android::StringPiece str_;
  // Byte offset in str_ at which the next decode will start.
  size_t next_pos_;
  // Code point that the next call to Next() will return; 0 means exhausted.
  char32_t current_codepoint_;
};

}  // namespace aapt

#endif  // AAPT_UTIL_UTF8ITERATOR_H
Loading