Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a490e2cf authored by Victor Chang's avatar Victor Chang
Browse files

Extract implementation of script and locale matching into LocaleDataLookup.h

This makes it easier to add new unit tests and to fix correctness and
performance bugs later.

Bug: 386340812
Test: atest libandroidfw_tests
Change-Id: I4d3ee1333637d2cd22d5fdfad730935951feeccb
parent 7b641717
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ cc_library {
        "LoadedArsc.cpp",
        "Locale.cpp",
        "LocaleData.cpp",
        "LocaleDataLookup.cpp",
        "misc.cpp",
        "NinePatch.cpp",
        "ObbFile.cpp",
@@ -224,6 +225,7 @@ cc_test {
        "tests/Idmap_test.cpp",
        "tests/LoadedArsc_test.cpp",
        "tests/Locale_test.cpp",
        "tests/LocaleDataLookup_test.cpp",
        "tests/NinePatch_test.cpp",
        "tests/ResourceTimer_test.cpp",
        "tests/ResourceUtils_test.cpp",
+13 −45
Original line number Diff line number Diff line
@@ -23,39 +23,18 @@
#include <unordered_set>

#include <androidfw/LocaleData.h>
#include <androidfw/LocaleDataLookup.h>

namespace android {

#include "LocaleDataTables.cpp"

inline uint32_t packLocale(const char* language, const char* region) {
    return (((uint8_t) language[0]) << 24u) | (((uint8_t) language[1]) << 16u) |
           (((uint8_t) region[0]) << 8u) | ((uint8_t) region[1]);
}

inline uint32_t dropRegion(uint32_t packed_locale) {
    return packed_locale & 0xFFFF0000LU;
}

inline bool hasRegion(uint32_t packed_locale) {
    return (packed_locale & 0x0000FFFFLU) != 0;
}

const size_t SCRIPT_LENGTH = 4;
const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
const uint32_t PACKED_ROOT = 0; // to represent the root locale
const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth();

uint32_t findParent(uint32_t packed_locale, const char* script) {
    if (hasRegion(packed_locale)) {
        for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
            if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
                auto map = SCRIPT_PARENTS[i].map;
                auto lookup_result = map->find(packed_locale);
                if (lookup_result != map->end()) {
                    return lookup_result->second;
                }
                break;
            }
        auto parent_key = findParentLocalePackedKey(script, packed_locale);
        if (parent_key != 0) {
            return parent_key;
        }
        return dropRegion(packed_locale);
    }
@@ -111,17 +90,6 @@ size_t findDistance(uint32_t supported,
    return supported_ancestor_count + request_ancestors_index - 1;
}

inline bool isRepresentative(uint32_t language_and_region, const char* script) {
    const uint64_t packed_locale = (
            (((uint64_t) language_and_region) << 32u) |
            (((uint64_t) script[0]) << 24u) |
            (((uint64_t) script[1]) << 16u) |
            (((uint64_t) script[2]) <<  8u) |
            ((uint64_t) script[3]));

    return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
}

const uint32_t US_SPANISH = 0x65735553LU; // es-US
const uint32_t MEXICAN_SPANISH = 0x65734D58LU; // es-MX
const uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU; // es-419
@@ -185,8 +153,8 @@ int localeDataCompareRegions(

    // If we are here, left and right are equidistant from the request. We will
    // try and see if any of them is a representative locale.
    const bool left_is_representative = isRepresentative(left, requested_script);
    const bool right_is_representative = isRepresentative(right, requested_script);
    const bool left_is_representative = isLocaleRepresentative(left, requested_script);
    const bool right_is_representative = isLocaleRepresentative(right, requested_script);
    if (left_is_representative != right_is_representative) {
        return (int) left_is_representative - (int) right_is_representative;
    }
@@ -204,14 +172,14 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi
        return;
    }
    uint32_t lookup_key = packLocale(language, region);
    auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
    if (lookup_result == LIKELY_SCRIPTS.end()) {
    auto lookup_result = lookupLikelyScript(lookup_key);
    if (lookup_result == nullptr) {
        // We couldn't find the locale. Let's try without the region
        if (region[0] != '\0') {
            lookup_key = dropRegion(lookup_key);
            lookup_result = LIKELY_SCRIPTS.find(lookup_key);
            if (lookup_result != LIKELY_SCRIPTS.end()) {
                memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
            lookup_result = lookupLikelyScript(lookup_key);
            if (lookup_result != nullptr) {
                memcpy(out, lookup_result, SCRIPT_LENGTH);
                return;
            }
        }
@@ -220,7 +188,7 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi
        return;
    } else {
        // We found the locale.
        memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
        memcpy(out, lookup_result, SCRIPT_LENGTH);
    }
}

+64 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <unordered_map>
#include <unordered_set>

#include <androidfw/LocaleDataLookup.h>

namespace android {

#include "LocaleDataTables.cpp"

// Number of elements in the SCRIPT_PARENTS array; used as the loop bound in
// findParentLocalePackedKey(). SCRIPT_PARENTS is presumably defined in the included
// LocaleDataTables.cpp (not visible here).
const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);

// Looks up packed_lang_region in LIKELY_SCRIPTS. On a hit, returns the SCRIPT_CODES entry
// selected by the mapped index; on a miss, returns nullptr.
const char* lookupLikelyScript(uint32_t packed_lang_region) {
    const auto entry = LIKELY_SCRIPTS.find(packed_lang_region);
    if (entry != LIKELY_SCRIPTS.end()) {
        return SCRIPT_CODES[entry->second];
    }
    return nullptr;
}

// Finds the parent of packed_lang_region within the parent table of the given script.
// Only the first SCRIPT_PARENTS entry whose script bytes match is consulted; returns 0 when
// no entry matches the script or the matching table has no parent for the key.
uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) {
    for (size_t index = 0; index < SCRIPT_PARENTS_COUNT; ++index) {
        const auto& candidate = SCRIPT_PARENTS[index];
        if (memcmp(script, candidate.script, SCRIPT_LENGTH) != 0) {
            continue;
        }
        // Script matched: the answer comes from this table or not at all.
        const auto parents = candidate.map;
        const auto hit = parents->find(packed_lang_region);
        if (hit == parents->end()) {
            return 0;
        }
        return hit->second;
    }
    return 0;
}

// Returns MAX_PARENT_DEPTH (presumably generated into the included LocaleDataTables.cpp;
// its definition is not visible here).
uint32_t getMaxAncestorTreeDepth() {
    return MAX_PARENT_DEPTH;
}

namespace hidden {

// True when packed_locale is a member of REPRESENTATIVE_LOCALES.
bool isRepresentative(uint64_t packed_locale) {
    const auto match = REPRESENTATIVE_LOCALES.find(packed_locale);
    return match != REPRESENTATIVE_LOCALES.end();
}

} // namespace hidden

} // namespace android
+79 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stddef.h>
#include <stdint.h>


namespace android {

namespace hidden {
    // Called by isLocaleRepresentative() with the 64-bit key it builds from the packed
    // language/region and the four script bytes.
    bool isRepresentative(uint64_t packed_locale);
}

// Number of bytes in a script code. Script codes handled here are 4-byte sequences that are
// not null-terminated.
constexpr size_t SCRIPT_LENGTH = 4;

/**
 * Pack a language and a region into one 32-bit key:
 * language[0] in bits 31-24, language[1] in bits 23-16, region[0] in bits 15-8 and
 * region[1] in bits 7-0. Each char is treated as an unsigned byte, so no sign extension
 * occurs.
 *
 * Exactly two bytes are read from each argument, so both must point to at least two
 * readable bytes; an absent region is represented by two zero bytes.
 *
 * The bytes are converted by value (static_cast) rather than through a reinterpret_cast'ed
 * pointer, because reinterpret_cast is not permitted in a constant expression and would make
 * this constexpr function unusable at compile time.
 */
constexpr inline uint32_t packLocale(const char* language, const char* region) {
    return (static_cast<uint32_t>(static_cast<unsigned char>(language[0])) << 24u) |
            (static_cast<uint32_t>(static_cast<unsigned char>(language[1])) << 16u) |
            (static_cast<uint32_t>(static_cast<unsigned char>(region[0])) << 8u) |
            static_cast<uint32_t>(static_cast<unsigned char>(region[1]));
}

// Clears the low 16 bits of a packed locale and keeps the high 16 bits (the language part
// as laid out by packLocale).
constexpr inline uint32_t dropRegion(uint32_t packed_locale) {
    constexpr uint32_t kLanguageMask = 0xFFFF0000u;
    return packed_locale & kLanguageMask;
}

// True when any of the low 16 bits (the region part of a packed locale) is set.
constexpr inline bool hasRegion(uint32_t packed_locale) {
    const uint16_t region_bits = static_cast<uint16_t>(packed_locale);
    return region_bits != 0;
}

/**
 * Return nullptr if the key isn't found. The input packed_lang_region can be computed
 * by android::packLocale.
 * Note that the returned char* is either nullptr or a 4-byte char sequence, but isn't
 * a null-terminated string.
 */
const char* lookupLikelyScript(uint32_t packed_lang_region);
/**
 * Return true if the given locale is representative, false otherwise.
 * language_and_region is a packed key, which can be computed by android::packLocale.
 * script must point to at least 4 readable bytes; it does not need to be null-terminated.
 */
bool inline isLocaleRepresentative(uint32_t language_and_region, const char* script) {
    // Key layout: packed language/region in the high 32 bits, followed by the four script
    // bytes (each taken as an unsigned byte) in the low 32 bits, first byte highest.
    uint64_t key = language_and_region;
    for (int i = 0; i < 4; ++i) {
        key = (key << 8u) | static_cast<unsigned char>(script[i]);
    }
    return hidden::isRepresentative(key);
}

/**
 * Return a parent packed key for a given script and child packed key. Return 0 if
 * no parent is found.
 * The script is compared as SCRIPT_LENGTH raw bytes, so it must point to at least that
 * many readable bytes and does not need to be null-terminated.
 */
uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region);

/**
 * Return the MAX_PARENT_DEPTH value of the locale data tables. Presumably this is an upper
 * bound on the length of a parent-locale chain; confirm against the table generator.
 */
uint32_t getMaxAncestorTreeDepth();

} // namespace android
+108 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "androidfw/LocaleDataLookup.h"

#include <cstddef>
#include <string>

#include "gtest/gtest.h"
#include "gmock/gmock.h"


namespace android {

// Four zero bytes, passed to isLocaleRepresentative() in the tests below as an all-zero
// script argument.
constexpr const char NULL_SCRIPT[4] = {'\0', '\0', '\0','\0' };

// Expects that `s` is non-null and that its first 4 bytes equal the first 4 bytes of `ex`
// (script codes are 4 bytes and not null-terminated, so string comparison cannot be used).
// Arguments are parenthesized so that expressions containing low-precedence operators
// (e.g. `cond ? a : b`) expand correctly. `s` is expanded twice, so it must not have side
// effects. The misspelled name ("SCEIPT") is kept because the tests in this file use it.
#define EXPECT_SCEIPT_EQ(ex, s) EXPECT_EQ(0, (s) == nullptr ? -1 : memcmp((ex), (s), 4))

// Test-only encoder for a language or region literal, similar to packLanguageOrRegion() in
// ResourceTypes.cpp.
//   - fewer than 2 characters: 0
//   - exactly 2 characters:    the two bytes, first character in the high byte
//   - otherwise:               the first three characters, each reduced to (c - base) & 0x7f
//                              and bit-packed into 16 bits with the top bit (0x8000) set
static uint32_t encodeLanguageOrRegionLiteral(const char* in, const char base) {
  const size_t length = strlen(in);
  if (length < 2) {
    return 0;
  }
  if (length == 2) {
    const uint8_t high = static_cast<uint8_t>(in[0]);
    const uint8_t low = static_cast<uint8_t>(in[1]);
    return (high << 8) | low;
  }

  const auto offsetFromBase = [base](char c) -> uint8_t {
    return static_cast<uint8_t>((c - base) & 0x007f);
  };
  const uint8_t first = offsetFromBase(in[0]);
  const uint8_t second = offsetFromBase(in[1]);
  const uint8_t third = offsetFromBase(in[2]);

  const uint8_t highByte = static_cast<uint8_t>(0x80 | (third << 2) | (second >> 3));
  const int lowBits = (second << 5) | first;
  return (highByte << 8) | lowBits;
}

// Builds a 32-bit locale key for the tests: the encoded language (relative to 'a') in the
// high 16 bits and the encoded region (relative to '0') in the low 16 bits.
static uint32_t encodeLocale(const char* language, const char* region) {
    const uint32_t languageBits = encodeLanguageOrRegionLiteral(language, 'a');
    const uint32_t regionBits = encodeLanguageOrRegionLiteral(region, '0');
    return (languageBits << 16) | regionBits;
}

// Exercises lookupLikelyScript() with keys built by encodeLocale().
TEST(LocaleDataLookupTest, lookupLikelyScript) {
  // The empty key and these region-qualified keys are expected to have no entry of their
  // own, while the language-only keys "en" and "fr" resolve to "Latn".
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("", "")));
  EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("en", "")));
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "US")));
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "GB")));
  EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("fr", "")));
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("fr", "FR")));


  // Regions that are expected to carry an entry of their own (en-XA, ha-SD, zh-HK), next to
  // keys for the same languages that are not.
  EXPECT_SCEIPT_EQ("~~~A", lookupLikelyScript(encodeLocale("en", "XA")));
  EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("ha", "")));
  EXPECT_SCEIPT_EQ("Arab", lookupLikelyScript(encodeLocale("ha", "SD")));
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("ha", "Sd"))); // case sensitive
  EXPECT_SCEIPT_EQ("Hans", lookupLikelyScript(encodeLocale("zh", "")));
  EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("zh", "CN")));
  EXPECT_SCEIPT_EQ("Hant", lookupLikelyScript(encodeLocale("zh", "HK")));

  // Three-letter language code: the same key is checked once via encodeLocale() and once as
  // a hard-coded constant.
  EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(encodeLocale("zhx", "")));
  EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(0xDCF90000u)); // encoded "zhx"
}

// Exercises isLocaleRepresentative() with keys built by encodeLocale().
TEST(LocaleDataLookupTest, isLocaleRepresentative) {
  // English: expected to be representative only with both a region and the "Latn" script.
  EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "US"), "Latn"));
  EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "GB"), "Latn"));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), NULL_SCRIPT));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), "Latn"));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), NULL_SCRIPT));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), "Arab"));

  EXPECT_TRUE(isLocaleRepresentative(encodeLocale("fr", "FR"), "Latn"));

  // Chinese: 0xDCF9434E is the value encodeLocale("zhx", "CN") produces, written out as a
  // constant, so each zhx expectation is checked in both spellings.
  EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zh", "CN"), "Hans"));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zh", "TW"), "Hans"));
  EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Hans"));
  EXPECT_FALSE(isLocaleRepresentative(0xDCF9434E, "Hans"));
  EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Nshu"));
  EXPECT_TRUE(isLocaleRepresentative(0xDCF9434E, "Nshu"));
}

// Exercises findParentLocalePackedKey(). Where a pair of expectations appears, the first
// states the expected parent via encodeLocale() and the second repeats the same lookup
// against the hard-coded packed value.
TEST(LocaleDataLookupTest, findParentLocalePackedKey) {
  EXPECT_EQ(encodeLocale("en", "001"), findParentLocalePackedKey("Latn", encodeLocale("en", "GB")));
  EXPECT_EQ(0x656E8400u, findParentLocalePackedKey("Latn", encodeLocale("en", "GB")));

  EXPECT_EQ(encodeLocale("en", "IN"), findParentLocalePackedKey("Deva", encodeLocale("hi", "")));

  EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("Arab", encodeLocale("ar", "AE")));
  EXPECT_EQ(0x61729420u, findParentLocalePackedKey("Arab", encodeLocale("ar", "AE")));

  // The hard-coded check must repeat the "~~~B" / ar-XB lookup; it previously re-ran the
  // "Arab" / ar-AE lookup from the pair above, leaving this case without its constant check.
  EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("~~~B", encodeLocale("ar", "XB")));
  EXPECT_EQ(0x61729420u, findParentLocalePackedKey("~~~B", encodeLocale("ar", "XB")));

  EXPECT_EQ(encodeLocale("zh", "HK"), findParentLocalePackedKey("Hant", encodeLocale("zh", "MO")));
}

}  // namespace android