Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b927c559 authored by Roozbeh Pournader's avatar Roozbeh Pournader
Browse files

Implement smarter locale resource selection

* Add support for determining script from language and region.
* Add support for determining special parents of locales.
* Add support for smart comparison of locales with only a difference
  in region, using the locale parentage tree.
* Fix LocaleData.matchScore() to not fall back to old locale matching
  behavior if we can't determine a script.
* Allow four-character variant codes. (Previously, only five- to
  eight-character variant codes were allowed.)

Bug: 7296673
Bug: 26589793
Change-Id: Ibde0a48c0564ff383b41068095a5cbacfe7b94bc
parent ac3e5990
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -301,10 +301,19 @@ public final class LocaleList implements Parcelable {
            // is a pseudo-locale. So this is not a match.
            return 0;
        }
        // There is no match if the two locales use different scripts. This will most imporantly
        // take care of traditional vs simplified Chinese.
        final String supportedScr = getLikelyScript(supported);
        if (supportedScr.isEmpty()) {
            // If we can't guess a script, we don't know enough about the locales' language to find
            // if the locales match. So we fall back to old behavior of matching, which considered
            // locales with different regions different.
            final String supportedRegion = supported.getCountry();
            return (supportedRegion.isEmpty() ||
                    supportedRegion.equals(desired.getCountry()))
                    ? 1 : 0;
        }
        final String desiredScr = getLikelyScript(desired);
        // There is no match if the two locales use different scripts. This will most imporantly
        // take care of traditional vs simplified Chinese.
        return supportedScr.equals(desiredScr) ? 1 : 0;
    }

+34 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _LIBS_UTILS_LOCALE_DATA_H
#define _LIBS_UTILS_LOCALE_DATA_H

#include <stddef.h>
#include <stdint.h>

namespace android {

// Compares two candidate regions, 'left_region' and 'right_region', as matches
// for the requested language, script and region. Only the first two bytes of
// the language and of each region are read.
//
// Returns 0 if the two regions are identical, a positive value if the left
// region is the better match for the request, and a negative value if the
// right region is the better match.
int localeDataCompareRegions(
        const char* left_region, const char* right_region,
        const char* requested_language, const char* requested_script,
        const char* requested_region);

// Writes the likely script for the given language and region into 'out'.
// Exactly four bytes are written. 'out' is filled with zero bytes when the
// language is empty or when nothing is known about the locale.
void localeDataComputeScript(char out[4], const char* language, const char* region);

} // namespace android

#endif // _LIBS_UTILS_LOCALE_DATA_H
+14 −1
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#define _LIBS_UTILS_RESOURCE_TYPES_H

#include <androidfw/Asset.h>
#include <androidfw/LocaleData.h>
#include <utils/ByteOrder.h>
#include <utils/Errors.h>
#include <utils/String16.h>
@@ -1127,8 +1128,9 @@ struct ResTable_config
    // configuration. (eg. Hant, Latn, etc.). Interpreted in conjunction with
    // the locale field.
    char localeScript[4];
    bool localeScriptWasProvided;

    // A single BCP-47 variant subtag. Will vary in length between 5 and 8
    // A single BCP-47 variant subtag. Will vary in length between 4 and 8
    // chars. Interpreted in conjunction with the locale field.
    char localeVariant[8];

@@ -1228,10 +1230,15 @@ struct ResTable_config

    // Resets every locale-related field of this configuration: the variant
    // and script buffers are zero-filled, the "script was provided" flag is
    // cleared, and the packed locale is set to 0.
    inline void clearLocale() {
        memset(localeVariant, 0, sizeof(localeVariant));
        memset(localeScript, 0, sizeof(localeScript));
        localeScriptWasProvided = false;
        locale = 0;
    }

    // Fills localeScript by passing this configuration's language and country
    // fields to localeDataComputeScript().
    inline void computeScript() {
        localeDataComputeScript(localeScript, language, country);
    }

    // Get the 2 or 3 letter language code of this configuration. Trailing
    // bytes are set to '\0'.
    size_t unpackLanguage(char language[4]) const;
@@ -1255,6 +1262,12 @@ struct ResTable_config
    // and 0 if they're equally specific.
    int isLocaleMoreSpecificThan(const ResTable_config &o) const;

    // Return true if 'this' is a better locale match than 'o' for the
    // 'requested' configuration. Similar to isBetterThan(), this assumes that
    // match() has already been used to remove any configurations that don't
    // match the requested configuration at all.
    bool isLocaleBetterThan(const ResTable_config& o, const ResTable_config* requested) const;

    String8 toString() const;
};

+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ commonSources := \
    Asset.cpp \
    AssetDir.cpp \
    AssetManager.cpp \
    LocaleData.cpp \
    misc.cpp \
    ObbFile.cpp \
    ResourceTypes.cpp \
+201 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <unordered_set>

#include <androidfw/LocaleData.h>

namespace android {

#include "LocaleDataTables.cpp"

// Packs the first two bytes of 'language' and the first two bytes of 'region'
// into one 32-bit key: language in the upper 16 bits, region in the lower 16.
// Both buffers must be at least two bytes long.
//
// Each byte is widened to uint32_t before shifting. A plain uint8_t would be
// promoted to (signed) int, and shifting a byte with its high bit set left by
// 24 would then shift into the sign bit.
inline uint32_t packLocale(const char* language, const char* region) {
    const uint32_t language_hi = static_cast<uint8_t>(language[0]);
    const uint32_t language_lo = static_cast<uint8_t>(language[1]);
    const uint32_t region_hi = static_cast<uint8_t>(region[0]);
    const uint32_t region_lo = static_cast<uint8_t>(region[1]);
    return (language_hi << 24u) | (language_lo << 16u) | (region_hi << 8u) | region_lo;
}

// Returns the packed locale with its region (the low 16 bits) cleared,
// leaving only the language part.
inline uint32_t dropRegion(uint32_t packed_locale) {
    const uint32_t language_mask = UINT32_C(0xFFFF0000);
    return packed_locale & language_mask;
}

// Tells whether a packed locale carries a region, i.e. whether any of its
// low 16 bits are set.
inline bool hasRegion(uint32_t packed_locale) {
    const uint32_t region_bits = packed_locale & UINT32_C(0x0000FFFF);
    return region_bits != 0;
}

// Number of bytes in a script code. Script buffers in this file are compared
// and copied using exactly this many bytes.
const size_t SCRIPT_LENGTH = 4;
// Number of entries in the SCRIPT_PARENTS table.
const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
const uint32_t PACKED_ROOT = 0; // to represent the root locale

// Returns the parent of a packed locale for the given script.
//
// A locale without a region has the root locale as its parent. Otherwise the
// parent is the special parent listed for this script in SCRIPT_PARENTS, or,
// when there is none, the same locale with its region dropped.
uint32_t findParent(uint32_t packed_locale, const char* script) {
    if (!hasRegion(packed_locale)) {
        return PACKED_ROOT;
    }
    for (size_t idx = 0; idx < SCRIPT_PARENTS_COUNT; ++idx) {
        const auto& entry = SCRIPT_PARENTS[idx];
        if (memcmp(script, entry.script, SCRIPT_LENGTH) != 0) {
            continue;
        }
        const auto found = entry.map->find(packed_locale);
        if (found != entry.map->end()) {
            return found->second;
        }
        // Only the first entry whose script matches is consulted.
        break;
    }
    return dropRegion(packed_locale);
}

// Walks up the parent chain of 'packed_locale' (the locale itself first) and
// stores every visited locale in 'out', which the caller must have sized
// large enough. The root locale is never written.
//
// The walk ends early as soon as a visited locale appears in 'stop_list'
// (which has 'stop_set_length' entries). That locale is still written to
// 'out', and its position in 'stop_list' is stored in '*stop_list_index'.
// If the walk reaches the root without such a hit, '*stop_list_index' is
// set to -1.
//
// Returns how many locales were written to 'out'; this is never less than one.
size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
                     uint32_t packed_locale, const char* script,
                     const uint32_t* stop_list, size_t stop_set_length) {
    size_t written = 0;
    uint32_t current = packed_locale;
    while (true) {
        out[written] = current;
        ++written;
        for (size_t pos = 0; pos < stop_set_length; ++pos) {
            if (current == stop_list[pos]) {
                *stop_list_index = static_cast<ssize_t>(pos);
                return written;
            }
        }
        current = findParent(current, script);
        if (current == PACKED_ROOT) {
            break;
        }
    }
    *stop_list_index = static_cast<ssize_t>(-1);
    return written;
}

size_t findDistance(uint32_t supported,
                    const char* script,
                    const uint32_t* request_ancestors,
                    size_t request_ancestors_count) {
    uint32_t supported_ancestors[MAX_PARENT_DEPTH+1];
    ssize_t request_ancestors_index;
    const size_t supported_ancestor_count = findAncestors(
            supported_ancestors, &request_ancestors_index,
            supported, script,
            request_ancestors, request_ancestors_count);
    // Since both locales share the same root, there will always be a shared
    // ancestor, so the distance in the parent tree is the sum of the distance
    // of 'supported' to the lowest common ancestor (number of ancestors
    // written for 'supported' minus 1) plus the distance of 'request' to the
    // lowest common ancestor (the index of the ancestor in request_ancestors).
    return supported_ancestor_count + request_ancestors_index - 1;
}

// Tells whether the locale made of 'language_and_region' (a packed locale)
// and the four-byte 'script' is listed in REPRESENTATIVE_LOCALES.
//
// The lookup key holds the packed language and region in the upper 32 bits
// and the four script bytes in the lower 32 bits. Each script byte goes
// through uint8_t first: converting a negative 'char' straight to uint64_t
// would sign-extend and overwrite the language/region half of the key.
inline bool isRepresentative(uint32_t language_and_region, const char* script) {
    const uint64_t packed_script =
            (static_cast<uint64_t>(static_cast<uint8_t>(script[0])) << 24u) |
            (static_cast<uint64_t>(static_cast<uint8_t>(script[1])) << 16u) |
            (static_cast<uint64_t>(static_cast<uint8_t>(script[2])) <<  8u) |
            static_cast<uint64_t>(static_cast<uint8_t>(script[3]));
    const uint64_t packed_locale =
            (static_cast<uint64_t>(language_and_region) << 32u) | packed_script;

    return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
}

int localeDataCompareRegions(
        const char* left_region, const char* right_region,
        const char* requested_language, const char* requested_script,
        const char* requested_region) {

    if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
        return 0;
    }
    const uint32_t left = packLocale(requested_language, left_region);
    const uint32_t right = packLocale(requested_language, right_region);
    const uint32_t request = packLocale(requested_language, requested_region);

    uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
    ssize_t left_right_index;
    // Find the parents of the request, but stop as soon as we saw left or right
    const uint32_t left_and_right[] = {left, right};
    const size_t ancestor_count = findAncestors(
            request_ancestors, &left_right_index,
            request, requested_script,
            left_and_right, sizeof(left_and_right)/sizeof(left_and_right[0]));
    if (left_right_index == 0) { // We saw left earlier
        return 1;
    }
    if (left_right_index == 1) { // We saw right earlier
        return -1;
    }

    // If we are here, neither left nor right are an ancestor of the
    // request. This means that all the ancestors have been computed and
    // the last ancestor is just the language by itself. We will use the
    // distance in the parent tree for determining the better match.
    const size_t left_distance = findDistance(
            left, requested_script, request_ancestors, ancestor_count);
    const size_t right_distance = findDistance(
            right, requested_script, request_ancestors, ancestor_count);
    if (left_distance != right_distance) {
        return (int) right_distance - (int) left_distance; // smaller distance is better
    }

    // If we are here, left and right are equidistant from the request. We will
    // try and see if any of them is a representative locale.
    const bool left_is_representative = isRepresentative(left, requested_script);
    const bool right_is_representative = isRepresentative(right, requested_script);
    if (left_is_representative != right_is_representative) {
        return (int) left_is_representative - (int) right_is_representative;
    }

    // We have no way of figuring out which locale is a better match. For
    // the sake of stability, we consider the locale with the lower region
    // code (in dictionary order) better, with two-letter codes before
    // three-digit codes (since two-letter codes are more specific).
    return (int64_t) right - (int64_t) left;
}

// Looks up the likely script for 'language' and 'region' and writes its four
// bytes to 'out'.
//
// The language+region pair is looked up in LIKELY_SCRIPTS first. If it is
// absent and a region was given, the language alone is tried. When the
// language is empty, or neither lookup succeeds, 'out' is filled with zero
// bytes.
void localeDataComputeScript(char out[4], const char* language, const char* region) {
    if (language[0] != '\0') {
        const uint32_t full_key = packLocale(language, region);
        auto hit = LIKELY_SCRIPTS.find(full_key);
        if (hit == LIKELY_SCRIPTS.end() && region[0] != '\0') {
            // The exact locale is unknown; retry without the region.
            hit = LIKELY_SCRIPTS.find(dropRegion(full_key));
        }
        if (hit != LIKELY_SCRIPTS.end()) {
            memcpy(out, SCRIPT_CODES[hit->second], SCRIPT_LENGTH);
            return;
        }
    }
    // Either no language was given or nothing is known about the locale.
    memset(out, '\0', SCRIPT_LENGTH);
}

} // namespace android
Loading