Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 19ad7cce authored by Treehugger Robot's avatar Treehugger Robot Committed by Automerger Merge Worker
Browse files

Merge "Turn LocaleDataTables into C switch statements" into main am: b45d2c4d am: 2206598e

parents 1a5bacc5 2206598e
Loading
Loading
Loading
Loading
+14910 −27

File changed.

Preview size limit exceeded, changes collapsed.

+0 −14751

File deleted.

Preview size limit exceeded, changes collapsed.

+1 −1
Original line number Diff line number Diff line
@@ -3,4 +3,4 @@ zyy@google.com
patb@google.com

per-file CursorWindow.cpp=omakoto@google.com
per-file LocaleDataTables.cpp=vichang@google.com,ngeoffray@google.com
per-file LocaleDataLookup.cpp=vichang@google.com,ngeoffray@google.com
+9 −15
Original line number Diff line number Diff line
@@ -22,10 +22,6 @@

namespace android {

namespace hidden {
    bool isRepresentative(uint64_t packed_locale);
}

constexpr size_t SCRIPT_LENGTH = 4;

constexpr inline uint32_t packLocale(const char* language, const char* region) {
@@ -45,6 +41,14 @@ constexpr inline bool hasRegion(uint32_t packed_locale) {
    return (packed_locale & 0x0000FFFFLU) != 0;
}

/**
 * Pack a 4-letter script code (e.g. "Latn") into a 32-bit key, placing the first
 * letter in the most significant byte and the fourth letter in the least
 * significant byte.
 *
 * Exactly four chars are read from script; a terminating NUL is not required.
 *
 * Each char is converted to unsigned char by value before being widened, so a char
 * with the high bit set is never sign-extended. The conversion is done by value
 * rather than by reading through a reinterpret_cast'ed pointer because
 * reinterpret_cast is not permitted in a constant expression; this keeps the
 * function usable at compile time (static_assert, case labels, constexpr tables).
 */
constexpr inline uint32_t packScript(const char* script) {
    return (static_cast<uint32_t>(static_cast<unsigned char>(script[0])) << 24u) |
           (static_cast<uint32_t>(static_cast<unsigned char>(script[1])) << 16u) |
           (static_cast<uint32_t>(static_cast<unsigned char>(script[2])) <<  8u) |
            static_cast<uint32_t>(static_cast<unsigned char>(script[3]));
}

/**
 * Return nullptr if the key isn't found. The input packed_lang_region can be computed
 * by android::packLocale.
@@ -56,17 +60,7 @@ const char* lookupLikelyScript(uint32_t packed_lang_region);
 * Return false if the key isn't representative. The input lookup key can be computed
 * by android::packLocale.
 */
inline bool isLocaleRepresentative(uint32_t language_and_region, const char* script) {
    // View the script tag as unsigned bytes so that widening never sign-extends.
    const auto* bytes = reinterpret_cast<const unsigned char*>(script);

    // Low 32 bits of the key: the four script bytes, first byte most significant.
    uint64_t key = 0;
    for (int i = 0; i < 4; ++i) {
        key = (key << 8u) | bytes[i];
    }

    // High 32 bits of the key: the packed language and region.
    key |= static_cast<uint64_t>(language_and_region) << 32u;

    return hidden::isRepresentative(key);
}
bool isLocaleRepresentative(uint32_t language_and_region, const char* script);

/**
 * Return a parent packed key for a given script and child packed key. Return 0 if
+112 −30
Original line number Diff line number Diff line
@@ -121,7 +121,7 @@ def pack_to_uint32(locale):

def dump_script_codes(all_scripts):
    """Dump the SCRIPT_CODES table."""
    print('const char SCRIPT_CODES[][4] = {')
    print('constexpr const char SCRIPT_CODES[][4] = {')
    for index, script in enumerate(all_scripts):
        print("    /* %-2d */ {'%c', '%c', '%c', '%c'}," % (
            index, script[0], script[1], script[2], script[3]))
@@ -132,15 +132,33 @@ def dump_script_codes(all_scripts):
def dump_script_data(likely_script_dict, all_scripts):
    """Dump the script data."""
    print()
    print('const std::unordered_map<uint32_t, uint8_t> LIKELY_SCRIPTS({')
    print('const char* lookupLikelyScript(uint32_t packed_lang_region) {')
    print('    switch(packed_lang_region) {')

    # partition the mapping by the script code
    parts = {}
    for locale in sorted(likely_script_dict.keys()):
        script = likely_script_dict[locale]
        print('    {0x%08Xu, %2du}, // %s -> %s' % (
        if script in parts:
            l = parts[script]
        else:
            l = []
            parts[script] = l
        l.append(locale)

    for script in sorted(parts.keys()):
        locales = parts[script]
        for locale in locales:
            print('        case 0x%08Xu: // %s -> %s' % (
                pack_to_uint32(locale),
            all_scripts.index(script),
                locale.replace('_', '-'),
                script))
    print('});')
        print('            return SCRIPT_CODES[%2du];' %
              all_scripts.index(script))
    print('        default:')
    print('            return nullptr;')
    print('     }')
    print('}')


def pack_to_uint64(locale):
@@ -152,16 +170,32 @@ def pack_to_uint64(locale):
            (ord(script[2]) << 8) |
            ord(script[3]))

def pack_script_to_uint32(script):
    """Return the 32-bit unsigned packing of a 4-letter script code.

    The first letter ends up in the most significant byte and the fourth
    letter in the least significant byte. Only the first four characters
    of `script` are read.
    """
    packed = 0
    for position in range(4):
        # Shift the letters gathered so far up one byte and append the next.
        packed = (packed << 8) | ord(script[position])
    return packed


def dump_representative_locales(representative_locales):
    """Dump the set of representative locales."""
    print()
    print('std::unordered_set<uint64_t> REPRESENTATIVE_LOCALES({')
    print('bool isLocaleRepresentative(uint32_t language_and_region, const char* script) {')
    print('    const uint64_t packed_locale =')
    print('            ((static_cast<uint64_t>(language_and_region)) << 32u) |')
    print('            (static_cast<uint64_t>(packScript(script)));')
    print('    switch(packed_locale) {')
    for locale in sorted(representative_locales):
        print('    0x%08XLLU, // %s' % (
        print('        case 0x%08XLLU: // %s' % (
            pack_to_uint64(locale),
            locale))
    print('});')

    print('            return true;')
    print('        default:')
    print('            return false;')
    print('    }')
    print('}')


def read_and_dump_likely_data(cldr_source_dir):
@@ -182,7 +216,7 @@ def read_and_dump_likely_data(cldr_source_dir):

def escape_script_variable_name(script):
    """Escape characters, e.g. '~', in a C++ variable name"""
    return script.replace("~", "_")
    return script.replace("~", "0")

def read_parent_data(icu_data_dir):
    """Read locale parent data from ICU data files."""
@@ -225,29 +259,52 @@ def dump_parent_data(script_organized_dict):
    """Dump information for parents of locales."""
    sorted_scripts = sorted(script_organized_dict.keys())
    print()

    for script in sorted_scripts:
        parent_dict = script_organized_dict[script]
        print ('const std::unordered_map<uint32_t, uint32_t> %s_PARENTS({'
            % escape_script_variable_name(script.upper()))

        # partition the mapping by the parent's value
        parts = {}
        for locale in sorted(parent_dict.keys()):
            parent = parent_dict[locale]
            print('    {0x%08Xu, 0x%08Xu}, // %s -> %s' % (
            if parent in parts:
                l = parts[parent]
            else:
                l = []
                parts[parent] = l
            l.append(locale)

        print('static uint32_t find%sParent(uint32_t packed_lang_region) {' % escape_script_variable_name(script))
        print('    switch(packed_lang_region) {')
        for parent in sorted(parts.keys()):
            locales = parts[parent]
            for locale in locales:
                print('        case 0x%08Xu: // %s -> %s' % (
                    pack_to_uint32(locale),
                pack_to_uint32(parent),
                    locale.replace('_', '-'),
                    parent.replace('_', '-')))
        print('});')

            print('            return 0x%08Xu;' % pack_to_uint32(parent))

        print('        default:')
        print('            return 0;')
        print('    }')
        print('}')
        print()

    print('const struct {')
    print('    const char script[4];')
    print('    const std::unordered_map<uint32_t, uint32_t>* map;')
    print('} SCRIPT_PARENTS[] = {')
    print('uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) {')
    print('    uint32_t packedScript = packScript(script);')
    print('    switch (packedScript) {')

    for script in sorted_scripts:
        print("    {{'%c', '%c', '%c', '%c'}, &%s_PARENTS}," % (
            script[0], script[1], script[2], script[3],
            escape_script_variable_name(script.upper())))
    print('};')
        print('        case 0x%08Xu: // %s' % (pack_script_to_uint32(script), script))
        print('            return find%sParent(packed_lang_region);' %
              escape_script_variable_name(script))

    print('        default:')
    print('            return 0;')
    print('    }')
    print('}')


def dump_parent_tree_depth(parent_dict):
@@ -261,7 +318,9 @@ def dump_parent_tree_depth(parent_dict):
        max_depth = max(max_depth, depth)
    assert max_depth < 5 # Our algorithms assume small max_depth
    print()
    print('const size_t MAX_PARENT_DEPTH = %d;' % max_depth)
    print('uint32_t getMaxAncestorTreeDepth() {')
    print('    return %d;' % max_depth)
    print('}')


def read_and_dump_parent_data(icu_data_dir, likely_script_dict):
@@ -286,10 +345,33 @@ def main():
        'external', 'icu', 'icu4c', 'source', 'data')
    cldr_source_dir = os.path.join(source_root, 'external', 'cldr')

    print('''/*
 * Copyright (C) 2025 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

''')
    print('// Auto-generated by %s' % sys.argv[0])
    print()
    print('''
#include <androidfw/LocaleDataLookup.h>

namespace android {
''')
    likely_script_dict = read_and_dump_likely_data(cldr_source_dir)
    read_and_dump_parent_data(icu_data_dir, likely_script_dict)
    print()
    print('} // namespace android')


if __name__ == '__main__':