Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 072a95a3 authored by Seigo Nonaka's avatar Seigo Nonaka
Browse files

Introduce script matching for enabling default IME subtypes.

This is the 2nd attempt of I5bb1bd8cdb9096d516d60beb9936e55bf2b757ae

The motivation of this CL is to enhance the default IME subtype enabling
algorithm.  The new approach uses a score-based algorithm.  The
design of the matching score is as follows:
- The matching score for each pair of locales is at most 3 and is
  determined as follows:
  - Score of 3 : matches all language, script and country.
  - Score of 2 : matches the language and script.
  - Score of 1 : matches the language regardless of country.
  - Score of 0 : doesn't match the language regardless of script and
                 country.
- All locales are fully expanded before matching by addLikelySubtags in
  ICU.

Bug: 27129703
Bug: 27348943
Change-Id: I8fc774154f5175abff2f16e8f12a4847bf5f5b7c
parent 75e91294
Loading
Loading
Loading
Loading
+158 −74
Original line number Diff line number Diff line
@@ -18,15 +18,17 @@ package com.android.internal.inputmethod;

import com.android.internal.annotations.VisibleForTesting;

import android.annotation.IntRange;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.text.TextUtils;
import android.icu.util.ULocale;
import android.util.LocaleList;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

public final class LocaleUtils {

@@ -36,12 +38,138 @@ public final class LocaleUtils {
        Locale get(@Nullable T source);
    }

    @Nullable
    private static String getLanguage(@Nullable Locale locale) {
        if (locale == null) {
            return null;
    /**
     * Scores how closely one supported locale matches one desired locale.
     *
     * <p>Both locales are assumed to be fully expanded already.  The language is not compared
     * here because {@code calculateMatchingScore} has done so before calling this method.
     * Variants and extensions are ignored.</p>
     *
     * @see LocaleUtils#calculateMatchingScore(ULocale, LocaleList, byte[])
     *
     * @param supported The locale supported by IME subtype.
     * @param desired The locale preferred by user.
     * @return 3 if the locales are equal or share both script and country, 2 if they share the
     * script but not the country, 1 if the script is missing or different.
     */
    @IntRange(from=1, to=3)
    private static byte calculateMatchingSubScore(@NonNull final ULocale supported,
            @NonNull final ULocale desired) {
        if (supported.equals(desired)) {
            return 3;  // Identical locales need no further inspection.
        }

        // TODO: Need subscript matching. For example, Hanb should match with Bopo.
        final String script = supported.getScript();
        final boolean sameScript = !script.isEmpty() && script.equals(desired.getScript());
        if (!sameScript) {
            return 1;  // Only the language (checked by the caller) is shared.
        }

        final String country = supported.getCountry();
        final boolean sameCountry = !country.isEmpty() && country.equals(desired.getCountry());
        return (byte) (sameCountry ? 3 : 2);
    }

    /**
     * Computes the matching score of one supported locale against every desired locale.
     *
     * <p>For each entry of {@code desired} the score written to {@code out} is:</p>
     * <ul>
     *     <li>3 if language, script and country of the supported locale all match,</li>
     *     <li>2 if the language and script match,</li>
     *     <li>1 if only the language matches,</li>
     *     <li>0 if the language does not match.</li>
     * </ul>
     *
     * @param supported The locale supported by IME subtype.
     * @param desired The locale list preferred by user. Typically system locale list.
     * @param out The output buffer that receives the individual score for each desired locale.
     * The length of {@code out} must be the same as the length of the {@code desired} list.
     * @return {@code false} if {@code desired} is empty or the supported locale matches none of
     * the desired locales. Otherwise {@code true}.
     */
    private static boolean calculateMatchingScore(@NonNull final ULocale supported,
            @NonNull final LocaleList desired, @NonNull byte[] out) {
        if (desired.isEmpty()) {
            return false;
        }

        final String supportedLanguage = supported.getLanguage();
        boolean anyMatch = false;
        final int count = desired.size();
        for (int i = 0; i < count; ++i) {
            final Locale candidate = desired.get(i);
            if (!candidate.getLanguage().equals(supportedLanguage)) {
                out[i] = 0;
                continue;
            }

            // TODO: cache the result of addLikelySubtags if it is slow.
            final ULocale expanded = ULocale.addLikelySubtags(ULocale.forLocale(candidate));
            out[i] = calculateMatchingSubScore(supported, expanded);
            if (out[i] != 0) {
                anyMatch = true;
            }
        }
        return anyMatch;
    }

    private static final class ScoreEntry implements Comparable<ScoreEntry> {
        public int mIndex = -1;
        @NonNull public final byte[] mScore;  // matching score of the i-th system languages.

        /**
         * Creates an entry that keeps its own copy of {@code score} together with {@code index}.
         *
         * @param score The scores to copy; later changes to this array do not affect the entry.
         * @param index The index to remember alongside the copied scores.
         */
        ScoreEntry(@NonNull byte[] score, int index) {
            mScore = score.clone();
            mIndex = index;
        }

        /**
         * Overwrites the stored scores and index.
         *
         * @param score The source of the new scores; its first {@code mScore.length} values are
         * copied, so it must be at least that long.
         * @param index The index to remember alongside the copied scores.
         */
        private void set(@NonNull byte[] score, int index) {
            // Bulk copy instead of a hand-written element loop.
            System.arraycopy(score, 0, mScore, 0, mScore.length);
            mIndex = index;
        }

        /**
         * Replaces the stored score and index with the given ones, but only when the given score
         * compares strictly greater than the stored score.
         *
         * @param score The candidate score.
         * @param index The index associated with the candidate score.
         */
        public void updateIfBetter(@NonNull byte[] score, int index) {
            final boolean candidateIsBetter = compare(mScore, score) < 0;
            if (!candidateIsBetter) {
                return;  // Equal or worse candidates leave this entry untouched.
            }
            set(score, index);
        }

        /**
         * Provides comparison for two byte arrays, element by element from the front.
         *
         * <p>The first position at which the arrays differ decides the result: the array holding
         * the larger value at that position is the larger array.  If no position differs, the
         * arrays are equal.</p>
         *
         * @param left The length must be equal to {@code right}.
         * @param right The length must be equal to {@code left}.
         * @return 1 if {@code left} is larger than {@code right}. -1 if {@code left} is less than
         * {@code right}. 0 if {@code left} and {@code right} are equal.
         */
        @IntRange(from=-1, to=1)
        private static int compare(@NonNull byte[] left, @NonNull byte[] right) {
            for (int pos = 0; pos < left.length; ++pos) {
                // Bytes are widened to int, so the subtraction cannot overflow.
                final int delta = left[pos] - right[pos];
                if (delta != 0) {
                    return delta > 0 ? 1 : -1;
                }
            }
            return 0;
        }

        @Override
        public int compareTo(final ScoreEntry other) {
            // Flip the sign of the score comparison so that sorting yields descending order.
            final int ascendingOrder = compare(mScore, other.mScore);
            return -ascendingOrder;
        }
        return locale.getLanguage();
    }

    /**
@@ -52,14 +180,8 @@ public final class LocaleUtils {
     * {@code "en-GB", "ja", "en-AU", "fr-CA", "en-IN"} is specified to {@code preferredLanguages},
     * this method tries to copy at most one English locale, at most one Japanese, and at most one
     * French locale from {@code source} to {@code dest}.  Here the best matching English locale
     * will be searched from {@code source} as follows.
     * <ol>
     *     <li>The first instance in {@code sources} that exactly matches {@code "en-GB"}</li>
     *     <li>The first instance in {@code sources} that exactly matches {@code "en-AU"}</li>
     *     <li>The first instance in {@code sources} that exactly matches {@code "en-IN"}</li>
     *     <li>The first instance in {@code sources} that partially matches {@code "en"}</li>
     * </ol>
     * <p>Then this method iterates the same algorithm for Japanese then French.</p>
     * will be searched from {@code source} based on matching score. For the score design, see
     * {@link LocaleUtils#calculateMatchingScore(ULocale, LocaleList, byte[])}</p>
     *
     * @param sources Source items to be filtered.
     * @param extractor Type converter from the source items to {@link Locale} object.
@@ -74,69 +196,31 @@ public final class LocaleUtils {
            @NonNull LocaleExtractor<T> extractor,
            @NonNull LocaleList preferredLanguages,
            @NonNull ArrayList<T> dest) {
        final Locale[] availableLocales = new Locale[sources.size()];
        for (int i = 0; i < availableLocales.length; ++i) {
            availableLocales[i] = extractor.get(sources.get(i));
        }
        final Locale[] sortedPreferredLanguages = new Locale[preferredLanguages.size()];
        if (sortedPreferredLanguages.length > 0) {
            int nextIndex = 0;
            final int N = preferredLanguages.size();
            languageLoop:
            for (int i = 0; i < N; ++i) {
                final String language = getLanguage(preferredLanguages.get(i));
                for (int j = 0; j < nextIndex; ++j) {
                    if (TextUtils.equals(getLanguage(sortedPreferredLanguages[j]), language)) {
                        continue languageLoop;
                    }
                }
                for (int j = i; j < N; ++j) {
                    final Locale locale = preferredLanguages.get(j);
                    if (TextUtils.equals(language, getLanguage(locale))) {
                        sortedPreferredLanguages[nextIndex] = locale;
                        ++nextIndex;
                    }
                }
            }
        }
        final HashMap<String, ScoreEntry> scoreboard = new HashMap<>();
        final byte[] score = new byte[preferredLanguages.size()];


        for (int languageIndex = 0; languageIndex < sortedPreferredLanguages.length;) {
            // Finding the range.
            final String language = getLanguage(sortedPreferredLanguages[languageIndex]);
            int nextLanguageIndex = languageIndex;
            for (; nextLanguageIndex < sortedPreferredLanguages.length; ++nextLanguageIndex) {
                final Locale locale = sortedPreferredLanguages[nextLanguageIndex];
                if (!TextUtils.equals(getLanguage(locale), language)) {
                    break;
                }
            }

            // Check exact match
            boolean found = false;
            for (int i = languageIndex; !found && i < nextLanguageIndex; ++i) {
                final Locale locale = sortedPreferredLanguages[i];
                for (int j = 0; j < availableLocales.length; ++j) {
                    if (!Objects.equals(locale, availableLocales[j])) {
        final int sourceSize = sources.size();
        for (int i = 0; i < sourceSize; ++i) {
            final Locale locale = extractor.get(sources.get(i));
            if (locale == null ||
                    !calculateMatchingScore(ULocale.addLikelySubtags(ULocale.forLocale(locale)),
                            preferredLanguages, score)) {
                continue;
            }
                    dest.add(sources.get(j));
                    found = true;
                    break;
                }
            }

            if (!found) {
                // No exact match.  Use language match.
                for (int j = 0; j < availableLocales.length; ++j) {
                    if (!TextUtils.equals(language, getLanguage(availableLocales[j]))) {
                        continue;
                    }
                    dest.add(sources.get(j));
                    break;
            final String lang = locale.getLanguage();
            final ScoreEntry bestScore = scoreboard.get(lang);
            if (bestScore == null) {
                scoreboard.put(lang, new ScoreEntry(score, i));
            } else {
                bestScore.updateIfBetter(score, i);
            }
        }
            languageIndex = nextLanguageIndex;

        final ScoreEntry[] result = scoreboard.values().toArray(new ScoreEntry[scoreboard.size()]);
        Arrays.sort(result);
        for (final ScoreEntry entry : result) {
            dest.add(sources.get(entry.mIndex));
        }
    }
}
+205 −13
Original line number Diff line number Diff line
@@ -49,6 +49,22 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
        assertEquals(0, dest.size());
    }

    @SmallTest
    public void testFilterDoesNotMatchAnything() throws Exception {
        // The preferred list is Chinese only, while no available locale is Chinese.
        final ArrayList<Locale> availableLocales = new ArrayList<>();
        for (final String tag : new String[] {"en-US", "fr-CA", "in", "ja", "fil"}) {
            availableLocales.add(Locale.forLanguageTag(tag));
        }

        final ArrayList<Locale> dest = new ArrayList<>();
        LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper,
                LocaleList.forLanguageTags("zh-Hans-TW"), dest);

        assertEquals(0, dest.size());
    }

    @SmallTest
    public void testFilterByLanguageEmptySource() throws Exception {
        final ArrayList<Locale> availableLocales = new ArrayList<>();
@@ -124,6 +140,7 @@ public class LocaleUtilsTest extends InstrumentationTestCase {

    @SmallTest
    public void testFilterByLanguage() throws Exception {
        {
            final ArrayList<Locale> availableLocales = new ArrayList<>();
            availableLocales.add(Locale.forLanguageTag("en-US"));
            availableLocales.add(Locale.forLanguageTag("fr-CA"));
@@ -140,6 +157,20 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
            assertEquals(availableLocales.get(0), dest.get(1));  // "en-US"
            assertEquals(availableLocales.get(3), dest.get(2));  // "ja"
        }
        {
            final ArrayList<Locale> availableLocales = new ArrayList<>();
            availableLocales.add(Locale.forLanguageTag("en-US"));
            availableLocales.add(Locale.forLanguageTag("en-GB"));
            availableLocales.add(Locale.forLanguageTag("en-IN"));

            final LocaleList preferredLocales = LocaleList.forLanguageTags("en-US");

            final ArrayList<Locale> dest = new ArrayList<>();
            LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
            assertEquals(1, dest.size());
            assertEquals(availableLocales.get(0), dest.get(0));  // "en-US"
        }
    }

    @SmallTest
    public void testFilterByLanguageTheSameLanguage() throws Exception {
@@ -191,4 +222,165 @@ public class LocaleUtilsTest extends InstrumentationTestCase {
            assertEquals(availableLocales.get(1), dest.get(0));  // "en-CA"
        }
    }

    @SmallTest
    public void testFilterByLanguageFallbackRules() throws Exception {
        // Every scenario expects exactly one result; the last argument is the index, within the
        // available tags, of the locale that must be picked.
        final String[] regionalTags = {
                "sr-Cyrl-BA", "sr-Cyrl-CS", "sr-Cyrl-ME", "sr-Cyrl-RS",
                "sr-Latn-BA", "sr-Latn-CS", "sr-Latn-ME", "sr-Latn-RS"};
        final String[] regionalPrivateUseTags = {
                "sr-Cyrl-BA-x-android", "sr-Cyrl-CS-x-android",
                "sr-Cyrl-ME-x-android", "sr-Cyrl-RS-x-android",
                "sr-Latn-BA-x-android", "sr-Latn-CS-x-android",
                "sr-Latn-ME-x-android", "sr-Latn-RS-x-android"};

        assertSingleFilterResult("sr-Latn-RS", regionalTags, 7);  // "sr-Latn-RS"
        assertSingleFilterResult("sr-Latn-RS-x-android", regionalTags, 7);  // "sr-Latn-RS"
        assertSingleFilterResult(
                "sr-Latn-RS", regionalPrivateUseTags, 7);  // "sr-Latn-RS-x-android"

        assertSingleFilterResult(
                "sr-Latn-RS", new String[] {"sr", "sr-Cyrl", "sr-Latn"}, 2);  // "sr-Latn"

        final String[] mixedTags = {"sr", "sr-RS", "sr-Latn"};
        assertSingleFilterResult("sr-RS", mixedTags, 0);  // "sr"
        assertSingleFilterResult("sr-Latn", mixedTags, 2);  // "sr-Latn"
        assertSingleFilterResult("sr", mixedTags, 0);  // "sr"
        assertSingleFilterResult(
                "sr", new String[] {"sr-Latn", "sr-RS", "sr"}, 1);  // "sr-RS"

        assertSingleFilterResult(
                "sr", new String[] {"sr-Cyrl-RS", "sr-Latn-RS"}, 0);  // "sr-Cyrl-RS"
        assertSingleFilterResult(
                "sr-Latn", new String[] {"sr-Latn-RS", "sr-Cyrl-RS"}, 0);  // "sr-Latn-RS"
    }

    /**
     * Runs {@code LocaleUtils.filterByLanguage} and asserts that exactly one locale is selected
     * and that it is the available locale at {@code expectedIndex}.
     *
     * @param preferredTags Language tags used to build the preferred locale list.
     * @param availableTags Language tags of the available locales, in order.
     * @param expectedIndex Index within {@code availableTags} of the expected result.
     */
    private void assertSingleFilterResult(String preferredTags, String[] availableTags,
            int expectedIndex) {
        final LocaleList preferredLocales = LocaleList.forLanguageTags(preferredTags);
        final ArrayList<Locale> availableLocales = new ArrayList<>();
        for (final String tag : availableTags) {
            availableLocales.add(Locale.forLanguageTag(tag));
        }
        final ArrayList<Locale> dest = new ArrayList<>();
        LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
        assertEquals(1, dest.size());
        assertEquals(availableLocales.get(expectedIndex), dest.get(0));
    }

    @SmallTest
    public void testFilterKnownLimitation() throws Exception {
        // The following cases do not describe intended behavior.  They pin the current behavior
        // so that it does not silently become worse.
        {
            final LocaleList preferredLocales = LocaleList.forLanguageTags("ja-Hrkt");
            final ArrayList<Locale> availableLocales = new ArrayList<>();
            availableLocales.add(Locale.forLanguageTag("ja-Jpan"));
            availableLocales.add(Locale.forLanguageTag("ja-Hrkt"));
            final ArrayList<Locale> dest = new ArrayList<>();
            LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
            assertEquals(1, dest.size());
            // Ideally this would be ja-Jpan, since it supports ja-Hrkt and is listed before
            // ja-Hrkt.  The current result is ja-Hrkt.
            assertEquals(availableLocales.get(1), dest.get(0));
        }
        {
            final LocaleList preferredLocales = LocaleList.forLanguageTags("zh-Hani");
            final ArrayList<Locale> availableLocales = new ArrayList<>();
            availableLocales.add(Locale.forLanguageTag("zh-Hans"));
            availableLocales.add(Locale.forLanguageTag("zh-Hant"));
            availableLocales.add(Locale.forLanguageTag("zh-Hanb"));
            availableLocales.add(Locale.forLanguageTag("zh-Hani"));
            final ArrayList<Locale> dest = new ArrayList<>();
            LocaleUtils.filterByLanguage(availableLocales, sIdentityMapper, preferredLocales, dest);
            assertEquals(1, dest.size());
            // Ideally this would be zh-Hans, since it supports zh-Hani (as do zh-Hant and
            // zh-Hanb).  The current result is zh-Hani.
            assertEquals(availableLocales.get(3), dest.get(0));
        }
    }
}