Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6fef4ff0 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Use suggestions in the distracter filter." into lmp-dev

parents 83a96fe5 33ca0c80
Loading
Loading
Loading
Loading
+162 −6
Original line number Original line Diff line number Diff line
@@ -16,17 +16,28 @@


package com.android.inputmethod.latin.utils;
package com.android.inputmethod.latin.utils;


import java.util.HashMap;
import java.util.List;
import java.util.List;
import java.util.Locale;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeUnit;


import android.content.Context;
import android.content.Context;
import android.content.res.Resources;
import android.text.InputType;
import android.util.Log;
import android.util.Log;
import android.util.LruCache;
import android.util.LruCache;
import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputMethodSubtype;
import android.view.inputmethod.InputMethodSubtype;


import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.KeyboardId;
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;


/**
/**
 * This class is used to prevent distracters being added to personalization
 * This class is used to prevent distracters being added to personalization
@@ -40,10 +51,20 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
    private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;
    private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;


    private final Context mContext;
    private final Context mContext;
    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
    private final DictionaryFacilitator mDictionaryFacilitator;
    private final DictionaryFacilitator mDictionaryFacilitator;
    private final LruCache<String, Boolean> mDistractersCache;
    private final LruCache<String, Boolean> mDistractersCache;
    private Keyboard mKeyboard;
    private final Object mLock = new Object();
    private final Object mLock = new Object();


    // If the normalized score of the top suggestion exceeds this value, the tested word (e.g.,
    // an OOV word, a misspelling, or an in-vocabulary word) is considered a distracter to
    // words in the dictionary. The greater the threshold is, the less likely the tested word
    // is to be treated as a distracter, which means it is more likely to be added to the
    // dictionary.
    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;

    /**
    /**
     * Create a DistracterFilter instance.
     * Create a DistracterFilter instance.
     *
     *
@@ -51,8 +72,11 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
     */
     */
    public DistracterFilterCheckingExactMatches(final Context context) {
    public DistracterFilterCheckingExactMatches(final Context context) {
        mContext = context;
        mContext = context;
        mLocaleToSubtypeMap = new HashMap<>();
        mLocaleToKeyboardMap = new HashMap<>();
        mDictionaryFacilitator = new DictionaryFacilitator();
        mDictionaryFacilitator = new DictionaryFacilitator();
        mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
        mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
        mKeyboard = null;
    }
    }


    @Override
    @Override
@@ -62,6 +86,54 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {


    @Override
    @Override
    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
        // Build the new locale -> subtype mapping. A null list yields an empty mapping.
        final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
        if (enabledSubtypes != null) {
            for (final InputMethodSubtype subtype : enabledSubtypes) {
                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
                // Multiple subtypes may be enabled for one locale; only the first one is kept.
                // TODO: Investigate what we should do for this case.
                if (!newLocaleToSubtypeMap.containsKey(locale)) {
                    newLocaleToSubtypeMap.put(locale, subtype);
                }
            }
        }
        // The comparison is done under mLock as well, so that the subtype map is never read
        // while another thread is in the middle of replacing its contents.
        synchronized (mLock) {
            if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
                // Enabled subtypes have not been changed.
                return;
            }
            mLocaleToSubtypeMap.clear();
            mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
            // Cached keyboards were built from the old subtypes, so drop them.
            mLocaleToKeyboardMap.clear();
        }
    }

    /**
     * Makes the alphabet keyboard for the given locale the current {@code mKeyboard}.
     *
     * A keyboard already stored in {@code mLocaleToKeyboardMap} is reused. Otherwise a new one
     * is built from the subtype registered for the locale and stored in the map, so that later
     * calls for the same locale do not rebuild the layout. If no subtype is registered for the
     * locale, {@code mKeyboard} is left unchanged.
     *
     * @param newLocale the locale whose keyboard should be loaded.
     */
    private void loadKeyboardForLocale(final Locale newLocale) {
        final InputMethodSubtype subtype;
        synchronized (mLock) {
            final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
            if (cachedKeyboard != null) {
                mKeyboard = cachedKeyboard;
                return;
            }
            subtype = mLocaleToSubtypeMap.get(newLocale);
        }
        if (subtype == null) {
            return;
        }
        final EditorInfo editorInfo = new EditorInfo();
        editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
        final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
                mContext, editorInfo);
        final Resources res = mContext.getResources();
        final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
        final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
        builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
        builder.setSubtype(subtype);
        builder.setIsSpellChecker(false /* isSpellChecker */);
        final KeyboardLayoutSet layoutSet = builder.build();
        final Keyboard keyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
        // Remember the keyboard; without this the cache lookup above can never succeed.
        synchronized (mLock) {
            mLocaleToKeyboardMap.put(newLocale, keyboard);
        }
        mKeyboard = keyboard;
    }
    }


    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
@@ -89,6 +161,12 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
        }
        }
        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
            synchronized (mLock) {
            synchronized (mLock) {
                if (!mLocaleToSubtypeMap.containsKey(locale)) {
                    Log.e(TAG, "Locale " + locale + " is not enabled.");
                    // TODO: Investigate what we should do for disabled locales.
                    return false;
                }
                loadKeyboardForLocale(locale);
                // Reset dictionaries for the locale.
                // Reset dictionaries for the locale.
                try {
                try {
                    mDistractersCache.evictAll();
                    mDistractersCache.evictAll();
@@ -101,29 +179,107 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
            }
            }
        }
        }


        if (DEBUG) {
            Log.d(TAG, "testedWord: " + testedWord);
        }
        final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
        final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
        if (isCachedDistracter != null && isCachedDistracter) {
        if (isCachedDistracter != null && isCachedDistracter) {
            if (DEBUG) {
            if (DEBUG) {
                Log.d(TAG, "testedWord: " + testedWord);
                Log.d(TAG, "isDistracter: true (cache hit)");
                Log.d(TAG, "isDistracter: true (cache hit)");
            }
            }
            return true;
            return true;
        }
        }

        final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
                checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
        if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
            // Add the word to the cache.
            mDistractersCache.put(testedWord, Boolean.TRUE);
            return true;
        }
        final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
                false /* ignoreCase */);
        if (isValidWord) {
            // Valid word is not a distractor.
            if (DEBUG) {
                Log.d(TAG, "isDistracter: false (valid word)");
            }
            return false;
        }

        final boolean isDistracterCheckedByGetSuggestion =
                checkDistracterUsingGetSuggestions(testedWord);
        if (isDistracterCheckedByGetSuggestion) {
            // Add the word to the cache.
            mDistractersCache.put(testedWord, Boolean.TRUE);
            return true;
        }
        return false;
    }

    private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
        // The tested word is a distracter when there is a word that is exact matched to the tested
        // The tested word is a distracter when there is a word that is exact matched to the tested
        // word and its probability is higher than the tested word's probability.
        // word and its probability is higher than the tested word's probability.
        final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
        final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
        final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
        final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
        final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
        final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
        if (DEBUG) {
        if (DEBUG) {
            Log.d(TAG, "testedWord: " + testedWord);
            Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
            Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
            Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
            Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
            Log.d(TAG, "isDistracter: " + isDistracter);
            Log.d(TAG, "isDistracter: " + isDistracter);
        }
        }
        if (isDistracter) {
            // Add the word to the cache.
            mDistractersCache.put(testedWord, Boolean.TRUE);
        }
        return isDistracter;
        return isDistracter;
    }
    }

    /**
     * Checks whether the tested word is a distracter by requesting suggestions for it and
     * comparing the first suggestion against {@code DISTRACTER_WORD_SCORE_THRESHOLD}.
     *
     * @param testedWord the word to test.
     * @return true if the first suggestion exceeds the threshold; false if no keyboard is
     *         loaded, no suggestion is returned, or the threshold is not exceeded.
     */
    private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
        // Without a keyboard there are no coordinates or proximity info to query with.
        if (mKeyboard == null) {
            return false;
        }
        final SettingsValuesForSuggestion suggestionSettings = new SettingsValuesForSuggestion(
                false /* blockPotentiallyOffensive */,
                false /* spaceAwareGestureEnabled */,
                null /* additionalFeaturesSettingValues */);
        // The word used for score comparison has its trailing single quotes removed.
        final int quoteCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
        final String consideredWord;
        if (quoteCount > 0) {
            consideredWord = testedWord.substring(0, testedWord.length() - quoteCount);
        } else {
            consideredWord = testedWord;
        }
        final WordComposer wordComposer = new WordComposer();
        // The composer is fed the tested word as given, including any trailing quotes.
        final int[] codePoints = StringUtils.toCodePointArray(testedWord);

        synchronized (mLock) {
            wordComposer.setComposingWord(codePoints, mKeyboard.getCoordinates(codePoints));
            final SuggestionResults results = mDictionaryFacilitator.getSuggestionResults(
                    wordComposer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO,
                    mKeyboard.getProximityInfo(), suggestionSettings, 0 /* sessionId */);
            if (results.isEmpty()) {
                return false;
            }
            final boolean exceedsThreshold = suggestionExceedsDistracterThreshold(
                    results.first(), consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
            if (DEBUG) {
                Log.d(TAG, "isDistracter: " + exceedsThreshold);
            }
            return exceedsThreshold;
        }
    }

    /**
     * Tells whether a suggestion's score, as computed by
     * {@code BinaryDictionaryUtils.calcNormalizedScore} against the considered word, is
     * strictly greater than the given threshold.
     *
     * @param suggestion the suggestion to evaluate; may be null.
     * @param consideredWord the word the suggestion is compared against.
     * @param distracterThreshold the score the suggestion has to exceed.
     * @return true if the computed score is greater than the threshold; false otherwise,
     *         including when {@code suggestion} is null.
     */
    private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
            final String consideredWord, final float distracterThreshold) {
        if (suggestion == null) {
            return false;
        }
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                consideredWord, suggestion.mWord, suggestion.mScore);
        if (DEBUG) {
            Log.d(TAG, "normalizedScore: " + normalizedScore);
            Log.d(TAG, "distracterThreshold: " + distracterThreshold);
        }
        return normalizedScore > distracterThreshold;
    }
}
}
+54 −8
Original line number Original line Diff line number Diff line
@@ -16,9 +16,13 @@


package com.android.inputmethod.latin;
package com.android.inputmethod.latin;


import java.util.ArrayList;
import java.util.Locale;
import java.util.Locale;


import android.content.Context;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
import android.test.suitebuilder.annotation.LargeTest;
import android.view.inputmethod.InputMethodSubtype;


import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;


@@ -26,14 +30,24 @@ import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
 * Unit test for DistracterFilter
 * Unit test for DistracterFilter
 */
 */
@LargeTest
@LargeTest
public class DistracterFilterTest extends InputTestsBase {
public class DistracterFilterTest extends AndroidTestCase {
    private DistracterFilterCheckingExactMatches mDistracterFilter;
    private DistracterFilterCheckingExactMatches mDistracterFilter;


    @Override
    @Override
    protected void setUp() throws Exception {
    protected void setUp() throws Exception {
        super.setUp();
        super.setUp();
        mDistracterFilter = new DistracterFilterCheckingExactMatches(getContext());
        final Context context = getContext();
        mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest());
        mDistracterFilter = new DistracterFilterCheckingExactMatches(context);
        RichInputMethodManager.init(context);
        final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
        final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
                Locale.US.toString(), "qwerty"));
        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
                Locale.FRENCH.toString(), "azerty"));
        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
                Locale.GERMAN.toString(), "qwertz"));
        mDistracterFilter.updateEnabledSubtypes(subtypes);
    }
    }


    public void testIsDistractorToWordsInDictionaries() {
    public void testIsDistractorToWordsInDictionaries() {
@@ -104,24 +118,56 @@ public class DistracterFilterTest extends InputTestsBase {
        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));


        final Locale localeDeDe = new Locale("de", "DE");
        typedWord = "thabk";
        // For this test case, we consider "thabk" is a distracter to "thank"
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));


        typedWord = "fuer";
        typedWord = "thanks";
        // For this test case, we consider "fuer" is a distracter to "für".
        // For this test case, we consider "thanks" is not a distracter to any other word
        // in dictionaries.
        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));

        typedWord = "thabks";
        // For this test case, we consider "thabks" is a distracter to "thanks"
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));

        typedWord = "think";
        // For this test case, we consider "think" is not a distracter to any other word
        // in dictionaries.
        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));

        typedWord = "thibk";
        // For this test case, we consider "thibk" is a distracter to "think"
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));

        typedWord = "tgis";
        // For this test case, we consider "tgis" is a distracter to "this"
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));

        final Locale localeDeDe = new Locale("de");


        typedWord = "fUEr";
        typedWord = "fUEr";
        // For this test case, we consider "fUEr" is a distracter to "für".
        // For this test case, we consider "fUEr" is a distracter to "für".
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));


        typedWord = "fuer";
        // For this test case, we consider "fuer" is a distracter to "für".
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));

        typedWord = "fur";
        typedWord = "fur";
        // For this test case, we consider "fur" is a distracter to "für".
        // For this test case, we consider "fur" is a distracter to "für".
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));


        final Locale localeFrFr = new Locale("fr", "FR");
        final Locale localeFrFr = new Locale("fr");


        typedWord = "a";
        typedWord = "a";
        // For this test case, we consider "a" is a distracter to "à".
        // For this test case, we consider "a" is a distracter to "à".