Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ade5ad1d authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Use trigrams for personalization dict."

parents b11fc7d4 16cc3992
Loading
Loading
Loading
Loading
+9 −9
Original line number Diff line number Diff line
@@ -32,8 +32,8 @@ import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

import java.io.File;
import java.util.ArrayList;
@@ -205,8 +205,8 @@ public final class BinaryDictionary extends Dictionary {
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    private static native int addMultipleDictionaryEntriesNative(long dict,
            LanguageModelParam[] languageModelParams, int startIndex);
    private static native int updateEntriesForInputEventsNative(long dict,
            WordInputEventForPersonalization[] inputEvents, int startIndex);
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
@@ -526,19 +526,19 @@ public final class BinaryDictionary extends Dictionary {
    }

    @UsedForTesting
    public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
    public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) {
        if (!isValidDictionary()) {
            return;
        }
        int processedParamCount = 0;
        while (processedParamCount < languageModelParams.length) {
        int processedEventCount = 0;
        while (processedEventCount < inputEvents.length) {
            if (needsToRunGC(true /* mindsBlockByGC */)) {
                flushWithGC();
            }
            processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
                    languageModelParams, processedParamCount);
            processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents,
                    processedEventCount);
            mHasUpdated = true;
            if (processedParamCount <= 0) {
            if (processedEventCount <= 0) {
                return;
            }
        }
+3 −3
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ import android.view.inputmethod.InputMethodSubtype;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.UpdateEntriesForInputEventsCallback;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.personalization.ContextualDictionary;
@@ -796,8 +796,8 @@ public class DictionaryFacilitator {
    public void addEntriesToPersonalizationDictionary(
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
        mPersonalizationHelper.addEntriesToPersonalizationDictionariesToUpdate(
            final UpdateEntriesForInputEventsCallback callback) {
        mPersonalizationHelper.updateEntriesOfPersonalizationDictionaries(
                getMostProbableLocale(), personalizationDataChunk, spacingAndPunctuations,
                callback);
    }
+9 −9
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.DistracterFilter;
import com.android.inputmethod.latin.utils.ExecutorUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

import java.io.File;
import java.util.ArrayList;
@@ -447,16 +447,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
        }, word, distracterFilter);
    }

    public interface AddMultipleDictionaryEntriesCallback {
    public interface UpdateEntriesForInputEventsCallback {
        public void onFinished();
    }

    /**
     * Dynamically add multiple entries to the dictionary.
     * Dynamically update entries according to input events.
     */
    public void addMultipleDictionaryEntriesDynamically(
            @Nonnull final ArrayList<LanguageModelParam> languageModelParams,
            final AddMultipleDictionaryEntriesCallback callback) {
    public void updateEntriesForInputEvents(
            @Nonnull final ArrayList<WordInputEventForPersonalization> inputEvents,
            final UpdateEntriesForInputEventsCallback callback) {
        reloadDictionaryIfRequired();
        asyncExecuteTaskWithWriteLock(new Runnable() {
            @Override
@@ -466,9 +466,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
                    if (binaryDictionary == null) {
                        return;
                    }
                    binaryDictionary.addMultipleDictionaryEntries(
                            languageModelParams.toArray(
                                    new LanguageModelParam[languageModelParams.size()]));
                    binaryDictionary.updateEntriesForInputEvents(
                            inputEvents.toArray(
                                    new WordInputEventForPersonalization[inputEvents.size()]));
                } finally {
                    if (callback != null) {
                        callback.onFinished();
+13 −13
Original line number Diff line number Diff line
@@ -26,14 +26,14 @@ import java.util.concurrent.atomic.AtomicInteger;
import android.content.Context;
import android.view.inputmethod.InputMethodSubtype;

import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.UpdateEntriesForInputEventsCallback;
import com.android.inputmethod.latin.personalization.PersonalizationDataChunk;
import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingIsInDictionary;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.SubtypeLocaleUtils;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

/**
 * Class for managing and updating personalization dictionaries.
@@ -119,10 +119,10 @@ public class PersonalizationHelperForDictionaryFacilitator {
        return personalizationDict;
    }

    private void addEntriesToPersonalizationDictionariesForLocale(final Locale locale,
    private void updateEntriesOfPersonalizationDictionariesForLocale(final Locale locale,
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
            final UpdateEntriesForInputEventsCallback callback) {
        final ExpandableBinaryDictionary personalizationDict =
                getPersonalizationDictToUpdate(mContext, locale);
        if (personalizationDict == null) {
@@ -131,25 +131,25 @@ public class PersonalizationHelperForDictionaryFacilitator {
            }
            return;
        }
        final ArrayList<LanguageModelParam> languageModelParams =
                LanguageModelParam.createLanguageModelParamsFrom(
        final ArrayList<WordInputEventForPersonalization> inputEvents =
                WordInputEventForPersonalization.createInputEventFrom(
                        personalizationDataChunk.mTokens,
                        personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
                        locale, new DistracterFilterCheckingIsInDictionary(
                                mDistracterFilter, personalizationDict));
        if (languageModelParams == null || languageModelParams.isEmpty()) {
        if (inputEvents == null || inputEvents.isEmpty()) {
            if (callback != null) {
                callback.onFinished();
            }
            return;
        }
        personalizationDict.addMultipleDictionaryEntriesDynamically(languageModelParams, callback);
        personalizationDict.updateEntriesForInputEvents(inputEvents, callback);
    }

    public void addEntriesToPersonalizationDictionariesToUpdate(final Locale defaultLocale,
    public void updateEntriesOfPersonalizationDictionaries(final Locale defaultLocale,
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
            final UpdateEntriesForInputEventsCallback callback) {
        final String language = personalizationDataChunk.mDetectedLanguage;
        final HashSet<Locale> locales;
        if (mIsMonolingualUser && PersonalizationDataChunk.LANGUAGE_UNKNOWN.equals(language)
@@ -165,8 +165,8 @@ public class PersonalizationHelperForDictionaryFacilitator {
            return;
        }
        final AtomicInteger remainingTaskCount = new AtomicInteger(locales.size());
        final AddMultipleDictionaryEntriesCallback callbackForLocales =
                new AddMultipleDictionaryEntriesCallback() {
        final UpdateEntriesForInputEventsCallback callbackForLocales =
                new UpdateEntriesForInputEventsCallback() {
                    @Override
                    public void onFinished() {
                        if (remainingTaskCount.decrementAndGet() == 0) {
@@ -178,7 +178,7 @@ public class PersonalizationHelperForDictionaryFacilitator {
                    }
                };
        for (final Locale locale : locales) {
            addEntriesToPersonalizationDictionariesForLocale(locale, personalizationDataChunk,
            updateEntriesOfPersonalizationDictionariesForLocale(locale, personalizationDataChunk,
                    spacingAndPunctuations, callbackForLocales);
        }
    }
+117 −0
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.utils;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
@@ -30,63 +30,36 @@ import java.util.Locale;

// Note: this class is used as a parameter type of a native method. Be careful when renaming this
// class or any of its fields. See BinaryDictionary#updateEntriesForInputEventsNative().
public final class LanguageModelParam {
    private static final String TAG = LanguageModelParam.class.getSimpleName();
    private static final boolean DEBUG = false;
public final class WordInputEventForPersonalization {
    private static final String TAG = WordInputEventForPersonalization.class.getSimpleName();
    private static final boolean DEBUG_TOKEN = false;

    // For now, these probability values are being referred to only when we add new entries to
    // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or
    // non-0. Thus, it's not meaningful to compare 10, 100, and so on.
    // TODO: Revise the logic in ForgettingCurveUtils in native code.
    private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100;
    private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
    private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 10;
    private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;

    public final CharSequence mTargetWord;
    public final int[] mWord0;
    public final int[] mWord1;
    // TODO: this needs to be a list of shortcuts
    public final int[] mShortcutTarget;
    public final int mUnigramProbability;
    public final int mBigramProbability;
    public final int mShortcutProbability;
    public final boolean mIsNotAWord;
    public final boolean mIsPossiblyOffensive;
    public final int[] mTargetWord;
    public final int mPrevWordsCount;
    public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
    public final boolean[] mIsPrevWordBeginningOfSentenceArray =
            new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    public final boolean mIsValid;
    // Time stamp in seconds.
    public final int mTimestamp;

    // Constructor for unigram. TODO: support shortcuts
    @UsedForTesting
    public LanguageModelParam(final CharSequence word, final int unigramProbability,
            final int timestamp) {
        this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp);
    }

    // Constructor for unigram and bigram.
    @UsedForTesting
    public LanguageModelParam(final CharSequence word0, final CharSequence word1,
            final int unigramProbability, final int bigramProbability,
            final int timestamp) {
        mTargetWord = word1;
        mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0);
        mWord1 = StringUtils.toCodePointArray(word1);
        mShortcutTarget = null;
        mUnigramProbability = unigramProbability;
        mBigramProbability = bigramProbability;
        mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
        mIsNotAWord = false;
        mIsPossiblyOffensive = false;
    public WordInputEventForPersonalization(final CharSequence targetWord,
            final NgramContext ngramContext, final boolean isValid, final int timestamp) {
        mTargetWord = StringUtils.toCodePointArray(targetWord);
        mPrevWordsCount = ngramContext.getPrevWordCount();
        ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray);
        mIsValid = isValid;
        mTimestamp = timestamp;
    }

    // Process a list of words and return a list of {@link LanguageModelParam} objects.
    public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
    // Process a list of words and return a list of {@link WordInputEventForPersonalization}
    // objects.
    public static ArrayList<WordInputEventForPersonalization> createInputEventFrom(
            final List<String> tokens, final int timestamp,
            final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
            final DistracterFilter distracterFilter) {
        final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
        final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>();
        final int N = tokens.size();
        NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
        for (int i = 0; i < N; ++i) {
@@ -105,26 +78,26 @@ public final class LanguageModelParam {
                            + tempWord + "\"");
                }
                // Sentence terminator found. Split.
                // TODO: Detect whether the context is beginning-of-sentence.
                ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
                continue;
            }
            if (DEBUG_TOKEN) {
                Log.d(TAG, "--- word: \"" + tempWord + "\"");
            }
            final LanguageModelParam languageModelParam =
                    detectWhetherVaildWordOrNotAndGetLanguageModelParam(
            final WordInputEventForPersonalization inputEvent =
                    detectWhetherVaildWordOrNotAndGetInputEvent(
                            ngramContext, tempWord, timestamp, locale, distracterFilter);
            if (languageModelParam == null) {
            if (inputEvent == null) {
                continue;
            }
            languageModelParams.add(languageModelParam);
            ngramContext = ngramContext.getNextNgramContext(
                    new NgramContext.WordInfo(tempWord));
            inputEvents.add(inputEvent);
            ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord));
        }
        return languageModelParams;
        return inputEvents;
    }

    private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
    private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent(
            final NgramContext ngramContext, final String targetWord, final int timestamp,
            final Locale locale, final DistracterFilter distracterFilter) {
        if (locale == null) {
@@ -138,29 +111,7 @@ public final class LanguageModelParam {
            // The word is a distracter.
            return null;
        }
        return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp,
                !HandlingType.shouldBeHandledAsOov(wordHandlingType));
    }

    private static LanguageModelParam createAndGetLanguageModelParamOfWord(
            final NgramContext ngramContext, final String word, final int timestamp,
            final boolean isValidWord) {
        final int unigramProbability = isValidWord ?
                UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
        if (!ngramContext.isValid()) {
            if (DEBUG) {
                Log.d(TAG, "--- add unigram: current("
                        + (isValidWord ? "Valid" : "OOV") + ") = " + word);
            }
            return new LanguageModelParam(word, unigramProbability, timestamp);
        }
        if (DEBUG) {
            Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current("
                    + (isValidWord ? "Valid" : "OOV") + ") = " + word);
        }
        final int bigramProbability = isValidWord ?
                BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD;
        return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word,
                unigramProbability, bigramProbability, timestamp);
        return new WordInputEventForPersonalization(word, ngramContext,
                !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp);
    }
}
Loading