Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 16cc3992 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Use trigrams for personalization dict.

Bug: 14425059
Change-Id: I73cf6904e569d60996a3b079f16ea6df0cb90f02
parent d3a4c513
Loading
Loading
Loading
Loading
+9 −9
Original line number Diff line number Diff line
@@ -32,8 +32,8 @@ import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

import java.io.File;
import java.util.ArrayList;
@@ -205,8 +205,8 @@ public final class BinaryDictionary extends Dictionary {
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    private static native int addMultipleDictionaryEntriesNative(long dict,
            LanguageModelParam[] languageModelParams, int startIndex);
    private static native int updateEntriesForInputEventsNative(long dict,
            WordInputEventForPersonalization[] inputEvents, int startIndex);
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
@@ -526,19 +526,19 @@ public final class BinaryDictionary extends Dictionary {
    }

    @UsedForTesting
    public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
    public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) {
        if (!isValidDictionary()) {
            return;
        }
        int processedParamCount = 0;
        while (processedParamCount < languageModelParams.length) {
        int processedEventCount = 0;
        while (processedEventCount < inputEvents.length) {
            if (needsToRunGC(true /* mindsBlockByGC */)) {
                flushWithGC();
            }
            processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
                    languageModelParams, processedParamCount);
            processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents,
                    processedEventCount);
            mHasUpdated = true;
            if (processedParamCount <= 0) {
            if (processedEventCount <= 0) {
                return;
            }
        }
+3 −3
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ import android.view.inputmethod.InputMethodSubtype;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.UpdateEntriesForInputEventsCallback;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.personalization.ContextualDictionary;
@@ -796,8 +796,8 @@ public class DictionaryFacilitator {
    public void addEntriesToPersonalizationDictionary(
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
        mPersonalizationHelper.addEntriesToPersonalizationDictionariesToUpdate(
            final UpdateEntriesForInputEventsCallback callback) {
        mPersonalizationHelper.updateEntriesOfPersonalizationDictionaries(
                getMostProbableLocale(), personalizationDataChunk, spacingAndPunctuations,
                callback);
    }
+9 −9
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.DistracterFilter;
import com.android.inputmethod.latin.utils.ExecutorUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

import java.io.File;
import java.util.ArrayList;
@@ -447,16 +447,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
        }, word, distracterFilter);
    }

    public interface AddMultipleDictionaryEntriesCallback {
    public interface UpdateEntriesForInputEventsCallback {
        public void onFinished();
    }

    /**
     * Dynamically add multiple entries to the dictionary.
     * Dynamically update entries according to input events.
     */
    public void addMultipleDictionaryEntriesDynamically(
            @Nonnull final ArrayList<LanguageModelParam> languageModelParams,
            final AddMultipleDictionaryEntriesCallback callback) {
    public void updateEntriesForInputEvents(
            @Nonnull final ArrayList<WordInputEventForPersonalization> inputEvents,
            final UpdateEntriesForInputEventsCallback callback) {
        reloadDictionaryIfRequired();
        asyncExecuteTaskWithWriteLock(new Runnable() {
            @Override
@@ -466,9 +466,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
                    if (binaryDictionary == null) {
                        return;
                    }
                    binaryDictionary.addMultipleDictionaryEntries(
                            languageModelParams.toArray(
                                    new LanguageModelParam[languageModelParams.size()]));
                    binaryDictionary.updateEntriesForInputEvents(
                            inputEvents.toArray(
                                    new WordInputEventForPersonalization[inputEvents.size()]));
                } finally {
                    if (callback != null) {
                        callback.onFinished();
+13 −13
Original line number Diff line number Diff line
@@ -26,14 +26,14 @@ import java.util.concurrent.atomic.AtomicInteger;
import android.content.Context;
import android.view.inputmethod.InputMethodSubtype;

import com.android.inputmethod.latin.ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback;
import com.android.inputmethod.latin.ExpandableBinaryDictionary.UpdateEntriesForInputEventsCallback;
import com.android.inputmethod.latin.personalization.PersonalizationDataChunk;
import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingIsInDictionary;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.SubtypeLocaleUtils;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

/**
 * Class for managing and updating personalization dictionaries.
@@ -119,10 +119,10 @@ public class PersonalizationHelperForDictionaryFacilitator {
        return personalizationDict;
    }

    private void addEntriesToPersonalizationDictionariesForLocale(final Locale locale,
    private void updateEntriesOfPersonalizationDictionariesForLocale(final Locale locale,
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
            final UpdateEntriesForInputEventsCallback callback) {
        final ExpandableBinaryDictionary personalizationDict =
                getPersonalizationDictToUpdate(mContext, locale);
        if (personalizationDict == null) {
@@ -131,25 +131,25 @@ public class PersonalizationHelperForDictionaryFacilitator {
            }
            return;
        }
        final ArrayList<LanguageModelParam> languageModelParams =
                LanguageModelParam.createLanguageModelParamsFrom(
        final ArrayList<WordInputEventForPersonalization> inputEvents =
                WordInputEventForPersonalization.createInputEventFrom(
                        personalizationDataChunk.mTokens,
                        personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
                        locale, new DistracterFilterCheckingIsInDictionary(
                                mDistracterFilter, personalizationDict));
        if (languageModelParams == null || languageModelParams.isEmpty()) {
        if (inputEvents == null || inputEvents.isEmpty()) {
            if (callback != null) {
                callback.onFinished();
            }
            return;
        }
        personalizationDict.addMultipleDictionaryEntriesDynamically(languageModelParams, callback);
        personalizationDict.updateEntriesForInputEvents(inputEvents, callback);
    }

    public void addEntriesToPersonalizationDictionariesToUpdate(final Locale defaultLocale,
    public void updateEntriesOfPersonalizationDictionaries(final Locale defaultLocale,
            final PersonalizationDataChunk personalizationDataChunk,
            final SpacingAndPunctuations spacingAndPunctuations,
            final AddMultipleDictionaryEntriesCallback callback) {
            final UpdateEntriesForInputEventsCallback callback) {
        final String language = personalizationDataChunk.mDetectedLanguage;
        final HashSet<Locale> locales;
        if (mIsMonolingualUser && PersonalizationDataChunk.LANGUAGE_UNKNOWN.equals(language)
@@ -165,8 +165,8 @@ public class PersonalizationHelperForDictionaryFacilitator {
            return;
        }
        final AtomicInteger remainingTaskCount = new AtomicInteger(locales.size());
        final AddMultipleDictionaryEntriesCallback callbackForLocales =
                new AddMultipleDictionaryEntriesCallback() {
        final UpdateEntriesForInputEventsCallback callbackForLocales =
                new UpdateEntriesForInputEventsCallback() {
                    @Override
                    public void onFinished() {
                        if (remainingTaskCount.decrementAndGet() == 0) {
@@ -178,7 +178,7 @@ public class PersonalizationHelperForDictionaryFacilitator {
                    }
                };
        for (final Locale locale : locales) {
            addEntriesToPersonalizationDictionariesForLocale(locale, personalizationDataChunk,
            updateEntriesOfPersonalizationDictionariesForLocale(locale, personalizationDataChunk,
                    spacingAndPunctuations, callbackForLocales);
        }
    }
+117 −0
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ package com.android.inputmethod.latin.utils;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
@@ -30,63 +30,36 @@ import java.util.Locale;

// Note: this class is used as a parameter type of a native method. You should be careful when you
// rename this class or field name. See BinaryDictionary#updateEntriesForInputEventsNative().
public final class LanguageModelParam {
    private static final String TAG = LanguageModelParam.class.getSimpleName();
    private static final boolean DEBUG = false;
public final class WordInputEventForPersonalization {
    private static final String TAG = WordInputEventForPersonalization.class.getSimpleName();
    private static final boolean DEBUG_TOKEN = false;

    // For now, these probability values are being referred to only when we add new entries to
    // decaying dynamic binary dictionaries. When these are referred to, what matters is 0 or
    // non-0. Thus, it's not meaningful to compare 10, 100, and so on.
    // TODO: Revise the logic in ForgettingCurveUtils in native code.
    private static final int UNIGRAM_PROBABILITY_FOR_VALID_WORD = 100;
    private static final int UNIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;
    private static final int BIGRAM_PROBABILITY_FOR_VALID_WORD = 10;
    private static final int BIGRAM_PROBABILITY_FOR_OOV_WORD = Dictionary.NOT_A_PROBABILITY;

    public final CharSequence mTargetWord;
    public final int[] mWord0;
    public final int[] mWord1;
    // TODO: this needs to be a list of shortcuts
    public final int[] mShortcutTarget;
    public final int mUnigramProbability;
    public final int mBigramProbability;
    public final int mShortcutProbability;
    public final boolean mIsNotAWord;
    public final boolean mIsPossiblyOffensive;
    public final int[] mTargetWord;
    public final int mPrevWordsCount;
    public final int[][] mPrevWordArray = new int[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM][];
    public final boolean[] mIsPrevWordBeginningOfSentenceArray =
            new boolean[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    public final boolean mIsValid;
    // Time stamp in seconds.
    public final int mTimestamp;

    // Constructor for unigram. TODO: support shortcuts
    @UsedForTesting
    public LanguageModelParam(final CharSequence word, final int unigramProbability,
            final int timestamp) {
        this(null /* word0 */, word, unigramProbability, Dictionary.NOT_A_PROBABILITY, timestamp);
    }

    // Constructor for unigram and bigram.
    @UsedForTesting
    public LanguageModelParam(final CharSequence word0, final CharSequence word1,
            final int unigramProbability, final int bigramProbability,
            final int timestamp) {
        mTargetWord = word1;
        mWord0 = (word0 == null) ? null : StringUtils.toCodePointArray(word0);
        mWord1 = StringUtils.toCodePointArray(word1);
        mShortcutTarget = null;
        mUnigramProbability = unigramProbability;
        mBigramProbability = bigramProbability;
        mShortcutProbability = Dictionary.NOT_A_PROBABILITY;
        mIsNotAWord = false;
        mIsPossiblyOffensive = false;
    public WordInputEventForPersonalization(final CharSequence targetWord,
            final NgramContext ngramContext, final boolean isValid, final int timestamp) {
        mTargetWord = StringUtils.toCodePointArray(targetWord);
        mPrevWordsCount = ngramContext.getPrevWordCount();
        ngramContext.outputToArray(mPrevWordArray, mIsPrevWordBeginningOfSentenceArray);
        mIsValid = isValid;
        mTimestamp = timestamp;
    }

    // Process a list of words and return a list of {@link LanguageModelParam} objects.
    public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
    // Process a list of words and return a list of {@link WordInputEventForPersonalization}
    // objects.
    public static ArrayList<WordInputEventForPersonalization> createInputEventFrom(
            final List<String> tokens, final int timestamp,
            final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
            final DistracterFilter distracterFilter) {
        final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
        final ArrayList<WordInputEventForPersonalization> inputEvents = new ArrayList<>();
        final int N = tokens.size();
        NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
        for (int i = 0; i < N; ++i) {
@@ -105,26 +78,26 @@ public final class LanguageModelParam {
                            + tempWord + "\"");
                }
                // Sentence terminator found. Split.
                // TODO: Detect whether the context is beginning-of-sentence.
                ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
                continue;
            }
            if (DEBUG_TOKEN) {
                Log.d(TAG, "--- word: \"" + tempWord + "\"");
            }
            final LanguageModelParam languageModelParam =
                    detectWhetherVaildWordOrNotAndGetLanguageModelParam(
            final WordInputEventForPersonalization inputEvent =
                    detectWhetherVaildWordOrNotAndGetInputEvent(
                            ngramContext, tempWord, timestamp, locale, distracterFilter);
            if (languageModelParam == null) {
            if (inputEvent == null) {
                continue;
            }
            languageModelParams.add(languageModelParam);
            ngramContext = ngramContext.getNextNgramContext(
                    new NgramContext.WordInfo(tempWord));
            inputEvents.add(inputEvent);
            ngramContext = ngramContext.getNextNgramContext(new NgramContext.WordInfo(tempWord));
        }
        return languageModelParams;
        return inputEvents;
    }

    private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
    private static WordInputEventForPersonalization detectWhetherVaildWordOrNotAndGetInputEvent(
            final NgramContext ngramContext, final String targetWord, final int timestamp,
            final Locale locale, final DistracterFilter distracterFilter) {
        if (locale == null) {
@@ -138,29 +111,7 @@ public final class LanguageModelParam {
            // The word is a distracter.
            return null;
        }
        return createAndGetLanguageModelParamOfWord(ngramContext, word, timestamp,
                !HandlingType.shouldBeHandledAsOov(wordHandlingType));
    }

    private static LanguageModelParam createAndGetLanguageModelParamOfWord(
            final NgramContext ngramContext, final String word, final int timestamp,
            final boolean isValidWord) {
        final int unigramProbability = isValidWord ?
                UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
        if (!ngramContext.isValid()) {
            if (DEBUG) {
                Log.d(TAG, "--- add unigram: current("
                        + (isValidWord ? "Valid" : "OOV") + ") = " + word);
            }
            return new LanguageModelParam(word, unigramProbability, timestamp);
        }
        if (DEBUG) {
            Log.d(TAG, "--- add bigram: prev = " + ngramContext + ", current("
                    + (isValidWord ? "Valid" : "OOV") + ") = " + word);
        }
        final int bigramProbability = isValidWord ?
                BIGRAM_PROBABILITY_FOR_VALID_WORD : BIGRAM_PROBABILITY_FOR_OOV_WORD;
        return new LanguageModelParam(ngramContext.getNthPrevWord(1 /* n */), word,
                unigramProbability, bigramProbability, timestamp);
        return new WordInputEventForPersonalization(word, ngramContext,
                !HandlingType.shouldBeHandledAsOov(wordHandlingType), timestamp);
    }
}
Loading