Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bc568850 authored by Satoshi Kataoka's avatar Satoshi Kataoka
Browse files

Optimize the store of bigram list

Bug: 4192129

Change-Id: Idcc62e4f9696b56b1d7013891b2da37b1784423e
parent e0952029
Loading
Loading
Loading
Loading
+80 −103
Original line number Diff line number Diff line
@@ -30,8 +30,6 @@ import android.util.Log;
import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;

/**
 * Locally gathers stats about the words user types and various other signals like auto-correction
@@ -39,6 +37,7 @@ import java.util.Iterator;
 */
public class UserHistoryDictionary extends ExpandableDictionary {
    private static final String TAG = "UserHistoryDictionary";
    public static final boolean DBG_SAVE_RESTORE = false;

    /** Any pair being typed or picked */
    private static final int FREQUENCY_FOR_TYPED = 2;
@@ -78,7 +77,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
    /** Locale for which this auto dictionary is storing words */
    private String mLocale;

    private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
    private UserHistoryDictionaryBigramList mBigramList =
            new UserHistoryDictionaryBigramList();
    private final Object mPendingWritesLock = new Object();
    private static volatile boolean sUpdatingDB = false;
    private final SharedPreferences mPrefs;
@@ -99,35 +99,6 @@ public class UserHistoryDictionary extends ExpandableDictionary {

    private static DatabaseHelper sOpenHelper = null;

    private static class Bigram {
        public final String mWord1;
        public final String mWord2;

        Bigram(String word1, String word2) {
            this.mWord1 = word1;
            this.mWord2 = word2;
        }

        @Override
        public boolean equals(Object bigram) {
            if (!(bigram instanceof Bigram)) {
                return false;
            }
            final Bigram bigram2 = (Bigram) bigram;
            final boolean eq1 =
                    mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1);
            if (!eq1) {
                return false;
            }
            return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2);
        }

        @Override
        public int hashCode() {
            return (mWord1 + " " + mWord2).hashCode();
        }
    }

    public void setDatabaseMax(int maxHistoryBigram) {
        sMaxHistoryBigrams = maxHistoryBigram;
    }
@@ -190,21 +161,18 @@ public class UserHistoryDictionary extends ExpandableDictionary {
            freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
        }
        synchronized (mPendingWritesLock) {
            final Bigram bi = new Bigram(word1, word2);
            if (!mPendingWrites.contains(bi)) {
                mPendingWrites.add(bi);
            }
            mBigramList.addBigram(word1, word2);
        }

        return freq;
    }

    public boolean cancelAddingUserHistory(String word1, String word2) {
        final Bigram bi = new Bigram(word1, word2);
        if (mPendingWrites.contains(bi)) {
            mPendingWrites.remove(bi);
        synchronized (mPendingWritesLock) {
            if (mBigramList.removeBigram(word1, word2)) {
                return super.removeBigram(word1, word2);
            }
        }
        return false;
    }

@@ -214,11 +182,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
    private void flushPendingWrites() {
        synchronized (mPendingWritesLock) {
            // Nothing pending? Return
            if (mPendingWrites.isEmpty()) return;
            if (mBigramList.isEmpty()) return;
            // Create a background thread to write the pending entries
            new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute();
            new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute();
            // Create a new map for writing new entries into while the old one is written to db
            mPendingWrites = new HashSet<Bigram>();
            mBigramList = new UserHistoryDictionaryBigramList();
        }
    }

@@ -251,6 +219,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                    final String word1 = cursor.getString(word1Index);
                    final String word2 = cursor.getString(word2Index);
                    final int frequency = cursor.getInt(frequencyIndex);
                    if (DBG_SAVE_RESTORE) {
                        Log.d(TAG, "--- Load user history: " + word1 + ", " + word2);
                    }
                    // Safeguard against adding really long words. Stack may overflow due
                    // to recursive lookup
                    if (null == word1) {
@@ -259,8 +230,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                            && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
                        super.setBigramAndGetFrequency(
                                word1, word2, new ForgettingCurveParams(frequency, now, last));
                        // TODO: optimize
                        mPendingWrites.add(new Bigram(word1, word2));
                    }
                    synchronized(mPendingWritesLock) {
                        mBigramList.addBigram(word1, word2);
                    }
                    cursor.moveToNext();
                }
@@ -339,14 +311,15 @@ public class UserHistoryDictionary extends ExpandableDictionary {
     * the in-memory trie.
     */
    private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
        private final HashSet<Bigram> mMap;
        private final UserHistoryDictionaryBigramList mBigramList;
        private final DatabaseHelper mDbHelper;
        private final String mLocale;
        private final UserHistoryDictionary mUserHistoryDictionary;

        public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites,
        public UpdateDbTask(
                DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites,
                String locale, UserHistoryDictionary dict) {
            mMap = pendingWrites;
            mBigramList = pendingWrites;
            mLocale = locale;
            mDbHelper = openHelper;
            mUserHistoryDictionary = dict;
@@ -401,27 +374,27 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                return null;
            }
            db.execSQL("PRAGMA foreign_keys = ON;");
            final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams;

            // Write all the entries to the db
            final Iterator<Bigram> iterator = mMap.iterator();
            while (iterator.hasNext()) {
            for (String word1 : mBigramList.keySet()) {
                for (String word2 : mBigramList.getBigrams(word1)) {
                    // TODO: this process of making a text search for each pair each time
                    // is terribly inefficient. Optimize this.
                final Bigram bi = iterator.next();

                    // find pair id
                    Cursor c = null;
                    try {
                    if (null != bi.mWord1) {
                        if (null != word1) {
                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
                                    MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
                                            + MAIN_COLUMN_LOCALE + "=?",
                                        new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null,
                                            new String[] { word1, word2, mLocale }, null, null,
                                            null);
                        } else {
                            c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
                                MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND "
                                        + MAIN_COLUMN_LOCALE + "=?",
                                        new String[] { bi.mWord2, mLocale }, null, null, null);
                                    MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2
                                            + "=? AND " + MAIN_COLUMN_LOCALE + "=?",
                                            new String[] { word2, mLocale }, null, null, null);
                        }

                        final int pairId;
@@ -433,21 +406,20 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                        } else {
                            // new pair
                            Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
                                getContentValues(bi.mWord1, bi.mWord2, mLocale));
                                    getContentValues(word1, word2, mLocale));
                            pairId = pairIdLong.intValue();
                        }
                        // insert new frequency
                        final int freq;
                    if (bi.mWord1 == null) {
                        if (word1 == null) {
                            freq = FREQUENCY_FOR_TYPED;
                        } else {
                        final NextWord nw = mUserHistoryDictionary.getBigramWord(
                                bi.mWord1, bi.mWord2);
                            final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
                            if (nw != null) {
                                final int tempFreq = nw.getFcValue();
                                // TODO: Check whether the word is valid or not
                                if (UserHistoryForgettingCurveUtils.needsToSave(
                                    (byte)tempFreq, false)) {
                                        (byte)tempFreq, false, addLevel0Bigram)) {
                                    freq = tempFreq;
                                } else {
                                    freq = -1;
@@ -457,7 +429,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                            }
                        }
                        if (freq > 0) {
                        db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq));
                            if (DBG_SAVE_RESTORE) {
                                Log.d(TAG, "--- Save user history: " + word1 + ", " + word2);
                            }
                            db.insert(FREQ_TABLE_NAME, null,
                                    getFrequencyContentValues(pairId, freq));
                        }
                    } finally {
                        if (c != null) {
@@ -465,6 +441,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
                        }
                    }
                }
            }

            checkPruneData(db);
            sUpdatingDB = false;
+91 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.util.Log;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

/**
 * A store of bigrams which will be updated when the user history dictionary is closed
 * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
 * bigrams when we write to the SQL DB.
 */
public class UserHistoryDictionaryBigramList {
    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
    private static final HashSet<String> EMPTY_STRING_SET = new HashSet<String>();
    private final HashMap<String, HashSet<String>> mBigramMap =
            new HashMap<String, HashSet<String>>();
    private int mSize = 0;

    public void evictAll() {
        mSize = 0;
        mBigramMap.clear();
    }

    public void addBigram(String word1, String word2) {
        if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2);
        }
        final HashSet<String> set;
        if (mBigramMap.containsKey(word1)) {
            set = mBigramMap.get(word1);
        } else {
            set = new HashSet<String>();
            mBigramMap.put(word1, set);
        }
        if (!set.contains(word2)) {
            ++mSize;
            set.add(word2);
        }
    }

    public int size() {
        return mSize;
    }

    public boolean isEmpty() {
        return mBigramMap.isEmpty();
    }

    public Set<String> keySet() {
        return mBigramMap.keySet();
    }

    public HashSet<String> getBigrams(String word1) {
        if (!mBigramMap.containsKey(word1)) {
            return EMPTY_STRING_SET;
        } else {
            return mBigramMap.get(word1);
        }
    }

    public boolean removeBigram(String word1, String word2) {
        final HashSet<String> set = getBigrams(word1);
        if (set.isEmpty()) {
            return false;
        }
        if (set.contains(word2)) {
            set.remove(word2);
            --mSize;
            return true;
        }
        return false;
    }
}
+8 −3
Original line number Diff line number Diff line
@@ -162,11 +162,16 @@ public class UserHistoryForgettingCurveUtils {

    // TODO: isValid should be false for a word whose frequency is 0,
    // or that is not in the dictionary.
    public static boolean needsToSave(byte fc, boolean isValid) {
    /**
     * Check wheather we should save the bigram to the SQL DB or not
     */
    public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) {
        int level = fcToLevel(fc);
        if (isValid && level == 0) {
        if (level == 0) {
            if (isValid || !addLevel0Bigram) {
                return false;
            }
        }
        final int elapsedTime = fcToElapsedTime(fc);
        return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);
    }