Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 99b7242f authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Make dumpAllWordsForDebug() use getNextWordProperty()."

parents ffb12e76 d79fd29f
Loading
Loading
Loading
Loading
+17 −8
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
import com.android.inputmethod.latin.utils.WordProperty;

import java.io.File;
import java.util.ArrayList;
@@ -778,16 +779,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
    }

    @UsedForTesting
    protected void runAfterGcForDebug(final Runnable r) {
        getExecutor(mDictName).executePrioritized(new Runnable() {
    // Debug helper: after reloadDictionaryIfRequired(), queues a task on the executor obtained
    // for mDictName that logs the dictionary name followed by each word property returned by
    // mBinaryDictionary.
    public void dumpAllWordsForDebug() {
        reloadDictionaryIfRequired();
        getExecutor(mDictName).execute(new Runnable() {
            @Override
            public void run() {
                try {
                    mBinaryDictionary.flushWithGC();
                    r.run();
                } finally {
                    mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false);
                }
                Log.d(TAG, "dictionary=" + mDictName);
                // The enumeration starts with token 0; each result supplies the token to pass to
                // the next call, and a next token of 0 ends the loop.
                int token = 0;
                do {
                    final BinaryDictionary.GetNextWordPropertyResult result =
                            mBinaryDictionary.getNextWordProperty(token);
                    final WordProperty wordProperty = result.mWordProperty;
                    if (wordProperty == null) {
                        // No word property came back for this token: report it and stop dumping.
                        Log.d(TAG, " dictionary is empty.");
                        break;
                    }
                    Log.d(TAG, wordProperty.toString());
                    token = result.mNextToken;
                } while (token != 0);
            }
        });
    }
+0 −60
Original line number Diff line number Diff line
@@ -17,21 +17,15 @@
package com.android.inputmethod.latin.personalization;

import android.content.Context;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
@@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit;
 */
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
    private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
    public static final boolean DBG_SAVE_RESTORE = false;
    private static final boolean DBG_DUMP_ON_CLOSE = false;

    /** Any pair being typed or picked */
@@ -53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

    public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4;

    /** The locale for this dictionary. */
    public final Locale mLocale;

@@ -160,57 +151,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
        // Never loaded to memory in Java side.
    }

    /**
     * Logs all the words of this dictionary for debugging. The actual dump is done by
     * {@code dumpAllWordsForDebugLocked()}, which is wrapped in a {@link Runnable} and handed to
     * {@code runAfterGcForDebug(Runnable)}.
     */
    @UsedForTesting
    public void dumpAllWordsForDebug() {
        final Runnable dumpTask = new Runnable() {
            @Override
            public void run() {
                dumpAllWordsForDebugLocked();
            }
        };
        runAfterGcForDebug(dumpTask);
    }

    /**
     * Decodes the binary dictionary file named {@code mDictName} located in the directory returned
     * by {@code mContext.getFilesDir()} and writes every unigram and bigram reported by the
     * decoder to the debug log. Returns silently when no decoder can be obtained for the file;
     * decoding failures are logged and not propagated.
     */
    private void dumpAllWordsForDebugLocked() {
        Log.d(TAG, "dumpAllWordsForDebug started.");

        // Load the dictionary from binary file
        final File binaryFile = new File(mContext.getFilesDir(), mDictName);
        final DictDecoder decoder =
                FormatSpec.getDictDecoder(binaryFile, DictDecoder.USE_BYTEARRAY);
        if (null == decoder) {
            // This is an expected condition: we don't have a user history dictionary for this
            // language yet. It will be created sometime later.
            return;
        }

        // This listener stores nothing; it only logs what the decoder hands to it.
        final OnAddWordListener loggingListener = new OnAddWordListener() {
            @Override
            public void setUnigram(final String word, final String shortcutTarget,
                    final int frequency, final int shortcutFreq) {
                Log.d(TAG, "load unigram: " + word + "," + frequency);
            }

            @Override
            public void setBigram(final String word0, final String word1, final int frequency) {
                final boolean tooLong = word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH
                        || word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH;
                if (tooLong) {
                    Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + ","
                            + frequency);
                    return;
                }
                Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency);
            }
        };

        try {
            decoder.openDictBuffer();
            UserHistoryDictIOUtils.readDictionaryBinary(decoder, loggingListener);
        } catch (IOException e) {
            Log.d(TAG, "IOException on opening a bytebuffer", e);
        } catch (UnsupportedFormatException e) {
            Log.d(TAG, "Unsupported format, can't read the dictionary", e);
        }
    }

    @UsedForTesting
    public void clearAndFlushDictionary() {
        // Clear the node structure on memory
+0 −128
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.personalization;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.CollectionUtils;

import java.util.HashMap;
import java.util.Set;

/**
 * Holds the bigrams that get updated when the user history dictionary is closed.
 * Every bigram, including the stale ones in the SQL DB, should be kept here so that stale
 * bigrams are not added again when the SQL DB is written.
 *
 * Bigrams are grouped by their first word; each group maps the second word to a byte value.
 */
@UsedForTesting
public final class UserHistoryDictionaryBigramList {
    public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0;
    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
    private static final HashMap<String, Byte> EMPTY_BIGRAM_MAP = CollectionUtils.newHashMap();
    private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap();
    private int mSize = 0;

    /**
     * Drops every stored bigram and resets the bigram count to zero.
     */
    public void evictAll() {
        mBigramMap.clear();
        mSize = 0;
    }

    /**
     * Called when the user typed a word. The bigram is stored with
     * {@link #FORGETTING_CURVE_INITIAL_VALUE}.
     */
    @UsedForTesting
    public void addBigram(String word1, String word2) {
        addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE);
    }

    /**
     * Called when loaded from the SQL DB. A pair that is already stored is left untouched,
     * including its current value.
     */
    public void addBigram(String word1, String word2, byte fcValue) {
        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue);
        }
        HashMap<String, Byte> followers = mBigramMap.get(word1);
        if (followers == null) {
            // First bigram starting with word1: create its group.
            followers = CollectionUtils.newHashMap();
            mBigramMap.put(word1, followers);
        }
        if (followers.containsKey(word2)) {
            return;
        }
        followers.put(word2, fcValue);
        ++mSize;
    }

    /**
     * Called when inserted to the SQL DB. Only a pair that is already stored gets its value
     * replaced; unknown pairs are ignored.
     */
    public void updateBigram(String word1, String word2, byte fcValue) {
        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
            Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue);
        }
        final HashMap<String, Byte> followers = mBigramMap.get(word1);
        if (followers == null || !followers.containsKey(word2)) {
            return;
        }
        followers.put(word2, fcValue);
    }

    /**
     * @return the number of stored bigram pairs.
     */
    public int size() {
        return mSize;
    }

    /**
     * @return true when no first word has a group in this store.
     */
    public boolean isEmpty() {
        return mBigramMap.isEmpty();
    }

    /**
     * @return true when {@code word} has a group of bigrams in this store.
     */
    public boolean containsKey(String word) {
        return mBigramMap.containsKey(word);
    }

    /**
     * @return the first words that have a group in this store.
     */
    public Set<String> keySet() {
        return mBigramMap.keySet();
    }

    /**
     * Looks up the group of {@code word1}, trying the exact spelling first and its lower-cased
     * form second.
     *
     * @return the matching group, or the shared empty map when neither spelling is known.
     */
    public HashMap<String, Byte> getBigrams(String word1) {
        final HashMap<String, Byte> exactMatch = mBigramMap.get(word1);
        if (exactMatch != null) return exactMatch;
        // TODO: lower case according to locale
        final HashMap<String, Byte> lowerCaseMatch = mBigramMap.get(word1.toLowerCase());
        return lowerCaseMatch != null ? lowerCaseMatch : EMPTY_BIGRAM_MAP;
    }

    /**
     * Removes the pair from the group found by {@link #getBigrams(String)}.
     *
     * @return true when the pair was stored and has been removed, false otherwise.
     */
    public boolean removeBigram(String word1, String word2) {
        final HashMap<String, Byte> followers = getBigrams(word1);
        if (followers.isEmpty() || !followers.containsKey(word2)) {
            return false;
        }
        followers.remove(word2);
        --mSize;
        return true;
    }
}
+0 −181
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public final class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;

    private UserHistoryDictIOUtils() {
        // This utility class is not publicly instantiable.
    }

    /**
     * Receiver of the unigrams and bigrams found while a dictionary is being read.
     */
    public interface OnAddWordListener {
        /**
         * Callback to be notified when a word is added to the dictionary.
         * @param word The added word.
         * @param shortcutTarget A shortcut target for this word, or null if none.
         * @param frequency The frequency for this word.
         * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
         *   Unspecified if shortcutTarget is null - do not rely on its value.
         */
        public void setUnigram(final String word, final String shortcutTarget, final int frequency,
                final int shortcutFreq);

        /**
         * Callback to be notified when a bigram is added to the dictionary.
         * @param word1 The first word of the bigram.
         * @param word2 The second word of the bigram.
         * @param frequency The frequency for this bigram.
         */
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Source of bigram frequencies used while a dictionary is being written.
     */
    @UsedForTesting
    public interface BigramDictionaryInterface {
        /**
         * @return the frequency of the pair; {@code constructFusionDictionary} treats -1 as
         *   "do not write this pair".
         */
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * Writes dictionary to file.
     *
     * A FusionDictionary is built from {@code dict} and {@code bigrams}, tagged with the
     * forgetting curve attribute and the current time in seconds, then handed to
     * {@code dictEncoder}. Failures of the encoder are logged and not propagated.
     */
    @UsedForTesting
    public static void writeDictionary(final DictEncoder dictEncoder,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final FormatOptions formatOptions, final HashMap<String, String> options) {
        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options);
        fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY,
            FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
        fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY,
                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
        try {
            dictEncoder.writeDictionary(fusionDict, formatOptions);
            Log.d(TAG, "end writing");
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Every pair held by {@code bigrams} is looked up in {@code dict}; pairs whose frequency is
     * -1 are left out. A pair with a null first word is added as a unigram. For the other pairs
     * the first word is added with frequency 2 when the tree does not contain it yet, and the
     * bigram is then set. The value stored in {@code bigrams} for each written pair is replaced
     * by the frequency cast to a byte.
     */
    @UsedForTesting
    static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
            final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
        final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
                new FusionDictionary.DictionaryOptions(options));
        int profTotal = 0;
        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);
                if (freq == -1) {
                    // don't add this bigram.
                    continue;
                }
                if (DEBUG) {
                    if (word1 == null) {
                        Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
                    } else {
                        Log.d(TAG, "add bigram: " + word1
                                + "," + word2 + "," + Integer.toString(freq));
                    }
                    profTotal++;
                }
                if (word1 == null) { // unigram
                    fusionDict.add(word2, freq, null, false /* isNotAWord */);
                } else { // bigram
                    if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
                        fusionDict.add(word1, 2, null, false /* isNotAWord */);
                    }
                    fusionDict.setBigram(word1, word2, freq);
                }
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }
        if (DEBUG) {
            Log.d(TAG, "add " + profTotal + "words");
        }
        return fusionDict;
    }

    /**
     * Reads dictionary from file.
     *
     * Failures of the decoder are logged and not propagated; whatever has been collected in the
     * maps up to that point is still forwarded to {@code dict}.
     */
    public static void readDictionaryBinary(final DictDecoder dictDecoder,
            final OnAddWordListener dict) {
        final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
        try {
            dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        } catch (ArrayIndexOutOfBoundsException e) {
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
        }
        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * The three maps share the same keys. A unigram without an entry in {@code frequencies} is
     * logged and skipped together with its bigrams, and so is a bigram whose target key has no
     * entry in {@code unigrams}.
     */
    @UsedForTesting
    static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
            final OnAddWordListener to) {
        for (Entry<Integer, String> entry : unigrams.entrySet()) {
            final String word1 = entry.getValue();
            // The maps may be only partially filled when reading was interrupted by an error, so
            // the frequency must be checked before it is unboxed.
            final Integer boxedFrequency = frequencies.get(entry.getKey());
            if (boxedFrequency == null) {
                Log.e(TAG, "Missing frequency for unigram: " + word1);
                continue;
            }
            final int unigramFrequency = boxedFrequency;
            to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
            if (attrList != null) {
                for (final PendingAttribute attr : attrList) {
                    final String word2 = unigrams.get(attr.mAddress);
                    if (word1 == null || word2 == null) {
                        Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
                        continue;
                    }
                    to.setBigram(word1, word2,
                            BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
                                    attr.mFrequency));
                }
            }
        }

    }
}
+46 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ public class WordProperty {
    // package.
    public static final class ProbabilityInfo {
        public final int mProbability;
        // wTimestamp, mLevel and mCount are historical info. These values are depend on the
        // mTimestamp, mLevel and mCount are historical info. These values depend on the
        // implementation in native code; thus, we must not use them or make any assumptions about
        // them except in tests.
        public final int mTimestamp;
@@ -54,6 +54,11 @@ public class WordProperty {
            mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
            mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
        }

        /**
         * Returns the historical info as the timestamp, the level and the count separated by
         * colons.
         */
        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append(mTimestamp).append(":").append(mLevel).append(":").append(mCount);
            return text.toString();
        }
    }

    private static int getCodePointCount(final int[] codePoints) {
@@ -105,4 +110,44 @@ public class WordProperty {
    public boolean isValid() {
        return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
    }

    /**
     * Builds a multi-line description of this word for debug output.
     *
     * The first line holds the word and its probability, followed by the not-a-word flag, the
     * blacklisted flag and the historical info when they apply. It is followed by one line per
     * bigram target and one line per shortcut target. Every line ends with a newline character.
     */
    @Override
    public String toString() {
        // TODO: Move this logic to CombinedInputOutput.
        // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
        final StringBuilder builder = new StringBuilder();
        builder.append(" word=" + mCodePoints);
        builder.append(",f=").append(mProbabilityInfo.mProbability);
        if (mIsNotAWord) {
            builder.append(",not_a_word=true");
        }
        if (mIsBlacklisted) {
            builder.append(",blacklisted=true");
        }
        if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
            builder.append(",historicalInfo=").append(mProbabilityInfo);
        }
        builder.append("\n");
        for (int i = 0; i < mBigramTargets.size(); i++) {
            builder.append("  bigram=" + mBigramTargets.get(i).mWord);
            builder.append(",f=" + mBigramTargets.get(i).mFrequency);
            // The historical info of the i-th bigram is kept at the same index in
            // mBigramProbabilityInfo.
            if (mBigramProbabilityInfo.get(i).mTimestamp
                    != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
                builder.append(",historicalInfo=").append(mBigramProbabilityInfo.get(i));
            }
            builder.append("\n");
        }
        for (int i = 0; i < mShortcutTargets.size(); i++) {
            builder.append("  shortcut=" + mShortcutTargets.get(i).mWord);
            builder.append(",f=" + mShortcutTargets.get(i).mFrequency);
            builder.append("\n");
        }
        return builder.toString();
    }
}
 No newline at end of file
Loading