Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d79fd29f authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Make dumpAllWordsForDebug() use getNextWordProperty().

Bug: 11736680
Bug: 12810574
Change-Id: I32a388e23ee7da5d7291e88a46c90b61d2f25dee
parent 94173469
Loading
Loading
Loading
Loading
+17 −8
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.FileUtils;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
import com.android.inputmethod.latin.utils.WordProperty;

import java.io.File;
import java.util.ArrayList;
@@ -778,16 +779,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
    }

    @UsedForTesting
    protected void runAfterGcForDebug(final Runnable r) {
        getExecutor(mDictName).executePrioritized(new Runnable() {
    public void dumpAllWordsForDebug() {
        reloadDictionaryIfRequired();
        getExecutor(mDictName).execute(new Runnable() {
            @Override
            public void run() {
                try {
                    mBinaryDictionary.flushWithGC();
                    r.run();
                } finally {
                    mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false);
                }
                Log.d(TAG, "dictionary=" + mDictName);
                int token = 0;
                do {
                    final BinaryDictionary.GetNextWordPropertyResult result =
                            mBinaryDictionary.getNextWordProperty(token);
                    final WordProperty wordProperty = result.mWordProperty;
                    if (wordProperty == null) {
                        Log.d(TAG, " dictionary is empty.");
                        break;
                    }
                    Log.d(TAG, wordProperty.toString());
                    token = result.mNextToken;
                } while (token != 0);
            }
        });
    }
+0 −60
Original line number Diff line number Diff line
@@ -17,21 +17,15 @@
package com.android.inputmethod.latin.personalization;

import android.content.Context;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.LanguageModelParam;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
@@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit;
 */
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
    private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
    public static final boolean DBG_SAVE_RESTORE = false;
    private static final boolean DBG_DUMP_ON_CLOSE = false;

    /** Any pair being typed or picked */
@@ -53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

    public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4;

    /** The locale for this dictionary. */
    public final Locale mLocale;

@@ -160,57 +151,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
        // Never loaded to memory in Java side.
    }

    /**
     * Debug helper that logs the content of this dictionary.
     *
     * The actual dump is done by {@code dumpAllWordsForDebugLocked()}; this method only wraps
     * that call in a {@link Runnable} and hands it to {@code runAfterGcForDebug}.
     */
    @UsedForTesting
    public void dumpAllWordsForDebug() {
        final Runnable dumpTask = new Runnable() {
            @Override
            public void run() {
                dumpAllWordsForDebugLocked();
            }
        };
        runAfterGcForDebug(dumpTask);
    }

    /**
     * Reads the binary dictionary file named {@code mDictName} from the application's files
     * directory and writes every unigram and bigram it contains to the debug log.
     *
     * Nothing is inserted anywhere: the listener passed to the reader only logs. Read errors
     * are logged and otherwise ignored.
     */
    private void dumpAllWordsForDebugLocked() {
        Log.d(TAG, "dumpAllWordsForDebug started.");

        // Load the dictionary from binary file
        final DictDecoder decoder = FormatSpec.getDictDecoder(
                new File(mContext.getFilesDir(), mDictName), DictDecoder.USE_BYTEARRAY);
        if (decoder == null) {
            // This is an expected condition: we don't have a user history dictionary for this
            // language yet. It will be created sometime later.
            return;
        }

        // Listener that only reports what the reader hands over.
        final OnAddWordListener loggingListener = new OnAddWordListener() {
            @Override
            public void setUnigram(final String word, final String shortcutTarget,
                    final int frequency, final int shortcutFreq) {
                Log.d(TAG, "load unigram: " + word + "," + frequency);
            }

            @Override
            public void setBigram(final String word0, final String word1, final int frequency) {
                // A bigram is reported as skipped as soon as either word reaches the maximum
                // word length.
                final boolean tooLong = word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH
                        || word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH;
                final String prefix = tooLong
                        ? "Skip inserting a too long bigram: " : "load bigram: ";
                Log.d(TAG, prefix + word0 + "," + word1 + "," + frequency);
            }
        };

        try {
            decoder.openDictBuffer();
            UserHistoryDictIOUtils.readDictionaryBinary(decoder, loggingListener);
        } catch (IOException e) {
            Log.d(TAG, "IOException on opening a bytebuffer", e);
        } catch (UnsupportedFormatException e) {
            Log.d(TAG, "Unsupported format, can't read the dictionary", e);
        }
    }

    @UsedForTesting
    public void clearAndFlushDictionary() {
        // Clear the node structure on memory
+0 −128
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.personalization;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.CollectionUtils;

import java.util.HashMap;
import java.util.Set;

/**
 * A store of bigrams which will be updated when the user history dictionary is closed.
 *
 * Bigrams are kept in a two-level map: first word -> (second word -> one byte value, named
 * {@code fcValue} by the methods below). All bigrams including stale ones in SQL DB should be
 * stored in this class to avoid adding stale bigrams when we write to the SQL DB.
 */
@UsedForTesting
public final class UserHistoryDictionaryBigramList {
    public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0;
    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
    /** Shared instance returned by {@link #getBigrams(String)} when nothing matches. */
    private static final HashMap<String, Byte> EMPTY_BIGRAM_MAP = CollectionUtils.newHashMap();
    /** First word -> (second word -> value). No null inner map or value is ever stored. */
    private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap();
    /** Number of (first word, second word) pairs currently stored. */
    private int mSize = 0;

    /**
     * Drops every stored bigram and resets the pair count to zero.
     */
    public void evictAll() {
        mBigramMap.clear();
        mSize = 0;
    }

    /**
     * Called when the user typed a word. Stores the pair with
     * {@link #FORGETTING_CURVE_INITIAL_VALUE} unless it is already present.
     */
    @UsedForTesting
    public void addBigram(String word1, String word2) {
        addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE);
    }

    /**
     * Called when loaded from the SQL DB. Stores the pair with the given value; a pair that is
     * already present is left untouched (its value is not overwritten).
     */
    public void addBigram(String word1, String word2, byte fcValue) {
        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue);
        }
        HashMap<String, Byte> followers = mBigramMap.get(word1);
        if (followers == null) {
            followers = CollectionUtils.newHashMap();
            mBigramMap.put(word1, followers);
        }
        if (followers.containsKey(word2)) {
            return;
        }
        followers.put(word2, fcValue);
        ++mSize;
    }

    /**
     * Called when inserted to the SQL DB. Overwrites the value of an existing pair; does nothing
     * when the pair is not stored.
     */
    public void updateBigram(String word1, String word2, byte fcValue) {
        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
            Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue);
        }
        final HashMap<String, Byte> followers = mBigramMap.get(word1);
        if (followers != null && followers.containsKey(word2)) {
            followers.put(word2, fcValue);
        }
    }

    /**
     * @return the number of stored (first word, second word) pairs.
     */
    public int size() {
        return mSize;
    }

    /**
     * @return true when no first word is registered at all.
     */
    public boolean isEmpty() {
        return mBigramMap.isEmpty();
    }

    /**
     * @return true when {@code word} is registered as a first word.
     */
    public boolean containsKey(String word) {
        return mBigramMap.containsKey(word);
    }

    /**
     * @return the set of registered first words, as exposed by the backing map.
     */
    public Set<String> keySet() {
        return mBigramMap.keySet();
    }

    /**
     * Looks up the second words stored for {@code word1}, trying the exact string first and its
     * lower-cased form second.
     *
     * @return the stored inner map, or a shared empty map when neither form is registered.
     */
    public HashMap<String, Byte> getBigrams(String word1) {
        final HashMap<String, Byte> exactMatch = mBigramMap.get(word1);
        if (exactMatch != null) return exactMatch;
        // TODO: lower case according to locale
        final HashMap<String, Byte> lowerCaseMatch = mBigramMap.get(word1.toLowerCase());
        return lowerCaseMatch != null ? lowerCaseMatch : EMPTY_BIGRAM_MAP;
    }

    /**
     * Removes the pair found through {@link #getBigrams(String)}.
     *
     * @return true when a pair was removed, false when it was not stored.
     */
    public boolean removeBigram(String word1, String word2) {
        final HashMap<String, Byte> followers = getBigrams(word1);
        if (!followers.containsKey(word2)) {
            return false;
        }
        followers.remove(word2);
        --mSize;
        return true;
    }
}
+0 −181
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public final class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;

    /**
     * Receiver of the unigrams and bigrams found while reading a dictionary.
     */
    public interface OnAddWordListener {
        /**
         * Callback to be notified when a word is added to the dictionary.
         * @param word The added word.
         * @param shortcutTarget A shortcut target for this word, or null if none.
         * @param frequency The frequency for this word.
         * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
         *   Unspecified if shortcutTarget is null - do not rely on its value.
         */
        public void setUnigram(final String word, final String shortcutTarget, final int frequency,
                final int shortcutFreq);

        /**
         * Callback to be notified when a bigram is added to the dictionary.
         * @param word1 The first word of the pair.
         * @param word2 The second word of the pair.
         * @param frequency The frequency for this pair.
         */
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Source of bigram frequencies used when building the dictionary to write.
     */
    @UsedForTesting
    public interface BigramDictionaryInterface {
        /**
         * @return the frequency of the pair; {@code -1} makes
         *   {@code constructFusionDictionary} leave the pair out.
         */
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * Writes dictionary to file.
     *
     * Builds a {@link FusionDictionary} from {@code bigrams}, stamps it with the
     * "uses forgetting curve" attribute and the current time in seconds, and hands it to
     * {@code dictEncoder}. Write failures are logged, not propagated.
     *
     * @param dictEncoder The encoder that performs the actual write.
     * @param dict Provides the frequency of each pair.
     * @param bigrams The pairs to write; their stored values are updated as a side effect.
     * @param formatOptions Passed through to the encoder.
     * @param options Options used to create the dictionary.
     */
    @UsedForTesting
    public static void writeDictionary(final DictEncoder dictEncoder,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final FormatOptions formatOptions, final HashMap<String, String> options) {
        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options);
        fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY,
            FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
        fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY,
                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
        try {
            dictEncoder.writeDictionary(fusionDict, formatOptions);
            Log.d(TAG, "end writing");
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Every pair of {@code bigrams} whose frequency (as reported by {@code dict}) is not
     * {@code -1} is added; a null first word denotes a unigram. The value stored in
     * {@code bigrams} for each added pair is overwritten with the frequency cast to a byte.
     */
    @UsedForTesting
    static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
            final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
        final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
                new FusionDictionary.DictionaryOptions(options));
        int profTotal = 0;
        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);
                if (freq == -1) {
                    // don't add this bigram.
                    continue;
                }
                if (DEBUG) {
                    if (word1 == null) {
                        Log.d(TAG, "add unigram: " + word2 + "," + freq);
                    } else {
                        Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + freq);
                    }
                    profTotal++;
                }
                if (word1 == null) { // unigram
                    fusionDict.add(word2, freq, null, false /* isNotAWord */);
                } else { // bigram
                    // The first word has to exist in the tree before a bigram can hang off it.
                    if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
                        fusionDict.add(word1, 2, null, false /* isNotAWord */);
                    }
                    fusionDict.setBigram(word1, word2, freq);
                }
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }
        if (DEBUG) {
            Log.d(TAG, "add " + profTotal + "words");
        }
        return fusionDict;
    }

    /**
     * Reads dictionary from file.
     *
     * Read failures are logged and not propagated; whatever was collected before the failure
     * is still forwarded to {@code dict}.
     *
     * @param dictDecoder The decoder to read from.
     * @param dict The listener receiving every unigram and bigram that was read.
     */
    public static void readDictionaryBinary(final DictDecoder dictDecoder,
            final OnAddWordListener dict) {
        final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
        try {
            dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        } catch (ArrayIndexOutOfBoundsException e) {
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
        }
        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * The three maps share the same keys (the bigram attributes refer to their target word
     * through such a key). A unigram without a matching entry in {@code frequencies} is logged
     * and skipped together with its bigrams.
     *
     * @param unigrams Key to word.
     * @param frequencies Key to unigram frequency.
     * @param bigrams Key to the bigram attributes of the word stored under the same key.
     * @param to The listener to notify.
     */
    @UsedForTesting
    static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
            final OnAddWordListener to) {
        for (Entry<Integer, String> entry : unigrams.entrySet()) {
            final Integer key = entry.getKey();
            final String word1 = entry.getValue();
            final Integer boxedFrequency = frequencies.get(key);
            if (boxedFrequency == null) {
                // The maps may be out of sync, e.g. when the read that filled them failed
                // half-way. Unboxing null would throw a NullPointerException, so skip the
                // entry instead of crashing.
                Log.e(TAG, "No frequency found for unigram: " + word1);
                continue;
            }
            final int unigramFrequency = boxedFrequency;
            to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
            final ArrayList<PendingAttribute> attrList = bigrams.get(key);
            if (attrList != null) {
                for (final PendingAttribute attr : attrList) {
                    final String word2 = unigrams.get(attr.mAddress);
                    if (word1 == null || word2 == null) {
                        Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
                        continue;
                    }
                    to.setBigram(word1, word2,
                            BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
                                    attr.mFrequency));
                }
            }
        }

    }
}
+46 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ public class WordProperty {
    // package.
    public static final class ProbabilityInfo {
        public final int mProbability;
        // wTimestamp, mLevel and mCount are historical info. These values are depend on the
        // mTimestamp, mLevel and mCount are historical info. These values depend on the
        // implementation in native code; thus, we must not use them or make any assumptions about
        // them except in tests.
        public final int mTimestamp;
@@ -54,6 +54,11 @@ public class WordProperty {
            mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
            mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
        }

        /**
         * Formats the historical info as {@code timestamp:level:count}.
         */
        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append(mTimestamp).append(":").append(mLevel).append(":").append(mCount);
            return text.toString();
        }
    }

    private static int getCodePointCount(final int[] codePoints) {
@@ -105,4 +110,44 @@ public class WordProperty {
    public boolean isValid() {
        // Valid means the stored probability is anything but the NOT_A_PROBABILITY marker.
        final int probability = mProbabilityInfo.mProbability;
        return BinaryDictionary.NOT_A_PROBABILITY != probability;
    }

    /**
     * Builds a multi-line, human-readable dump of this word.
     *
     * The first line holds the word, its probability and, when set, the not-a-word and
     * blacklisted flags; historical info is added only when the timestamp is valid. It is
     * followed by one line per bigram target and then one line per shortcut target. Every line
     * ends with a newline character.
     */
    @Override
    public String toString() {
        // TODO: Move this logic to CombinedInputOutput.
        // The buffer never leaves this method, so the unsynchronized StringBuilder is enough;
        // StringBuffer would only add locking overhead.
        final StringBuilder builder = new StringBuilder();
        builder.append(" word=" + mCodePoints);
        builder.append(",");
        builder.append("f=" + mProbabilityInfo.mProbability);
        if (mIsNotAWord) {
            builder.append(",");
            builder.append("not_a_word=true");
        }
        if (mIsBlacklisted) {
            builder.append(",");
            builder.append("blacklisted=true");
        }
        if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
            builder.append(",");
            builder.append("historicalInfo=" + mProbabilityInfo);
        }
        builder.append("\n");
        // mBigramTargets and mBigramProbabilityInfo are read with the same index.
        for (int i = 0; i < mBigramTargets.size(); i++) {
            builder.append("  bigram=" + mBigramTargets.get(i).mWord);
            builder.append(",");
            builder.append("f=" + mBigramTargets.get(i).mFrequency);
            if (mBigramProbabilityInfo.get(i).mTimestamp
                    != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
                builder.append(",");
                builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
            }
            builder.append("\n");
        }
        for (int i = 0; i < mShortcutTargets.size(); i++) {
            builder.append("  shortcut=" + mShortcutTargets.get(i).mWord);
            builder.append(",");
            builder.append("f=" + mShortcutTargets.get(i).mFrequency);
            builder.append("\n");
        }
        return builder.toString();
    }
}
 No newline at end of file
Loading