Merge "Make dumpAllWordsForDebug() use getNextWordProperty()." (99b7242f) · Commits · e / os / android_packages_inputmethods_LatinIME

java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java

+17 −8

Original line number	Diff line number	Diff line
		@@ -28,6 +28,7 @@ import com.android.inputmethod.latin.utils.CollectionUtils;
		import com.android.inputmethod.latin.utils.FileUtils;
		import com.android.inputmethod.latin.utils.LanguageModelParam;
		import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
		import com.android.inputmethod.latin.utils.WordProperty;

		import java.io.File;
		import java.util.ArrayList;
		@@ -778,16 +779,24 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
		}

		// NOTE(review): this hunk appears to interleave the removed runAfterGcForDebug() lines with
		// the added dumpAllWordsForDebug() lines without +/- markers - confirm against the commit
		// before treating the span below as one compilable method.
		@UsedForTesting
		protected void runAfterGcForDebug(final Runnable r) {
		getExecutor(mDictName).executePrioritized(new Runnable() {
		public void dumpAllWordsForDebug() {
		reloadDictionaryIfRequired();
		getExecutor(mDictName).execute(new Runnable() {
		@Override
		public void run() {
		try {
		mBinaryDictionary.flushWithGC();
		r.run();
		} finally {
		mDictNameDictionaryUpdateController.mProcessingLargeTask.set(false);
		}
		Log.d(TAG, "dictionary=" + mDictName);
		// The traversal starts from token 0; every call returns the token to pass to the next call.
		int token = 0;
		do {
		final BinaryDictionary.GetNextWordPropertyResult result =
		mBinaryDictionary.getNextWordProperty(token);
		final WordProperty wordProperty = result.mWordProperty;
		if (wordProperty == null) {
		// No word property came back for this token: log it and stop the dump.
		Log.d(TAG, " dictionary is empty.");
		break;
		}
		Log.d(TAG, wordProperty.toString());
		token = result.mNextToken;
		// A next token of 0 ends the loop.
		} while (token != 0);
		}
		});
		}

java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java

+0 −60

Original line number	Diff line number	Diff line
		@@ -17,21 +17,15 @@
		package com.android.inputmethod.latin.personalization;

		import android.content.Context;
		import android.util.Log;

		import com.android.inputmethod.annotations.UsedForTesting;
		import com.android.inputmethod.latin.Constants;
		import com.android.inputmethod.latin.Dictionary;
		import com.android.inputmethod.latin.ExpandableBinaryDictionary;
		import com.android.inputmethod.latin.makedict.DictDecoder;
		import com.android.inputmethod.latin.makedict.FormatSpec;
		import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
		import com.android.inputmethod.latin.utils.LanguageModelParam;
		import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
		import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;

		import java.io.File;
		import java.io.IOException;
		import java.util.ArrayList;
		import java.util.HashMap;
		import java.util.Locale;
		@@ -44,7 +38,6 @@ import java.util.concurrent.TimeUnit;
		*/
		public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
		private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
		public static final boolean DBG_SAVE_RESTORE = false;
		private static final boolean DBG_DUMP_ON_CLOSE = false;

		/** Any pair being typed or picked */
		@@ -53,8 +46,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
		public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
		public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

		public static final int REQUIRED_BINARY_DICTIONARY_VERSION = FormatSpec.VERSION4;

		/** The locale for this dictionary. */
		public final Locale mLocale;

		@@ -160,57 +151,6 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
		// Never loaded to memory in Java side.
		}

		/**
		 * Logs every word of this dictionary for debugging.
		 *
		 * The actual dump is wrapped in a task and handed to runAfterGcForDebug().
		 */
		@UsedForTesting
		public void dumpAllWordsForDebug() {
		    final Runnable dumpTask = new Runnable() {
		        @Override
		        public void run() {
		            dumpAllWordsForDebugLocked();
		        }
		    };
		    runAfterGcForDebug(dumpTask);
		}

		/**
		 * Reads the binary dictionary file named mDictName from the application's files directory
		 * and logs every unigram and bigram it contains.
		 *
		 * Returns silently when no decoder can be obtained for the file; read failures are logged.
		 */
		private void dumpAllWordsForDebugLocked() {
		    Log.d(TAG, "dumpAllWordsForDebug started.");

		    // Load the dictionary from binary file
		    final File binaryFile = new File(mContext.getFilesDir(), mDictName);
		    final DictDecoder decoder = FormatSpec.getDictDecoder(binaryFile,
		            DictDecoder.USE_BYTEARRAY);
		    if (decoder == null) {
		        // This is an expected condition: we don't have a user history dictionary for this
		        // language yet. It will be created sometime later.
		        return;
		    }

		    // Listener that only logs what the reader reports; nothing is inserted anywhere.
		    final OnAddWordListener loggingListener = new OnAddWordListener() {
		        @Override
		        public void setUnigram(final String word, final String shortcutTarget,
		                final int frequency, final int shortcutFreq) {
		            Log.d(TAG, "load unigram: " + word + "," + frequency);
		        }

		        @Override
		        public void setBigram(final String word0, final String word1, final int frequency) {
		            final boolean tooLong = word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH
		                    || word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH;
		            if (tooLong) {
		                Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + ","
		                        + frequency);
		            } else {
		                Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency);
		            }
		        }
		    };

		    try {
		        decoder.openDictBuffer();
		        UserHistoryDictIOUtils.readDictionaryBinary(decoder, loggingListener);
		    } catch (IOException e) {
		        Log.d(TAG, "IOException on opening a bytebuffer", e);
		    } catch (UnsupportedFormatException e) {
		        Log.d(TAG, "Unsupported format, can't read the dictionary", e);
		    }
		}

		@UsedForTesting
		public void clearAndFlushDictionary() {
		// Clear the node structure on memory

java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java

deleted100644 → 0

+0 −128

Original line number	Diff line number	Diff line
		/*
		* Copyright (C) 2012 The Android Open Source Project
		*
		* Licensed under the Apache License, Version 2.0 (the "License");
		* you may not use this file except in compliance with the License.
		* You may obtain a copy of the License at
		*
		* http://www.apache.org/licenses/LICENSE-2.0
		*
		* Unless required by applicable law or agreed to in writing, software
		* distributed under the License is distributed on an "AS IS" BASIS,
		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		* See the License for the specific language governing permissions and
		* limitations under the License.
		*/

		package com.android.inputmethod.latin.personalization;

		import android.util.Log;

		import com.android.inputmethod.annotations.UsedForTesting;
		import com.android.inputmethod.latin.utils.CollectionUtils;

		import java.util.HashMap;
		import java.util.Set;

		/**
		 * A store of bigrams which will be updated when the user history dictionary is closed.
		 * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding
		 * stale bigrams when we write to the SQL DB.
		 *
		 * Bigrams are held in a two-level map: first word -> (second word -> forgetting curve value).
		 * This class performs no synchronization of its own.
		 */
		@UsedForTesting
		public final class UserHistoryDictionaryBigramList {
		    public static final byte FORGETTING_CURVE_INITIAL_VALUE = 0;
		    private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
		    private final HashMap<String, HashMap<String, Byte>> mBigramMap = CollectionUtils.newHashMap();
		    // Number of (word1, word2) pairs currently stored across all inner maps.
		    private int mSize = 0;

		    /**
		     * Removes every stored bigram and resets the size to zero.
		     */
		    public void evictAll() {
		        mSize = 0;
		        mBigramMap.clear();
		    }

		    /**
		     * Called when the user typed a word.
		     *
		     * @param word1 the first word of the bigram.
		     * @param word2 the second word of the bigram.
		     */
		    @UsedForTesting
		    public void addBigram(String word1, String word2) {
		        addBigram(word1, word2, FORGETTING_CURVE_INITIAL_VALUE);
		    }

		    /**
		     * Called when loaded from the SQL DB.
		     *
		     * An already stored pair is left untouched: neither its value nor the size changes.
		     *
		     * @param word1 the first word of the bigram.
		     * @param word2 the second word of the bigram.
		     * @param fcValue the value to store for a newly added pair.
		     */
		    public void addBigram(String word1, String word2, byte fcValue) {
		        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
		            Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue);
		        }
		        // Inner maps are never stored as null, so a null result means "no entry for word1".
		        HashMap<String, Byte> map = mBigramMap.get(word1);
		        if (map == null) {
		            map = CollectionUtils.newHashMap();
		            mBigramMap.put(word1, map);
		        }
		        if (!map.containsKey(word2)) {
		            ++mSize;
		            map.put(word2, fcValue);
		        }
		    }

		    /**
		     * Called when inserted to the SQL DB.
		     *
		     * Only overwrites the value of a pair that is already stored; unknown pairs are ignored.
		     *
		     * @param word1 the first word of the bigram.
		     * @param word2 the second word of the bigram.
		     * @param fcValue the new value for the pair.
		     */
		    public void updateBigram(String word1, String word2, byte fcValue) {
		        if (DecayingExpandableBinaryDictionaryBase.DBG_SAVE_RESTORE) {
		            Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue);
		        }
		        final HashMap<String, Byte> map = mBigramMap.get(word1);
		        if (map == null || !map.containsKey(word2)) {
		            return;
		        }
		        map.put(word2, fcValue);
		    }

		    /**
		     * @return the number of stored (word1, word2) pairs.
		     */
		    public int size() {
		        return mSize;
		    }

		    /**
		     * @return true when no first word has an entry. A first word keeps its entry after all
		     * of its pairs were removed with removeBigram(), so this can be false while size() is 0.
		     */
		    public boolean isEmpty() {
		        return mBigramMap.isEmpty();
		    }

		    /**
		     * @param word the first word to look up (exact match only).
		     * @return true when the given first word has an entry.
		     */
		    public boolean containsKey(String word) {
		        return mBigramMap.containsKey(word);
		    }

		    /**
		     * @return the live set of first words backing this store.
		     */
		    public Set<String> keySet() {
		        return mBigramMap.keySet();
		    }

		    /**
		     * Returns the second words stored for the given first word.
		     *
		     * The exact key is tried first, then its lower-cased form. On a hit the returned map is
		     * the live inner map; on a miss a fresh empty map is returned, so callers that modify the
		     * result can never leak entries into a shared instance.
		     *
		     * @param word1 the first word; may be null, in which case only the exact (null) key is
		     * tried.
		     * @return the map of second word to stored value, never null.
		     */
		    public HashMap<String, Byte> getBigrams(String word1) {
		        final HashMap<String, Byte> exactMatch = mBigramMap.get(word1);
		        if (exactMatch != null) return exactMatch;
		        // A null first word has no lower-cased form; guard it instead of throwing.
		        if (word1 != null) {
		            // TODO: lower case according to locale
		            final HashMap<String, Byte> lowerMatch = mBigramMap.get(word1.toLowerCase());
		            if (lowerMatch != null) return lowerMatch;
		        }
		        final HashMap<String, Byte> empty = CollectionUtils.newHashMap();
		        return empty;
		    }

		    /**
		     * Removes a stored pair, resolving the first word the same way as getBigrams().
		     *
		     * @param word1 the first word of the bigram.
		     * @param word2 the second word of the bigram.
		     * @return true when the pair was stored and has been removed, false otherwise.
		     */
		    public boolean removeBigram(String word1, String word2) {
		        final HashMap<String, Byte> set = getBigrams(word1);
		        if (!set.containsKey(word2)) {
		            return false;
		        }
		        set.remove(word2);
		        --mSize;
		        return true;
		    }
		}

java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java

deleted100644 → 0

+0 −181

Original line number	Diff line number	Diff line
		/*
		* Copyright (C) 2012 The Android Open Source Project
		*
		* Licensed under the Apache License, Version 2.0 (the "License");
		* you may not use this file except in compliance with the License.
		* You may obtain a copy of the License at
		*
		* http://www.apache.org/licenses/LICENSE-2.0
		*
		* Unless required by applicable law or agreed to in writing, software
		* distributed under the License is distributed on an "AS IS" BASIS,
		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		* See the License for the specific language governing permissions and
		* limitations under the License.
		*/

		package com.android.inputmethod.latin.utils;

		import android.util.Log;

		import com.android.inputmethod.annotations.UsedForTesting;
		import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
		import com.android.inputmethod.latin.makedict.DictDecoder;
		import com.android.inputmethod.latin.makedict.DictEncoder;
		import com.android.inputmethod.latin.makedict.FormatSpec;
		import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
		import com.android.inputmethod.latin.makedict.FusionDictionary;
		import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
		import com.android.inputmethod.latin.makedict.PendingAttribute;
		import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
		import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;

		import java.io.IOException;
		import java.util.ArrayList;
		import java.util.HashMap;
		import java.util.Map.Entry;
		import java.util.TreeMap;
		import java.util.concurrent.TimeUnit;

		/**
		* Reads and writes Binary files for a UserHistoryDictionary.
		*
		* All the methods in this class are static.
		*/
		public final class UserHistoryDictIOUtils {
		private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
		private static final boolean DEBUG = false;

		public interface OnAddWordListener {
		/**
		* Callback to be notified when a word is added to the dictionary.
		* @param word The added word.
		* @param shortcutTarget A shortcut target for this word, or null if none.
		* @param frequency The frequency for this word.
		* @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
		* Unspecified if shortcutTarget is null - do not rely on its value.
		*/
		public void setUnigram(final String word, final String shortcutTarget, final int frequency,
		final int shortcutFreq);
		public void setBigram(final String word1, final String word2, final int frequency);
		}

		@UsedForTesting
		public interface BigramDictionaryInterface {
		public int getFrequency(final String word1, final String word2);
		}

		/**
		* Writes dictionary to file.
		*/
		@UsedForTesting
		public static void writeDictionary(final DictEncoder dictEncoder,
		final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
		final FormatOptions formatOptions, final HashMap<String, String> options) {
		final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams, options);
		fusionDict.addOptionAttribute(FormatSpec.FileHeader.USES_FORGETTING_CURVE_KEY,
		FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
		fusionDict.addOptionAttribute(FormatSpec.FileHeader.DICTIONARY_DATE_KEY,
		String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
		try {
		dictEncoder.writeDictionary(fusionDict, formatOptions);
		Log.d(TAG, "end writing");
		} catch (IOException e) {
		Log.e(TAG, "IO exception while writing file", e);
		} catch (UnsupportedFormatException e) {
		Log.e(TAG, "Unsupported format", e);
		}
		}

		/**
		* Constructs a new FusionDictionary from BigramDictionaryInterface.
		*/
		@UsedForTesting
		static FusionDictionary constructFusionDictionary(final BigramDictionaryInterface dict,
		final UserHistoryDictionaryBigramList bigrams, final HashMap<String, String> options) {
		final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
		new FusionDictionary.DictionaryOptions(options));
		int profTotal = 0;
		for (final String word1 : bigrams.keySet()) {
		final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
		for (final String word2 : word1Bigrams.keySet()) {
		final int freq = dict.getFrequency(word1, word2);
		if (freq == -1) {
		// don't add this bigram.
		continue;
		}
		if (DEBUG) {
		if (word1 == null) {
		Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
		} else {
		Log.d(TAG, "add bigram: " + word1
		+ "," + word2 + "," + Integer.toString(freq));
		}
		profTotal++;
		}
		if (word1 == null) { // unigram
		fusionDict.add(word2, freq, null, false /* isNotAWord */);
		} else { // bigram
		if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
		fusionDict.add(word1, 2, null, false /* isNotAWord */);
		}
		fusionDict.setBigram(word1, word2, freq);
		}
		bigrams.updateBigram(word1, word2, (byte)freq);
		}
		}
		if (DEBUG) {
		Log.d(TAG, "add " + profTotal + "words");
		}
		return fusionDict;
		}

		/**
		* Reads dictionary from file.
		*/
		public static void readDictionaryBinary(final DictDecoder dictDecoder,
		final OnAddWordListener dict) {
		final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
		final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
		final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
		try {
		dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
		} catch (IOException e) {
		Log.e(TAG, "IO exception while reading file", e);
		} catch (UnsupportedFormatException e) {
		Log.e(TAG, "Unsupported format", e);
		} catch (ArrayIndexOutOfBoundsException e) {
		Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
		}
		addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
		}

		/**
		* Adds all unigrams and bigrams in maps to OnAddWordListener.
		*/
		@UsedForTesting
		static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
		final TreeMap<Integer, Integer> frequencies,
		final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
		final OnAddWordListener to) {
		for (Entry<Integer, String> entry : unigrams.entrySet()) {
		final String word1 = entry.getValue();
		final int unigramFrequency = frequencies.get(entry.getKey());
		to.setUnigram(word1, null /* shortcutTarget /, unigramFrequency, 0 / shortcutFreq */);
		final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
		if (attrList != null) {
		for (final PendingAttribute attr : attrList) {
		final String word2 = unigrams.get(attr.mAddress);
		if (word1 == null \|\| word2 == null) {
		Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
		continue;
		}
		to.setBigram(word1, word2,
		BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
		attr.mFrequency));
		}
		}
		}

		}
		}

java/src/com/android/inputmethod/latin/utils/WordProperty.java

+46 −1

Original line number	Diff line number	Diff line
		@@ -41,7 +41,7 @@ public class WordProperty {
		// package.
		public static final class ProbabilityInfo {
		public final int mProbability;
		// wTimestamp, mLevel and mCount are historical info. These values are depend on the
		// mTimestamp, mLevel and mCount are historical info. These values depend on the
		// implementation in native code; thus, we must not use them or make any assumptions about
		// them except in tests.
		public final int mTimestamp;
		@@ -54,6 +54,11 @@ public class WordProperty {
		mLevel = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX];
		mCount = probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX];
		}

		/**
		 * @return the historical info formatted as "timestamp:level:count".
		 */
		@Override
		public String toString() {
		    final String separator = ":";
		    final String timestampAndLevel = mTimestamp + separator + mLevel;
		    return timestampAndLevel + separator + mCount;
		}
		}

		private static int getCodePointCount(final int[] codePoints) {
		@@ -105,4 +110,44 @@ public class WordProperty {
		public boolean isValid() {
		return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
		}

		/**
		 * Builds a multi-line, human-readable dump of this word.
		 *
		 * The first line holds the word, its probability and, only when they apply, the
		 * not_a_word / blacklisted flags and the historical info. It is followed by one line per
		 * bigram target and one line per shortcut target. Every line ends with a newline.
		 *
		 * @return the formatted description.
		 */
		@Override
		public String toString() {
		    // TODO: Move this logic to CombinedInputOutput.
		    // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
		    final StringBuilder builder = new StringBuilder();
		    builder.append(" word=" + mCodePoints);
		    builder.append(",");
		    builder.append("f=" + mProbabilityInfo.mProbability);
		    if (mIsNotAWord) {
		        builder.append(",");
		        builder.append("not_a_word=true");
		    }
		    if (mIsBlacklisted) {
		        builder.append(",");
		        builder.append("blacklisted=true");
		    }
		    // Historical info is printed only when a valid timestamp is present.
		    if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
		        builder.append(",");
		        builder.append("historicalInfo=" + mProbabilityInfo);
		    }
		    builder.append("\n");
		    // NOTE(review): assumes mBigramProbabilityInfo has one entry per bigram target, at the
		    // same index - confirm against the constructor.
		    for (int i = 0; i < mBigramTargets.size(); i++) {
		        builder.append(" bigram=" + mBigramTargets.get(i).mWord);
		        builder.append(",");
		        builder.append("f=" + mBigramTargets.get(i).mFrequency);
		        if (mBigramProbabilityInfo.get(i).mTimestamp
		                != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
		            builder.append(",");
		            builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
		        }
		        builder.append("\n");
		    }
		    for (int i = 0; i < mShortcutTargets.size(); i++) {
		        builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
		        builder.append(",");
		        builder.append("f=" + mShortcutTargets.get(i).mFrequency);
		        builder.append("\n");
		    }
		    return builder.toString();
		}
		}
		No newline at end of file