/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.util.Log;

import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;

    /**
     * Callback that receives every unigram and bigram decoded by
     * {@link #readDictionaryBinary}.
     */
    public interface OnAddWordListener {
        public void setUnigram(final String word, final String shortcutTarget,
                final int frequency);
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Source of frequencies used when a dictionary is written.
     *
     * {@link #constructFusionDictionary} passes {@code null} as {@code word1} when it asks for
     * the frequency of a unigram.
     */
    public interface BigramDictionaryInterface {
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * A {@link FusionDictionaryBufferInterface} backed by a plain byte array.
     *
     * Multi-byte values are read most significant byte first. No bounds checking is done:
     * reading past the end of the array throws {@link ArrayIndexOutOfBoundsException}.
     */
    public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
        private final byte[] mBuffer;
        private int mPosition;

        ByteArrayWrapper(final byte[] buffer) {
            mBuffer = buffer;
            mPosition = 0;
        }

        @Override
        public int readUnsignedByte() {
            // Mask to undo the sign extension of the byte -> int conversion.
            return ((int)mBuffer[mPosition++]) & 0xFF;
        }

        @Override
        public int readUnsignedShort() {
            final int retval = readUnsignedByte();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readUnsignedInt24() {
            final int retval = readUnsignedShort();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readInt() {
            final int retval = readUnsignedShort();
            return (retval << 16) + readUnsignedShort();
        }

        @Override
        public int position() {
            return mPosition;
        }

        @Override
        public void position(int position) {
            mPosition = position;
        }
    }

    /**
     * Writes dictionary to file.
     *
     * Failures are logged (with their stack trace) and otherwise swallowed, so the caller is
     * not told whether the write succeeded.
     *
     * @param destination the stream the binary dictionary is written to; not closed here
     * @param dict provides the frequency of each unigram and bigram
     * @param bigrams the words and word pairs to write
     * @param version the binary format version handed to {@link BinaryDictInputOutput}
     */
    public static void writeDictionaryBinary(final OutputStream destination,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final int version) {
        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
        try {
            BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Entries of {@code bigrams} stored under the {@code null} key are added as unigrams, all
     * other entries as bigrams. Every visited entry of {@code bigrams} is also updated with the
     * frequency reported by {@code dict}.
     *
     * @param dict provides the frequency of each unigram and bigram
     * @param bigrams the words and word pairs to put in the dictionary; updated in place
     * @return the newly built dictionary
     */
    /* package for test */ static FusionDictionary constructFusionDictionary(
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
        final FusionDictionary fusionDict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(
                        new HashMap<String,String>(), false, false));
        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);
                if (word1 == null) { // unigram
                    if (DEBUG) {
                        Log.d(TAG, "add unigram: " + word2 + "," + freq);
                    }
                    fusionDict.add(word2, freq, null);
                } else { // bigram
                    if (DEBUG) {
                        Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + freq);
                    }
                    fusionDict.setBigram(word1, word2, freq);
                }
                // Narrowing cast: only the low 8 bits of the frequency are kept in the list.
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }
        return fusionDict;
    }

    /**
     * Reads dictionary from file.
     *
     * Failures are logged and swallowed. This includes running off the end of a truncated or
     * corrupt buffer, which a {@link ByteArrayWrapper} reports as an
     * {@link ArrayIndexOutOfBoundsException}. The listener is not called when decoding fails.
     *
     * @param buffer the binary dictionary to decode
     * @param dict receives every decoded unigram and bigram
     */
    public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
            final OnAddWordListener dict) {
        final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
        try {
            BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
                    bigrams);
            addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        } catch (ArrayIndexOutOfBoundsException e) {
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
        }
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * All three maps are keyed by the same integer address. A bigram whose target address has
     * no entry in {@code unigrams} is skipped rather than reported with a null second word.
     *
     * @param unigrams address to word
     * @param frequencies address to unigram frequency
     * @param bigrams address of the first word to its pending bigram attributes
     * @param to receives the words
     */
    /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
            final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
        for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
            final String word1 = entry.getValue();
            final int unigramFrequency = frequencies.get(entry.getKey());
            to.setUnigram(word1, null, unigramFrequency);

            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
            if (attrList == null) {
                continue;
            }
            for (final PendingAttribute attr : attrList) {
                final String word2 = unigrams.get(attr.mAddress);
                if (word2 == null) {
                    // Dangling address: there is no word to pair word1 with.
                    if (DEBUG) {
                        Log.d(TAG, "skip bigram with unknown target address: " + word1);
                    }
                    continue;
                }
                to.setBigram(word1, word2,
                        BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
                                attr.mFrequency));
            }
        }
    }
}
private static final int MAX_PASSES = 24; private interface FusionDictionaryBufferInterface { public interface FusionDictionaryBufferInterface { public int readUnsignedByte(); public int readUnsignedShort(); public int readUnsignedInt24(); Loading Loading @@ -234,7 +234,6 @@ public class BinaryDictInputOutput { @Override public void position(int newPos) { mBuffer.position(newPos); return; } } Loading Loading @@ -1393,7 +1392,6 @@ public class BinaryDictInputOutput { final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) { int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack<Position> stack = new Stack<Position>(); Loading Loading @@ -1443,8 +1441,6 @@ public class BinaryDictInputOutput { stack.push(childrenPos); } } return; } /** Loading @@ -1462,7 +1458,6 @@ public class BinaryDictInputOutput { final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); Loading Loading @@ -1507,10 +1502,8 @@ public class BinaryDictInputOutput { * @throws UnsupportedFormatException */ private static int readHeader(final FusionDictionaryBufferInterface buffer, final HashMap<String, String> options, final int version) final HashMap<String, String> options, final int version) throws IOException, UnsupportedFormatException { final int headerSize; if (version < FIRST_VERSION_WITH_HEADER_SIZE) { headerSize = buffer.position(); Loading @@ -1523,7 +1516,6 @@ public class BinaryDictInputOutput { if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } return headerSize; } Loading Loading @@ -1561,7 +1553,6 @@ public class BinaryDictInputOutput { public static 
FusionDictionary readDictionaryBinary( final FusionDictionaryBufferInterface buffer, final FusionDictionary dict) throws IOException, UnsupportedFormatException { // clear cache wordCache.clear(); Loading tests/src/com/android/inputmethod/latin/UserHistoryDictIOUtilsTests.java 0 → 100644 +249 −0 Original line number Diff line number Diff line /* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin; import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface; import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import android.content.Context; import android.test.AndroidTestCase; import android.util.Log; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; /** * Unit tests for UserHistoryDictIOUtils */ public class UserHistoryDictIOUtilsTests extends AndroidTestCase implements BigramDictionaryInterface { private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName(); private static final int UNIGRAM_FREQUENCY = 50; private static final int BIGRAM_FREQUENCY = 100; private static final ArrayList<String> 
NOT_HAVE_BIGRAM = new ArrayList<String>(); /** * Return same frequency for all words and bigrams */ @Override public int getFrequency(String word1, String word2) { if (word1 == null) return UNIGRAM_FREQUENCY; return BIGRAM_FREQUENCY; } // Utilities for Testing private void addWord(final String word, final HashMap<String, ArrayList<String> > addedWords) { if (!addedWords.containsKey(word)) { addedWords.put(word, new ArrayList<String>()); } } private void addBigram(final String word1, final String word2, final HashMap<String, ArrayList<String> > addedWords) { addWord(word1, addedWords); addWord(word2, addedWords); addedWords.get(word1).add(word2); } private void addBigramToBigramList(final String word1, final String word2, final HashMap<String, ArrayList<String> > addedWords, final UserHistoryDictionaryBigramList bigramList) { bigramList.addBigram(null, word1); bigramList.addBigram(word1, word2); addBigram(word1, word2, addedWords); } private void checkWordInFusionDict(final FusionDictionary dict, final String word, final ArrayList<String> expectedBigrams) { final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word); assertNotNull(group); assertTrue(group.isTerminal()); for (final String bigram : expectedBigrams) { assertNotNull(group.getBigram(bigram)); } } private void checkWordsInFusionDict(final FusionDictionary dict, final HashMap<String, ArrayList<String> > bigrams) { for (final String word : bigrams.keySet()) { if (bigrams.containsKey(word)) { checkWordInFusionDict(dict, word, bigrams.get(word)); } else { checkWordInFusionDict(dict, word, NOT_HAVE_BIGRAM); } } } private void checkWordInBigramList( final UserHistoryDictionaryBigramList bigramList, final String word, final ArrayList<String> expectedBigrams) { // check unigram final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null); assertTrue(unigramMap.containsKey(word)); // check bigrams final ArrayList<String> actualBigrams = new ArrayList<String>( 
bigramList.getBigrams(word).keySet()); Collections.sort(expectedBigrams); Collections.sort(actualBigrams); assertEquals(expectedBigrams, actualBigrams); } private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList, final HashMap<String, ArrayList<String> > addedWords) { for (final String word : addedWords.keySet()) { if (addedWords.containsKey(word)) { checkWordInBigramList(bigramList, word, addedWords.get(word)); } else { checkWordInBigramList(bigramList, word, NOT_HAVE_BIGRAM); } } } private void writeDictToFile(final File file, final UserHistoryDictionaryBigramList bigramList) { try { final FileOutputStream out = new FileOutputStream(file); UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2); out.flush(); out.close(); } catch (IOException e) { Log.e(TAG, "IO exception while writing file: " + e); } } private void readDictFromFile(final File file, final OnAddWordListener listener) { FileInputStream inStream = null; try { inStream = new FileInputStream(file); final byte[] buffer = new byte[(int)file.length()]; inStream.read(buffer); UserHistoryDictIOUtils.readDictionaryBinary( new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener); } catch (FileNotFoundException e) { Log.e(TAG, "file not found: " + e); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } } } public void testGenerateFusionDictionary() { final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); final HashMap<String, ArrayList<String> > addedWords = new HashMap<String, ArrayList<String>>(); addBigramToBigramList("this", "is", addedWords, originalList); addBigramToBigramList("this", "was", addedWords, originalList); addBigramToBigramList("hello", "world", addedWords, originalList); final FusionDictionary fusionDict = UserHistoryDictIOUtils.constructFusionDictionary(this, originalList); 
checkWordsInFusionDict(fusionDict, addedWords); } public void testReadAndWrite() { final Context context = getContext(); File file = null; try { file = File.createTempFile("testReadAndWrite", ".dict"); } catch (IOException e) { Log.d(TAG, "IOException while creating a temporary file: " + e); } assertNotNull(file); // make original dictionary final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList(); final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap(); addBigramToBigramList("this" , "is" , addedWords, originalList); addBigramToBigramList("this" , "was" , addedWords, originalList); addBigramToBigramList("is" , "not" , addedWords, originalList); addBigramToBigramList("hello", "world", addedWords, originalList); // write to file writeDictToFile(file, originalList); // make result dict. final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList(); final OnAddWordListener listener = new OnAddWordListener() { @Override public void setUnigram(final String word, final String shortcutTarget, final int frequency) { Log.d(TAG, "in: setUnigram: " + word + "," + frequency); resultList.addBigram(null, word, (byte)frequency); } @Override public void setBigram(final String word1, final String word2, final int frequency) { Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); resultList.addBigram(word1, word2, (byte)frequency); } }; // load from file readDictFromFile(file, listener); checkWordsInBigramList(resultList, addedWords); // add new bigram addBigramToBigramList("hello", "java", addedWords, resultList); // rewrite writeDictToFile(file, resultList); final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList(); final OnAddWordListener listener2 = new OnAddWordListener() { @Override public void setUnigram(final String word, final String shortcutTarget, final int frequency) { Log.d(TAG, "in: setUnigram: " + word + "," + frequency); 
resultList2.addBigram(null, word, (byte)frequency); } @Override public void setBigram(final String word1, final String word2, final int frequency) { Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency); resultList2.addBigram(word1, word2, (byte)frequency); } }; // load from file readDictFromFile(file, listener2); checkWordsInBigramList(resultList2, addedWords); } } Loading
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.util.Log;

import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;

    /**
     * Callback that receives every unigram and bigram decoded by
     * {@link #readDictionaryBinary}.
     */
    public interface OnAddWordListener {
        public void setUnigram(final String word, final String shortcutTarget,
                final int frequency);
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Source of frequencies used when a dictionary is written.
     *
     * {@link #constructFusionDictionary} passes {@code null} as {@code word1} when it asks for
     * the frequency of a unigram.
     */
    public interface BigramDictionaryInterface {
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * A {@link FusionDictionaryBufferInterface} backed by a plain byte array.
     *
     * Multi-byte values are read most significant byte first. No bounds checking is done:
     * reading past the end of the array throws {@link ArrayIndexOutOfBoundsException}.
     */
    public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
        private final byte[] mBuffer;
        private int mPosition;

        ByteArrayWrapper(final byte[] buffer) {
            mBuffer = buffer;
            mPosition = 0;
        }

        @Override
        public int readUnsignedByte() {
            // Mask to undo the sign extension of the byte -> int conversion.
            return ((int)mBuffer[mPosition++]) & 0xFF;
        }

        @Override
        public int readUnsignedShort() {
            final int retval = readUnsignedByte();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readUnsignedInt24() {
            final int retval = readUnsignedShort();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readInt() {
            final int retval = readUnsignedShort();
            return (retval << 16) + readUnsignedShort();
        }

        @Override
        public int position() {
            return mPosition;
        }

        @Override
        public void position(int position) {
            mPosition = position;
        }
    }

    /**
     * Writes dictionary to file.
     *
     * Failures are logged (with their stack trace) and otherwise swallowed, so the caller is
     * not told whether the write succeeded.
     *
     * @param destination the stream the binary dictionary is written to; not closed here
     * @param dict provides the frequency of each unigram and bigram
     * @param bigrams the words and word pairs to write
     * @param version the binary format version handed to {@link BinaryDictInputOutput}
     */
    public static void writeDictionaryBinary(final OutputStream destination,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final int version) {
        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
        try {
            BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Entries of {@code bigrams} stored under the {@code null} key are added as unigrams, all
     * other entries as bigrams. Every visited entry of {@code bigrams} is also updated with the
     * frequency reported by {@code dict}.
     *
     * @param dict provides the frequency of each unigram and bigram
     * @param bigrams the words and word pairs to put in the dictionary; updated in place
     * @return the newly built dictionary
     */
    /* package for test */ static FusionDictionary constructFusionDictionary(
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
        final FusionDictionary fusionDict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(
                        new HashMap<String,String>(), false, false));
        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);
                if (word1 == null) { // unigram
                    if (DEBUG) {
                        Log.d(TAG, "add unigram: " + word2 + "," + freq);
                    }
                    fusionDict.add(word2, freq, null);
                } else { // bigram
                    if (DEBUG) {
                        Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + freq);
                    }
                    fusionDict.setBigram(word1, word2, freq);
                }
                // Narrowing cast: only the low 8 bits of the frequency are kept in the list.
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }
        return fusionDict;
    }

    /**
     * Reads dictionary from file.
     *
     * Failures are logged and swallowed. This includes running off the end of a truncated or
     * corrupt buffer, which a {@link ByteArrayWrapper} reports as an
     * {@link ArrayIndexOutOfBoundsException}. The listener is not called when decoding fails.
     *
     * @param buffer the binary dictionary to decode
     * @param dict receives every decoded unigram and bigram
     */
    public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
            final OnAddWordListener dict) {
        final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
        try {
            BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
                    bigrams);
            addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        } catch (ArrayIndexOutOfBoundsException e) {
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
        }
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * All three maps are keyed by the same integer address. A bigram whose target address has
     * no entry in {@code unigrams} is skipped rather than reported with a null second word.
     *
     * @param unigrams address to word
     * @param frequencies address to unigram frequency
     * @param bigrams address of the first word to its pending bigram attributes
     * @param to receives the words
     */
    /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
            final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
        for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
            final String word1 = entry.getValue();
            final int unigramFrequency = frequencies.get(entry.getKey());
            to.setUnigram(word1, null, unigramFrequency);

            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
            if (attrList == null) {
                continue;
            }
            for (final PendingAttribute attr : attrList) {
                final String word2 = unigrams.get(attr.mAddress);
                if (word2 == null) {
                    // Dangling address: there is no word to pair word1 with.
                    if (DEBUG) {
                        Log.d(TAG, "skip bigram with unknown target address: " + word1);
                    }
                    continue;
                }
                to.setBigram(word1, word2,
                        BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
                                attr.mFrequency));
            }
        }
    }
}
java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +2 −11 Original line number Diff line number Diff line Loading @@ -189,7 +189,7 @@ public class BinaryDictInputOutput { // suspicion that a bug might be causing an infinite loop. private static final int MAX_PASSES = 24; private interface FusionDictionaryBufferInterface { public interface FusionDictionaryBufferInterface { public int readUnsignedByte(); public int readUnsignedShort(); public int readUnsignedInt24(); Loading Loading @@ -234,7 +234,6 @@ public class BinaryDictInputOutput { @Override public void position(int newPos) { mBuffer.position(newPos); return; } } Loading Loading @@ -1393,7 +1392,6 @@ public class BinaryDictInputOutput { final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) { int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack<Position> stack = new Stack<Position>(); Loading Loading @@ -1443,8 +1441,6 @@ public class BinaryDictInputOutput { stack.push(childrenPos); } } return; } /** Loading @@ -1462,7 +1458,6 @@ public class BinaryDictInputOutput { final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); Loading Loading @@ -1507,10 +1502,8 @@ public class BinaryDictInputOutput { * @throws UnsupportedFormatException */ private static int readHeader(final FusionDictionaryBufferInterface buffer, final HashMap<String, String> options, final int version) final HashMap<String, String> options, final int version) throws IOException, UnsupportedFormatException { final int headerSize; if (version < FIRST_VERSION_WITH_HEADER_SIZE) { headerSize = buffer.position(); Loading @@ -1523,7 +1516,6 @@ 
public class BinaryDictInputOutput { if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } return headerSize; } Loading Loading @@ -1561,7 +1553,6 @@ public class BinaryDictInputOutput { public static FusionDictionary readDictionaryBinary( final FusionDictionaryBufferInterface buffer, final FusionDictionary dict) throws IOException, UnsupportedFormatException { // clear cache wordCache.clear(); Loading
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;

import android.content.Context;
import android.test.AndroidTestCase;
import android.util.Log;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;

/**
 * Unit tests for UserHistoryDictIOUtils
 */
public class UserHistoryDictIOUtilsTests extends AndroidTestCase
        implements BigramDictionaryInterface {

    private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName();
    private static final int UNIGRAM_FREQUENCY = 50;
    private static final int BIGRAM_FREQUENCY = 100;

    /**
     * Return same frequency for all words and bigrams
     */
    @Override
    public int getFrequency(String word1, String word2) {
        if (word1 == null) {
            return UNIGRAM_FREQUENCY;
        }
        return BIGRAM_FREQUENCY;
    }

    // Utilities for Testing

    /** Registers {@code word} in {@code addedWords} with no expected bigrams, if absent. */
    private void addWord(final String word,
            final HashMap<String, ArrayList<String> > addedWords) {
        if (!addedWords.containsKey(word)) {
            addedWords.put(word, new ArrayList<String>());
        }
    }

    /** Records in {@code addedWords} that {@code word2} is expected to follow {@code word1}. */
    private void addBigram(final String word1, final String word2,
            final HashMap<String, ArrayList<String> > addedWords) {
        addWord(word1, addedWords);
        addWord(word2, addedWords);
        addedWords.get(word1).add(word2);
    }

    /**
     * Adds {@code word1} as a unigram and {@code word1 word2} as a bigram to {@code bigramList},
     * and records the same expectation in {@code addedWords}.
     */
    private void addBigramToBigramList(final String word1, final String word2,
            final HashMap<String, ArrayList<String> > addedWords,
            final UserHistoryDictionaryBigramList bigramList) {
        bigramList.addBigram(null, word1);
        bigramList.addBigram(word1, word2);

        addBigram(word1, word2, addedWords);
    }

    /** Asserts that {@code word} is a terminal in {@code dict} and has every expected bigram. */
    private void checkWordInFusionDict(final FusionDictionary dict, final String word,
            final ArrayList<String> expectedBigrams) {
        final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
        assertNotNull(group);
        assertTrue(group.isTerminal());

        for (final String bigram : expectedBigrams) {
            assertNotNull(group.getBigram(bigram));
        }
    }

    /** Runs {@link #checkWordInFusionDict} for every word recorded in {@code bigrams}. */
    private void checkWordsInFusionDict(final FusionDictionary dict,
            final HashMap<String, ArrayList<String> > bigrams) {
        // Every key comes from the map itself, so it always has a (possibly empty) list.
        for (final String word : bigrams.keySet()) {
            checkWordInFusionDict(dict, word, bigrams.get(word));
        }
    }

    /**
     * Asserts that {@code word} is a unigram of {@code bigramList} and that the words following
     * it are exactly {@code expectedBigrams}, regardless of order.
     */
    private void checkWordInBigramList(
            final UserHistoryDictionaryBigramList bigramList, final String word,
            final ArrayList<String> expectedBigrams) {
        // check unigram
        final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null);
        assertTrue(unigramMap.containsKey(word));

        // check bigrams
        // Sort copies so the caller's expectation list is not reordered as a side effect.
        final ArrayList<String> sortedExpected = new ArrayList<String>(expectedBigrams);
        final ArrayList<String> sortedActual = new ArrayList<String>(
                bigramList.getBigrams(word).keySet());
        Collections.sort(sortedExpected);
        Collections.sort(sortedActual);
        assertEquals(sortedExpected, sortedActual);
    }

    /** Runs {@link #checkWordInBigramList} for every word recorded in {@code addedWords}. */
    private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList,
            final HashMap<String, ArrayList<String> > addedWords) {
        for (final String word : addedWords.keySet()) {
            checkWordInBigramList(bigramList, word, addedWords.get(word));
        }
    }

    /** Writes {@code bigramList} to {@code file}, always closing the stream. */
    private void writeDictToFile(final File file,
            final UserHistoryDictionaryBigramList bigramList) {
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(file);
            UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
            out.flush();
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    Log.e(TAG, "IO exception while closing file", e);
                }
            }
        }
    }

    /** Loads the whole of {@code file} into memory and decodes it into {@code listener}. */
    private void readDictFromFile(final File file, final OnAddWordListener listener) {
        FileInputStream inStream = null;

        try {
            inStream = new FileInputStream(file);
            final byte[] buffer = new byte[(int)file.length()];
            // A single read() may return fewer bytes than requested, so loop until the buffer
            // is full or the stream ends.
            int offset = 0;
            while (offset < buffer.length) {
                final int count = inStream.read(buffer, offset, buffer.length - offset);
                if (count < 0) {
                    break;
                }
                offset += count;
            }

            UserHistoryDictIOUtils.readDictionaryBinary(
                    new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener);
        } catch (FileNotFoundException e) {
            Log.e(TAG, "file not found", e);
        } catch (IOException e) {
            Log.e(TAG, "IOException", e);
        } finally {
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException e) {
                    // do nothing
                }
            }
        }
    }

    /** Returns a listener that stores every word it is given into {@code target}. */
    private static OnAddWordListener newRecordingListener(
            final UserHistoryDictionaryBigramList target) {
        return new OnAddWordListener() {
            @Override
            public void setUnigram(final String word, final String shortcutTarget,
                    final int frequency) {
                Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
                target.addBigram(null, word, (byte)frequency);
            }
            @Override
            public void setBigram(final String word1, final String word2, final int frequency) {
                Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
                target.addBigram(word1, word2, (byte)frequency);
            }
        };
    }

    public void testGenerateFusionDictionary() {
        final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();

        final HashMap<String, ArrayList<String> > addedWords =
                new HashMap<String, ArrayList<String>>();
        addBigramToBigramList("this", "is", addedWords, originalList);
        addBigramToBigramList("this", "was", addedWords, originalList);
        addBigramToBigramList("hello", "world", addedWords, originalList);

        final FusionDictionary fusionDict =
                UserHistoryDictIOUtils.constructFusionDictionary(this, originalList);

        checkWordsInFusionDict(fusionDict, addedWords);
    }

    public void testReadAndWrite() {
        final Context context = getContext();

        File file = null;
        try {
            // Create the file in the app's cache directory so the test does not depend on how
            // java.io.tmpdir is configured on the device.
            file = File.createTempFile("testReadAndWrite", ".dict", context.getCacheDir());
        } catch (IOException e) {
            Log.d(TAG, "IOException while creating a temporary file", e);
        }
        assertNotNull(file);

        try {
            // make original dictionary
            final UserHistoryDictionaryBigramList originalList =
                    new UserHistoryDictionaryBigramList();
            final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap();
            addBigramToBigramList("this" , "is"   , addedWords, originalList);
            addBigramToBigramList("this" , "was"  , addedWords, originalList);
            addBigramToBigramList("is"   , "not"  , addedWords, originalList);
            addBigramToBigramList("hello", "world", addedWords, originalList);

            // write to file
            writeDictToFile(file, originalList);

            // make result dict.
            final UserHistoryDictionaryBigramList resultList =
                    new UserHistoryDictionaryBigramList();

            // load from file
            readDictFromFile(file, newRecordingListener(resultList));
            checkWordsInBigramList(resultList, addedWords);

            // add new bigram
            addBigramToBigramList("hello", "java", addedWords, resultList);

            // rewrite
            writeDictToFile(file, resultList);
            final UserHistoryDictionaryBigramList resultList2 =
                    new UserHistoryDictionaryBigramList();

            // load from file
            readDictFromFile(file, newRecordingListener(resultList2));
            checkWordsInBigramList(resultList2, addedWords);
        } finally {
            // Best-effort cleanup; a leftover file does not affect the test result.
            file.delete();
        }
    }
}