Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +114 −9 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Stack; import java.util.TreeMap; /** Loading Loading @@ -197,20 +198,21 @@ public class BinaryDictInputOutput { public void position(int newPosition); } private static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { private ByteBuffer buffer; ByteBufferWrapper(final ByteBuffer buffer) { this.buffer = buffer; public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { private ByteBuffer mBuffer; public ByteBufferWrapper(final ByteBuffer buffer) { mBuffer = buffer; } @Override public int readUnsignedByte() { return ((int)buffer.get()) & 0xFF; return ((int)mBuffer.get()) & 0xFF; } @Override public int readUnsignedShort() { return ((int)buffer.getShort()) & 0xFFFF; return ((int)mBuffer.getShort()) & 0xFFFF; } @Override Loading @@ -221,17 +223,17 @@ public class BinaryDictInputOutput { @Override public int readInt() { return buffer.getInt(); return mBuffer.getInt(); } @Override public int position() { return buffer.position(); return mBuffer.position(); } @Override public void position(int newPos) { buffer.position(newPos); mBuffer.position(newPos); return; } } Loading Loading @@ -1367,6 +1369,109 @@ public class BinaryDictInputOutput { return node; } // TODO: move these methods (readUnigramsAndBigramsBinary(|Inner)) and an inner class (Position) // out of this class. private static class Position { public static final int NOT_READ_GROUPCOUNT = -1; public int mAddress; public int mNumOfCharGroup; public int mPosition; public int mLength; public Position(int address, int length) { mAddress = address; mLength = length; mNumOfCharGroup = NOT_READ_GROUPCOUNT; } } /** * Tours all node without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) { int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack<Position> stack = new Stack<Position>(); int index = 0; Position initPos = new Position(headerSize, 0); stack.push(initPos); while (!stack.empty()) { Position p = stack.peek(); if (DBG) { MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" + p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); } if (buffer.position() != p.mAddress) buffer.position(p.mAddress); if (index != p.mLength) index = p.mLength; if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { p.mNumOfCharGroup = readCharGroupCount(buffer); p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mPosition = 0; } CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; } p.mPosition++; if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); frequencies.put(info.mOriginalAddress, info.mFrequency); if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); } if (p.mPosition == p.mNumOfCharGroup) { stack.pop(); } else { // the node has more groups. p.mAddress = buffer.position(); } if (hasChildrenAddress(info.mChildrenAddress)) { Position childrenPos = new Position(info.mChildrenAddress + headerSize, index); stack.push(childrenPos); } } return; } /** * Reads unigrams and bigrams from the binary file. * Doesn't make the memory representation of the dictionary. * * @param buffer the buffer to read. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException * @throws UnsupportedFormatException */ public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); final HashMap<String, String> options = new HashMap<String, String>(); final int headerSize = readHeader(buffer, options, version); readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams); } /** * Helper function to get the binary format version from the header. * @throws IOException Loading tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java +167 −20 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.PendingAttribute; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import android.test.AndroidTestCase; Loading @@ -34,7 +35,10 @@ import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; Loading @@ -46,6 +50,7 @@ public class BinaryDictIOTests extends AndroidTestCase { private static final int MAX_UNIGRAMS = 1000; private static final int UNIGRAM_FREQ = 10; private static final int BIGRAM_FREQ = 50; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; private static final String[] CHARACTERS = { Loading @@ -53,6 +58,7 @@ public class BinaryDictIOTests extends AndroidTestCase { "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" }; // Utilities for test /** * Generates a random word. */ Loading @@ -75,6 +81,9 @@ public class BinaryDictIOTests extends AndroidTestCase { return new ArrayList<String>(wordSet); } /** * Adds unigrams to the dictionary. */ private void addUnigrams(final int number, final FusionDictionary dict, final List<String> words) { Loading @@ -86,19 +95,17 @@ public class BinaryDictIOTests extends AndroidTestCase { private void addBigrams(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> sparseArray) { for (int i = 0; i < sparseArray.size(); ++i) { final int w1 = sparseArray.keyAt(i); for (int w2 : sparseArray.valueAt(i)) { final SparseArray<List<Integer>> bigrams) { for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (int w2 : bigrams.valueAt(i)) { dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); } } } private long timeWritingDictToFile(final String fileName, final FusionDictionary dict) { private long timeWritingDictToFile(final File file, final FusionDictionary dict) { final File file = new File(getContext().getFilesDir(), fileName); long now = -1, diff = -1; try { Loading Loading @@ -140,15 +147,16 @@ public class BinaryDictIOTests extends AndroidTestCase { } } private long timeReadingAndCheckDict(final String fileName, final List<String> words, // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams) { long now, diff = -1; FileInputStream inStream = null; try { final File file = new File(getContext().getFilesDir(), fileName); final FileInputStream inStream = new FileInputStream(file); inStream = new FileInputStream(file); final ByteBuffer buffer = inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, file.length()); Loading @@ -166,6 +174,14 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.e(TAG, "raise IOException while reading file " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported format: " + e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } } return diff; Loading @@ -178,25 +194,26 @@ public class BinaryDictIOTests extends AndroidTestCase { new FusionDictionary.DictionaryOptions( new HashMap<String,String>(), false, false)); final String fileName = generateWord((int)System.currentTimeMillis()) + ".dict"; File file = null; try { file = File.createTempFile("runReadAndWrite", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } assertNotNull(file); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); // check original dictionary checkDictionary(dict, words, bigrams); final long write = timeWritingDictToFile(fileName, dict); final long read = timeReadingAndCheckDict(fileName, words, bigrams); deleteFile(fileName); final long write = timeWritingDictToFile(file, dict); final long read = timeReadingAndCheckDict(file, words, bigrams); return "PROF: read=" + read + "ms, write=" + write + "ms :" + message; } private void deleteFile(final String fileName) { final File file = new File(getContext().getFilesDir(), fileName); file.delete(); } public void testReadAndWrite() { final List<String> results = new ArrayList<String>(); Loading @@ -221,4 +238,134 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.d(TAG, result); } } // Tests for readUnigramsAndBigramsBinary private void checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, final Map<Integer, String> resultWords, final Map<Integer, Integer> resultFrequencies, final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) { // check unigrams final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); assertEquals(actualWordsSet, expectedWordsSet); for (int freq : resultFrequencies.values()) { assertEquals(freq, UNIGRAM_FREQ); } // check bigrams final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>(); for (int i = 0; i < expectedBigrams.size(); ++i) { final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); for (int w2 : expectedBigrams.valueAt(i)) { if (expBigrams.get(word1) == null) { expBigrams.put(word1, new ArrayList<String>()); } expBigrams.get(word1).add(expectedWords.get(w2)); } } final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>(); for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { final String word1 = resultWords.get(entry.getKey()); final int unigramFreq = resultFrequencies.get(entry.getKey()); for (PendingAttribute attr : entry.getValue()) { final String word2 = resultWords.get(attr.mAddress); if (actBigrams.get(word1) == null) { actBigrams.put(word1, new ArrayList<String>()); } actBigrams.get(word1).add(word2); final int bigramFreq = BinaryDictInputOutput.reconstructBigramFrequency( unigramFreq, attr.mFrequency); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); } } assertEquals(actBigrams, expBigrams); } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams) { FileInputStream inStream = null; final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); final Map<Integer, ArrayList<PendingAttribute>> resultBigrams = CollectionUtils.newTreeMap(); final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; try { inStream = new FileInputStream(file); final ByteBuffer buffer = inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, file.length()); now = System.currentTimeMillis(); BinaryDictInputOutput.readUnigramsAndBigramsBinary( new BinaryDictInputOutput.ByteBufferWrapper(buffer), resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); } catch (IOException e) { Log.e(TAG, "IOException " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException: " + e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } } return diff; } private void runReadUnigramsAndBigramsBinary(final List<String> words, final SparseArray<List<Integer>> bigrams) { // making the dictionary from lists of words. final FusionDictionary dict = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions( new HashMap<String, String>(), false, false)); File file = null; try { file = File.createTempFile("runReadUnigrams", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } assertNotNull(file); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict); long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams); long fullReading = timeReadingAndCheckDict(file, words, bigrams); Log.d(TAG, "read=" + fullReading + ", bytearray=" + wordMap); } public void testReadUnigramsAndBigramsBinary() { final List<String> results = new ArrayList<String>(); final Random random = new Random(123456); final List<String> words = generateWords(MAX_UNIGRAMS, random); final SparseArray<List<Integer>> emptyArray = CollectionUtils.newSparseArray(); runReadUnigramsAndBigramsBinary(words, emptyArray); final SparseArray<List<Integer>> star = CollectionUtils.newSparseArray(); for (int i = 1; i < words.size(); ++i) { star.put(i-1, new ArrayList<Integer>()); star.get(i-1).add(i); } runReadUnigramsAndBigramsBinary(words, star); } } Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +114 −9 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Stack; import java.util.TreeMap; /** Loading Loading @@ -197,20 +198,21 @@ public class BinaryDictInputOutput { public void position(int newPosition); } private static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { private ByteBuffer buffer; ByteBufferWrapper(final ByteBuffer buffer) { this.buffer = buffer; public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { private ByteBuffer mBuffer; public ByteBufferWrapper(final ByteBuffer buffer) { mBuffer = buffer; } @Override public int readUnsignedByte() { return ((int)buffer.get()) & 0xFF; return ((int)mBuffer.get()) & 0xFF; } @Override public int readUnsignedShort() { return ((int)buffer.getShort()) & 0xFFFF; return ((int)mBuffer.getShort()) & 0xFFFF; } @Override Loading @@ -221,17 +223,17 @@ public class BinaryDictInputOutput { @Override public int readInt() { return buffer.getInt(); return mBuffer.getInt(); } @Override public int position() { return buffer.position(); return mBuffer.position(); } @Override public void position(int newPos) { buffer.position(newPos); mBuffer.position(newPos); return; } } Loading Loading @@ -1367,6 +1369,109 @@ public class BinaryDictInputOutput { return node; } // TODO: move these methods (readUnigramsAndBigramsBinary(|Inner)) and an inner class (Position) // out of this class. private static class Position { public static final int NOT_READ_GROUPCOUNT = -1; public int mAddress; public int mNumOfCharGroup; public int mPosition; public int mLength; public Position(int address, int length) { mAddress = address; mLength = length; mNumOfCharGroup = NOT_READ_GROUPCOUNT; } } /** * Tours all node without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) { int[] pushedChars = new int[MAX_WORD_LENGTH + 1]; Stack<Position> stack = new Stack<Position>(); int index = 0; Position initPos = new Position(headerSize, 0); stack.push(initPos); while (!stack.empty()) { Position p = stack.peek(); if (DBG) { MakedictLog.d("read: address=" + p.mAddress + ", numOfCharGroup=" + p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); } if (buffer.position() != p.mAddress) buffer.position(p.mAddress); if (index != p.mLength) index = p.mLength; if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { p.mNumOfCharGroup = readCharGroupCount(buffer); p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mPosition = 0; } CharGroupInfo info = readCharGroup(buffer, p.mAddress - headerSize); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; } p.mPosition++; if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word words.put(info.mOriginalAddress, new String(pushedChars, 0, index)); frequencies.put(info.mOriginalAddress, info.mFrequency); if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams); } if (p.mPosition == p.mNumOfCharGroup) { stack.pop(); } else { // the node has more groups. p.mAddress = buffer.position(); } if (hasChildrenAddress(info.mChildrenAddress)) { Position childrenPos = new Position(info.mChildrenAddress + headerSize, index); stack.push(childrenPos); } } return; } /** * Reads unigrams and bigrams from the binary file. * Doesn't make the memory representation of the dictionary. * * @param buffer the buffer to read. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException * @throws UnsupportedFormatException */ public static void readUnigramsAndBigramsBinary(final FusionDictionaryBufferInterface buffer, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); final HashMap<String, String> options = new HashMap<String, String>(); final int headerSize = readHeader(buffer, options, version); readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams); } /** * Helper function to get the binary format version from the header. * @throws IOException Loading
tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java +167 −20 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.PendingAttribute; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import android.test.AndroidTestCase; Loading @@ -34,7 +35,10 @@ import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; Loading @@ -46,6 +50,7 @@ public class BinaryDictIOTests extends AndroidTestCase { private static final int MAX_UNIGRAMS = 1000; private static final int UNIGRAM_FREQ = 10; private static final int BIGRAM_FREQ = 50; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; private static final String[] CHARACTERS = { Loading @@ -53,6 +58,7 @@ public class BinaryDictIOTests extends AndroidTestCase { "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" }; // Utilities for test /** * Generates a random word. */ Loading @@ -75,6 +81,9 @@ public class BinaryDictIOTests extends AndroidTestCase { return new ArrayList<String>(wordSet); } /** * Adds unigrams to the dictionary. */ private void addUnigrams(final int number, final FusionDictionary dict, final List<String> words) { Loading @@ -86,19 +95,17 @@ public class BinaryDictIOTests extends AndroidTestCase { private void addBigrams(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> sparseArray) { for (int i = 0; i < sparseArray.size(); ++i) { final int w1 = sparseArray.keyAt(i); for (int w2 : sparseArray.valueAt(i)) { final SparseArray<List<Integer>> bigrams) { for (int i = 0; i < bigrams.size(); ++i) { final int w1 = bigrams.keyAt(i); for (int w2 : bigrams.valueAt(i)) { dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); } } } private long timeWritingDictToFile(final String fileName, final FusionDictionary dict) { private long timeWritingDictToFile(final File file, final FusionDictionary dict) { final File file = new File(getContext().getFilesDir(), fileName); long now = -1, diff = -1; try { Loading Loading @@ -140,15 +147,16 @@ public class BinaryDictIOTests extends AndroidTestCase { } } private long timeReadingAndCheckDict(final String fileName, final List<String> words, // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams) { long now, diff = -1; FileInputStream inStream = null; try { final File file = new File(getContext().getFilesDir(), fileName); final FileInputStream inStream = new FileInputStream(file); inStream = new FileInputStream(file); final ByteBuffer buffer = inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, file.length()); Loading @@ -166,6 +174,14 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.e(TAG, "raise IOException while reading file " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported format: " + e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } } return diff; Loading @@ -178,25 +194,26 @@ public class BinaryDictIOTests extends AndroidTestCase { new FusionDictionary.DictionaryOptions( new HashMap<String,String>(), false, false)); final String fileName = generateWord((int)System.currentTimeMillis()) + ".dict"; File file = null; try { file = File.createTempFile("runReadAndWrite", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } assertNotNull(file); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); // check original dictionary checkDictionary(dict, words, bigrams); final long write = timeWritingDictToFile(fileName, dict); final long read = timeReadingAndCheckDict(fileName, words, bigrams); deleteFile(fileName); final long write = timeWritingDictToFile(file, dict); final long read = timeReadingAndCheckDict(file, words, bigrams); return "PROF: read=" + read + "ms, write=" + write + "ms :" + message; } private void deleteFile(final String fileName) { final File file = new File(getContext().getFilesDir(), fileName); file.delete(); } public void testReadAndWrite() { final List<String> results = new ArrayList<String>(); Loading @@ -221,4 +238,134 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.d(TAG, result); } } // Tests for readUnigramsAndBigramsBinary private void checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, final Map<Integer, String> resultWords, final Map<Integer, Integer> resultFrequencies, final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) { // check unigrams final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); assertEquals(actualWordsSet, expectedWordsSet); for (int freq : resultFrequencies.values()) { assertEquals(freq, UNIGRAM_FREQ); } // check bigrams final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>(); for (int i = 0; i < expectedBigrams.size(); ++i) { final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); for (int w2 : expectedBigrams.valueAt(i)) { if (expBigrams.get(word1) == null) { expBigrams.put(word1, new ArrayList<String>()); } expBigrams.get(word1).add(expectedWords.get(w2)); } } final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>(); for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { final String word1 = resultWords.get(entry.getKey()); final int unigramFreq = resultFrequencies.get(entry.getKey()); for (PendingAttribute attr : entry.getValue()) { final String word2 = resultWords.get(attr.mAddress); if (actBigrams.get(word1) == null) { actBigrams.put(word1, new ArrayList<String>()); } actBigrams.get(word1).add(word2); final int bigramFreq = BinaryDictInputOutput.reconstructBigramFrequency( unigramFreq, attr.mFrequency); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); } } assertEquals(actBigrams, expBigrams); } private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams) { FileInputStream inStream = null; final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); final Map<Integer, ArrayList<PendingAttribute>> resultBigrams = CollectionUtils.newTreeMap(); final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; try { inStream = new FileInputStream(file); final ByteBuffer buffer = inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, file.length()); now = System.currentTimeMillis(); BinaryDictInputOutput.readUnigramsAndBigramsBinary( new BinaryDictInputOutput.ByteBufferWrapper(buffer), resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); } catch (IOException e) { Log.e(TAG, "IOException " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException: " + e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } } return diff; } private void runReadUnigramsAndBigramsBinary(final List<String> words, final SparseArray<List<Integer>> bigrams) { // making the dictionary from lists of words. final FusionDictionary dict = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions( new HashMap<String, String>(), false, false)); File file = null; try { file = File.createTempFile("runReadUnigrams", ".dict"); } catch (IOException e) { Log.e(TAG, "IOException: " + e); } assertNotNull(file); addUnigrams(words.size(), dict, words); addBigrams(dict, words, bigrams); timeWritingDictToFile(file, dict); long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams); long fullReading = timeReadingAndCheckDict(file, words, bigrams); Log.d(TAG, "read=" + fullReading + ", bytearray=" + wordMap); } public void testReadUnigramsAndBigramsBinary() { final List<String> results = new ArrayList<String>(); final Random random = new Random(123456); final List<String> words = generateWords(MAX_UNIGRAMS, random); final SparseArray<List<Integer>> emptyArray = CollectionUtils.newSparseArray(); runReadUnigramsAndBigramsBinary(words, emptyArray); final SparseArray<List<Integer>> star = CollectionUtils.newSparseArray(); for (int i = 1; i < words.size(); ++i) { star.put(i-1, new ArrayList<Integer>()); star.get(i-1).add(i); } runReadUnigramsAndBigramsBinary(words, star); } }