Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +67 −1 Original line number Diff line number Diff line Loading @@ -16,10 +16,11 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import java.io.IOException; import java.util.ArrayList; Loading Loading @@ -124,4 +125,69 @@ public class BinaryDictIOUtils { readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } /** * Gets the address of the last CharGroup of the exact matching word in the dictionary. * If no match is found, returns NOT_VALID_WORD. * * @param buffer the buffer to read. * @param word the word we search for. * @return the address of the terminal node. 
* @throws IOException propagated from the dictionary reads performed during the search.
* @throws UnsupportedFormatException propagated from the dictionary reads performed during
*         the search.
*/
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
        final String word) throws IOException, UnsupportedFormatException {
    if (word == null) return FormatSpec.NOT_VALID_WORD;
    // The header is parsed from the start of the buffer, so rewind first.
    if (buffer.position() != 0) buffer.position(0);

    final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
    // wordPos and wordLen are counted in code points, not in UTF-16 chars.
    final int wordLen = word.codePointCount(0, word.length());
    int wordPos = 0;
    // Each iteration handles one node and, on a partial match, descends to its children.
    for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) {
        if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;

        final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
        // UTF-16 index of the first code point not matched yet. It is the same for every
        // group of this node, so it is computed once instead of once per group.
        final int firstUnmatchedIndex = word.offsetByCodePoints(0, wordPos);
        boolean descended = false;
        for (int i = 0; i < charGroupCount; ++i) {
            final int charGroupPos = buffer.position();
            final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
                    charGroupPos, header.mFormatOptions);
            final int groupLength = currentInfo.mCharacters.length;

            // Does the group's character sequence match the word starting at wordPos?
            boolean same = true;
            for (int p = 0, j = firstUnmatchedIndex; p < groupLength;
                    ++p, j = word.offsetByCodePoints(j, 1)) {
                if (wordPos + p >= wordLen
                        || word.codePointAt(j) != currentInfo.mCharacters[p]) {
                    same = false;
                    break;
                }
            }
            if (!same) continue;

            if (wordPos + groupLength == wordLen) {
                // The whole word is consumed: it is in the dictionary only if this group
                // is a terminal.
                return currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL
                        ? FormatSpec.NOT_VALID_WORD : charGroupPos;
            }
            wordPos += groupLength;
            if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
                return FormatSpec.NOT_VALID_WORD;
            }
            buffer.position(currentInfo.mChildrenAddress);
            descended = true;
            break;
        }
        // No group of this node matched (this also covers a node with zero groups), so the
        // word is absent. Returning here keeps the next iteration from interpreting the
        // bytes that follow this node as another node.
        if (!descended) return FormatSpec.NOT_VALID_WORD;
    }
    return FormatSpec.NOT_VALID_WORD;
}
}
// (diff page) next file: java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +3 −2, hunk @@ -1242,8 +1242,9 @@
public class BinaryDictInputOutput { * @param formatOptions file format options. * @return the word, as a string. */ private static String getWordAtAddress(final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FormatOptions formatOptions) { /* packages for tests */ static String getWordAtAddress( final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FormatOptions formatOptions) { final String cachedString = wordCache.get(address); if (null != cachedString) return cachedString; Loading java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +3 −0 Original line number Diff line number Diff line Loading @@ -207,6 +207,9 @@ public final class FormatSpec { static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_BIGRAM_FREQUENCY = 15; // This option needs to be the same numeric value as the one in binary_format.h. static final int NOT_VALID_WORD = -99; /** * Options about file format. */ Loading tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +90 −1 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.CollectionUtils; import com.android.inputmethod.latin.UserHistoryDictIOUtils; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; Loading Loading @@ -475,4 +475,93 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.d(TAG, result); } } // Tests for getTerminalPosition private String getWordFromBinary(final FusionDictionaryBufferInterface buffer, 
final int address) {
    // Continuation of getWordFromBinary(buffer, address): reads back the word stored at
    // 'address', or returns null when the header cannot be read.
    // The header is parsed from the start of the buffer, so rewind first.
    if (buffer.position() != 0) buffer.position(0);

    FileHeader header = null;
    try {
        header = BinaryDictInputOutput.readHeader(buffer);
    } catch (IOException e) {
        return null;
    } catch (UnsupportedFormatException e) {
        return null;
    }
    if (header == null) return null;
    // 'address' is a position in the whole buffer; the header size is subtracted before
    // the call (presumably getWordAtAddress expects a header-relative address - confirm).
    return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize,
            address - header.mHeaderSize, header.mFormatOptions);
}

/**
 * Looks up one word with getTerminalPosition and checks the outcome.
 *
 * @param buffer the dictionary buffer to search.
 * @param word the word to look up.
 * @param index not used by this method; kept so existing call sites stay valid.
 * @param contained whether the word is expected to be found.
 * @return the lookup time in nanoseconds, or -1 if the lookup threw.
 */
private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer,
        final String word, int index, boolean contained) {
    long diff = -1;
    int position = -1;
    try {
        final long now = System.nanoTime();
        position = BinaryDictIOUtils.getTerminalPosition(buffer, word);
        diff = System.nanoTime() - now;
    } catch (IOException e) {
        Log.e(TAG, "IOException while getTerminalPosition: " + e);
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "UnsupportedFormatException while getTerminalPosition: " + e);
    }

    // Expected value first, so a failure message reports the two values the right way round.
    assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
    if (contained) assertEquals(word, getWordFromBinary(buffer, position));
    return diff;
}

/**
 * Writes a dictionary built from sWords to a temporary file, then checks that
 * getTerminalPosition rejects invalid inputs, finds every word of sWords and does not
 * find generated words that are not in sWords.
 */
public void testGetTerminalPosition() {
    File file = null;
    try {
        file = File.createTempFile("runReadUnigrams", ".dict");
    } catch (IOException e) {
        fail("Could not create the temporary dictionary file: " + e);
    }
    assertNotNull(file);

    final FusionDictionary dict = new FusionDictionary(new Node(),
            new FusionDictionary.DictionaryOptions(
                    new HashMap<String, String>(), false, false));
    addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
    timeWritingDictToFile(file, dict, VERSION3_WITH_LINKEDLIST_NODE);

    final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY);

    try {
        // too long word
        final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, longWord));

        // null
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, null));

        // empty string
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, ""));
    } catch (IOException e) {
        // An exception here would otherwise skip the remaining assertions silently.
        fail("IOException while getTerminalPosition: " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while getTerminalPosition: " + e);
    }

    // Test a word that is contained within the dictionary.
    long sum = 0;
    for (int i = 0; i < sWords.size(); ++i) {
        final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true);
        sum += time == -1 ? 0 : time;
    }
    Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));

    // Test a word that isn't contained within the dictionary.
    // The seed is logged so that a failing run can be reproduced.
    final int seed = (int)System.currentTimeMillis();
    Log.d(TAG, "testGetTerminalPosition random seed : " + seed);
    final Random random = new Random(seed);
    for (int i = 0; i < 1000; ++i) {
        final String word = generateWord(random.nextInt());
        if (sWords.contains(word)) continue;
        runGetTerminalPosition(buffer, word, i, false);
    }
}
}
java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +67 −1 Original line number Diff line number Diff line Loading @@ -16,10 +16,11 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import java.io.IOException; import java.util.ArrayList; Loading Loading @@ -124,4 +125,69 @@ public class BinaryDictIOUtils { readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } /** * Gets the address of the last CharGroup of the exact matching word in the dictionary. * If no match is found, returns NOT_VALID_WORD. * * @param buffer the buffer to read. * @param word the word we search for. * @return the address of the terminal node. 
* @throws IOException propagated from the dictionary reads performed during the search.
* @throws UnsupportedFormatException propagated from the dictionary reads performed during
*         the search.
*/
public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
        final String word) throws IOException, UnsupportedFormatException {
    if (word == null) return FormatSpec.NOT_VALID_WORD;
    // The header is parsed from the start of the buffer, so rewind first.
    if (buffer.position() != 0) buffer.position(0);

    final FileHeader header = BinaryDictInputOutput.readHeader(buffer);
    // wordPos and wordLen are counted in code points, not in UTF-16 chars.
    final int wordLen = word.codePointCount(0, word.length());
    int wordPos = 0;
    // Each iteration handles one node and, on a partial match, descends to its children.
    for (int depth = 0; depth < Constants.Dictionary.MAX_WORD_LENGTH; ++depth) {
        if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;

        final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer);
        // UTF-16 index of the first code point not matched yet. It is the same for every
        // group of this node, so it is computed once instead of once per group.
        final int firstUnmatchedIndex = word.offsetByCodePoints(0, wordPos);
        boolean descended = false;
        for (int i = 0; i < charGroupCount; ++i) {
            final int charGroupPos = buffer.position();
            final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
                    charGroupPos, header.mFormatOptions);
            final int groupLength = currentInfo.mCharacters.length;

            // Does the group's character sequence match the word starting at wordPos?
            boolean same = true;
            for (int p = 0, j = firstUnmatchedIndex; p < groupLength;
                    ++p, j = word.offsetByCodePoints(j, 1)) {
                if (wordPos + p >= wordLen
                        || word.codePointAt(j) != currentInfo.mCharacters[p]) {
                    same = false;
                    break;
                }
            }
            if (!same) continue;

            if (wordPos + groupLength == wordLen) {
                // The whole word is consumed: it is in the dictionary only if this group
                // is a terminal.
                return currentInfo.mFrequency == CharGroup.NOT_A_TERMINAL
                        ? FormatSpec.NOT_VALID_WORD : charGroupPos;
            }
            wordPos += groupLength;
            if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
                return FormatSpec.NOT_VALID_WORD;
            }
            buffer.position(currentInfo.mChildrenAddress);
            descended = true;
            break;
        }
        // No group of this node matched (this also covers a node with zero groups), so the
        // word is absent. Returning here keeps the next iteration from interpreting the
        // bytes that follow this node as another node.
        if (!descended) return FormatSpec.NOT_VALID_WORD;
    }
    return FormatSpec.NOT_VALID_WORD;
}
}
java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +3 −2 Original line number Diff line number Diff line Loading @@ -1242,8 +1242,9 @@ public class BinaryDictInputOutput { * @param formatOptions file format options. * @return the word, as a string. */ private static String getWordAtAddress(final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FormatOptions formatOptions) { /* packages for tests */ static String getWordAtAddress( final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, final FormatOptions formatOptions) { final String cachedString = wordCache.get(address); if (null != cachedString) return cachedString; Loading
java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +3 −0 Original line number Diff line number Diff line Loading @@ -207,6 +207,9 @@ public final class FormatSpec { static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_BIGRAM_FREQUENCY = 15; // This option needs to be the same numeric value as the one in binary_format.h. static final int NOT_VALID_WORD = -99; /** * Options about file format. */ Loading
tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +90 −1 Original line number Diff line number Diff line Loading @@ -19,7 +19,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.CollectionUtils; import com.android.inputmethod.latin.UserHistoryDictIOUtils; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; Loading Loading @@ -475,4 +475,93 @@ public class BinaryDictIOTests extends AndroidTestCase { Log.d(TAG, result); } } // Tests for getTerminalPosition private String getWordFromBinary(final FusionDictionaryBufferInterface buffer, final int address) { if (buffer.position() != 0) buffer.position(0); FileHeader header = null; try { header = BinaryDictInputOutput.readHeader(buffer); } catch (IOException e) { return null; } catch (UnsupportedFormatException e) { return null; } if (header == null) return null; return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, address - header.mHeaderSize, header.mFormatOptions); } private long runGetTerminalPosition(final FusionDictionaryBufferInterface buffer, final String word, int index, boolean contained) { final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; long diff = -1; int position = -1; try { final long now = System.nanoTime(); position = BinaryDictIOUtils.getTerminalPosition(buffer, word); diff = System.nanoTime() - now; } catch (IOException e) { Log.e(TAG, "IOException while getTerminalPosition: " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "UnsupportedFormatException while getTermianlPosition: " + e); } 
// Tail of runGetTerminalPosition, whose beginning is on the previous line.
// Expected value first, so a failure message reports the two values the right way round.
assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
if (contained) assertEquals(word, getWordFromBinary(buffer, position));
return diff;
}

/**
 * Writes a dictionary built from sWords to a temporary file, then checks that
 * getTerminalPosition rejects invalid inputs, finds every word of sWords and does not
 * find generated words that are not in sWords.
 */
public void testGetTerminalPosition() {
    File file = null;
    try {
        file = File.createTempFile("runReadUnigrams", ".dict");
    } catch (IOException e) {
        fail("Could not create the temporary dictionary file: " + e);
    }
    assertNotNull(file);

    final FusionDictionary dict = new FusionDictionary(new Node(),
            new FusionDictionary.DictionaryOptions(
                    new HashMap<String, String>(), false, false));
    addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
    timeWritingDictToFile(file, dict, VERSION3_WITH_LINKEDLIST_NODE);

    final FusionDictionaryBufferInterface buffer = getBuffer(file, USE_BYTE_ARRAY);

    try {
        // too long word
        final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, longWord));

        // null
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, null));

        // empty string
        assertEquals(FormatSpec.NOT_VALID_WORD,
                BinaryDictIOUtils.getTerminalPosition(buffer, ""));
    } catch (IOException e) {
        // An exception here would otherwise skip the remaining assertions silently.
        fail("IOException while getTerminalPosition: " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while getTerminalPosition: " + e);
    }

    // Test a word that is contained within the dictionary.
    long sum = 0;
    for (int i = 0; i < sWords.size(); ++i) {
        final long time = runGetTerminalPosition(buffer, sWords.get(i), i, true);
        sum += time == -1 ? 0 : time;
    }
    Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));

    // Test a word that isn't contained within the dictionary.
    // The seed is logged so that a failing run can be reproduced.
    final int seed = (int)System.currentTimeMillis();
    Log.d(TAG, "testGetTerminalPosition random seed : " + seed);
    final Random random = new Random(seed);
    for (int i = 0; i < 1000; ++i) {
        final String word = generateWord(random.nextInt());
        if (sWords.contains(word)) continue;
        runGetTerminalPosition(buffer, word, i, false);
    }
}
}