Loading java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +1 −1 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { // TODO: Create "cache dictionary" to cache fresh words for frequently updated dictionaries, // considering performance regression. protected void addWord(final String word, final int frequency) { mFusionDictionary.add(word, frequency, null, null); mFusionDictionary.add(word, frequency, null /* shortcutTargets */); } /** Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +10 −2 Original line number Diff line number Diff line Loading @@ -1317,8 +1317,16 @@ public class BinaryDictInputOutput { 0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG))); if (null != dict) { for (Word w : dict) { newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mBigrams); for (final Word w : dict) { newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets); } for (final Word w : dict) { // By construction a binary dictionary may not have bigrams pointing to // words that are not also registered as unigrams so we don't have to avoid // them explicitly here. for (final WeightedString bigram : w.mBigrams) { newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); } } } Loading java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +18 −23 Original line number Diff line number Diff line Loading @@ -286,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> { for (WeightedString word : words) { final CharGroup t = findWordInTree(mRoot, word.mWord); if (null == t) { add(getCodePoints(word.mWord), 0, null, null); add(getCodePoints(word.mWord), 0, null); } } } Loading @@ -305,12 +305,8 @@ public class FusionDictionary implements Iterable<Word> { * @param bigrams a list of bigrams, or null. */ public void add(final String word, final int frequency, final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams) { if (null != bigrams) { addNeutralWords(bigrams); } add(getCodePoints(word), frequency, shortcutTargets, bigrams); final ArrayList<WeightedString> shortcutTargets) { add(getCodePoints(word), frequency, shortcutTargets); } /** Loading Loading @@ -344,7 +340,7 @@ public class FusionDictionary implements Iterable<Word> { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { // TODO: refactor with the identical code in addNeutralWords add(getCodePoints(word2), 0, null, null); add(getCodePoints(word2), 0, null); } charGroup.addBigram(word2, frequency); } else { Loading @@ -355,17 +351,15 @@ public class FusionDictionary implements Iterable<Word> { /** * Add a word to this dictionary. * * The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't, * The shortcuts, if any, have to be in the dictionary already. If they aren't, * an exception is thrown. * * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param bigrams an optional list of bigrams for this word (null if none). */ private void add(final int[] word, final int frequency, final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams) { final ArrayList<WeightedString> shortcutTargets) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; Loading @@ -390,7 +384,7 @@ public class FusionDictionary implements Iterable<Word> { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), shortcutTargets, bigrams, frequency); shortcutTargets, null /* bigrams */, frequency); currentNode.mData.add(insertionIndex, newGroup); checkStack(currentNode); } else { Loading @@ -400,21 +394,21 @@ public class FusionDictionary implements Iterable<Word> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old CharNode was not a terminal, // make it one by filling in its frequency and other attributes currentGroup.update(frequency, shortcutTargets, bigrams); currentGroup.update(frequency, shortcutTargets, null); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, bigrams, frequency); shortcutTargets, null /* bigrams */, frequency); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } } else { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new bigrams to the existing bigram list if it already exists. currentGroup.update(frequency, shortcutTargets, bigrams); // new shortcuts to the existing shortcut list if it already exists. currentGroup.update(frequency, shortcutTargets, null); } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. Loading @@ -429,14 +423,14 @@ public class FusionDictionary implements Iterable<Word> { if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), shortcutTargets, bigrams, frequency, newChildren); shortcutTargets, null /* bigrams */, frequency, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), null, null, -1, newChildren); final CharGroup newWord = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, bigrams, frequency); null /* shortcutTargets */, null /* bigrams */, -1, newChildren); final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, null /* bigrams */, frequency); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); Loading Loading @@ -494,7 +488,8 @@ public class FusionDictionary implements Iterable<Word> { */ private static int findInsertionIndex(final Node node, int character) { final ArrayList<CharGroup> data = node.mData; final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0); final CharGroup reference = new CharGroup(new int[] { character }, null /* shortcutTargets */, null /* bigrams */, 0); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } Loading tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java +17 −10 Original line number Diff line number Diff line Loading @@ -72,19 +72,15 @@ public class XmlDictInputOutput { int mFreq; // the currently read freq String mWord; // the current word final HashMap<String, ArrayList<WeightedString>> mShortcutsMap; final HashMap<String, ArrayList<WeightedString>> mBigramsMap; /** * Create the handler. * * @param shortcuts the shortcuts as a map. This may be empty, but may not be null. * @param bigrams the bigrams as a map. This may be empty, but may not be null. */ public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts, final HashMap<String, ArrayList<WeightedString>> bigrams) { public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) { mDictionary = null; mShortcutsMap = shortcuts; mBigramsMap = bigrams; mWord = ""; mState = START; mFreq = 0; Loading @@ -94,7 +90,6 @@ public class XmlDictInputOutput { final FusionDictionary dict = mDictionary; mDictionary = null; mShortcutsMap.clear(); mBigramsMap.clear(); mWord = ""; mState = START; mFreq = 0; Loading Loading @@ -143,7 +138,7 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord)); mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord)); mState = START; } } Loading Loading @@ -191,6 +186,7 @@ public class XmlDictInputOutput { } } // This may return an empty map, but will never return null. public HashMap<String, ArrayList<WeightedString>> getAssocMap() { return mAssocMap; } Loading @@ -211,6 +207,7 @@ public class XmlDictInputOutput { BIGRAM_FREQ_ATTRIBUTE); } // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getBigramMap() { return getAssocMap(); } Loading @@ -231,6 +228,7 @@ public class XmlDictInputOutput { TARGET_PRIORITY_ATTRIBUTE); } // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getShortcutMap() { return getAssocMap(); } Loading Loading @@ -260,10 +258,19 @@ public class XmlDictInputOutput { if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); final UnigramHandler unigramHandler = new UnigramHandler(shortcutHandler.getShortcutMap(), bigramHandler.getBigramMap()); new UnigramHandler(shortcutHandler.getShortcutMap()); parser.parse(unigrams, unigramHandler); return unigramHandler.getFinalDictionary(); final FusionDictionary dict = unigramHandler.getFinalDictionary(); final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap(); for (final String firstWord : bigramMap.keySet()) { if (!dict.hasWord(firstWord)) continue; final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); for (final WeightedString bigram : bigramList) { if (!dict.hasWord(bigram.mWord)) continue; dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency); } } return dict; } /** Loading tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java +5 −5 Original line number Diff line number Diff line Loading @@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase { final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(new HashMap<String, String>(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); dict.add("foo", 1, null, null); dict.add("fta", 1, null, null); dict.add("ftb", 1, null, null); dict.add("bar", 1, null, null); dict.add("fool", 1, null, null); dict.add("foo", 1, null); dict.add("fta", 1, null); dict.add("ftb", 1, null); dict.add("bar", 1, null); dict.add("fool", 1, null); final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot); assertEquals(4, result.size()); while (!result.isEmpty()) { Loading Loading
java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +1 −1 Original line number Diff line number Diff line Loading @@ -159,7 +159,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { // TODO: Create "cache dictionary" to cache fresh words for frequently updated dictionaries, // considering performance regression. protected void addWord(final String word, final int frequency) { mFusionDictionary.add(word, frequency, null, null); mFusionDictionary.add(word, frequency, null /* shortcutTargets */); } /** Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +10 −2 Original line number Diff line number Diff line Loading @@ -1317,8 +1317,16 @@ public class BinaryDictInputOutput { 0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG))); if (null != dict) { for (Word w : dict) { newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mBigrams); for (final Word w : dict) { newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets); } for (final Word w : dict) { // By construction a binary dictionary may not have bigrams pointing to // words that are not also registered as unigrams so we don't have to avoid // them explicitly here. for (final WeightedString bigram : w.mBigrams) { newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); } } } Loading
java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +18 −23 Original line number Diff line number Diff line Loading @@ -286,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> { for (WeightedString word : words) { final CharGroup t = findWordInTree(mRoot, word.mWord); if (null == t) { add(getCodePoints(word.mWord), 0, null, null); add(getCodePoints(word.mWord), 0, null); } } } Loading @@ -305,12 +305,8 @@ public class FusionDictionary implements Iterable<Word> { * @param bigrams a list of bigrams, or null. */ public void add(final String word, final int frequency, final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams) { if (null != bigrams) { addNeutralWords(bigrams); } add(getCodePoints(word), frequency, shortcutTargets, bigrams); final ArrayList<WeightedString> shortcutTargets) { add(getCodePoints(word), frequency, shortcutTargets); } /** Loading Loading @@ -344,7 +340,7 @@ public class FusionDictionary implements Iterable<Word> { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { // TODO: refactor with the identical code in addNeutralWords add(getCodePoints(word2), 0, null, null); add(getCodePoints(word2), 0, null); } charGroup.addBigram(word2, frequency); } else { Loading @@ -355,17 +351,15 @@ public class FusionDictionary implements Iterable<Word> { /** * Add a word to this dictionary. * * The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't, * The shortcuts, if any, have to be in the dictionary already. If they aren't, * an exception is thrown. * * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param bigrams an optional list of bigrams for this word (null if none). */ private void add(final int[] word, final int frequency, final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> bigrams) { final ArrayList<WeightedString> shortcutTargets) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; Loading @@ -390,7 +384,7 @@ public class FusionDictionary implements Iterable<Word> { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), shortcutTargets, bigrams, frequency); shortcutTargets, null /* bigrams */, frequency); currentNode.mData.add(insertionIndex, newGroup); checkStack(currentNode); } else { Loading @@ -400,21 +394,21 @@ public class FusionDictionary implements Iterable<Word> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old CharNode was not a terminal, // make it one by filling in its frequency and other attributes currentGroup.update(frequency, shortcutTargets, bigrams); currentGroup.update(frequency, shortcutTargets, null); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, bigrams, frequency); shortcutTargets, null /* bigrams */, frequency); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } } else { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the // new bigrams to the existing bigram list if it already exists. currentGroup.update(frequency, shortcutTargets, bigrams); // new shortcuts to the existing shortcut list if it already exists. currentGroup.update(frequency, shortcutTargets, null); } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. Loading @@ -429,14 +423,14 @@ public class FusionDictionary implements Iterable<Word> { if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), shortcutTargets, bigrams, frequency, newChildren); shortcutTargets, null /* bigrams */, frequency, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), null, null, -1, newChildren); final CharGroup newWord = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, bigrams, frequency); null /* shortcutTargets */, null /* bigrams */, -1, newChildren); final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, null /* bigrams */, frequency); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); Loading Loading @@ -494,7 +488,8 @@ public class FusionDictionary implements Iterable<Word> { */ private static int findInsertionIndex(final Node node, int character) { final ArrayList<CharGroup> data = node.mData; final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0); final CharGroup reference = new CharGroup(new int[] { character }, null /* shortcutTargets */, null /* bigrams */, 0); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } Loading
tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java +17 −10 Original line number Diff line number Diff line Loading @@ -72,19 +72,15 @@ public class XmlDictInputOutput { int mFreq; // the currently read freq String mWord; // the current word final HashMap<String, ArrayList<WeightedString>> mShortcutsMap; final HashMap<String, ArrayList<WeightedString>> mBigramsMap; /** * Create the handler. * * @param shortcuts the shortcuts as a map. This may be empty, but may not be null. * @param bigrams the bigrams as a map. This may be empty, but may not be null. */ public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts, final HashMap<String, ArrayList<WeightedString>> bigrams) { public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) { mDictionary = null; mShortcutsMap = shortcuts; mBigramsMap = bigrams; mWord = ""; mState = START; mFreq = 0; Loading @@ -94,7 +90,6 @@ public class XmlDictInputOutput { final FusionDictionary dict = mDictionary; mDictionary = null; mShortcutsMap.clear(); mBigramsMap.clear(); mWord = ""; mState = START; mFreq = 0; Loading Loading @@ -143,7 +138,7 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord)); mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord)); mState = START; } } Loading Loading @@ -191,6 +186,7 @@ public class XmlDictInputOutput { } } // This may return an empty map, but will never return null. public HashMap<String, ArrayList<WeightedString>> getAssocMap() { return mAssocMap; } Loading @@ -211,6 +207,7 @@ public class XmlDictInputOutput { BIGRAM_FREQ_ATTRIBUTE); } // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getBigramMap() { return getAssocMap(); } Loading @@ -231,6 +228,7 @@ public class XmlDictInputOutput { TARGET_PRIORITY_ATTRIBUTE); } // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getShortcutMap() { return getAssocMap(); } Loading Loading @@ -260,10 +258,19 @@ public class XmlDictInputOutput { if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); final UnigramHandler unigramHandler = new UnigramHandler(shortcutHandler.getShortcutMap(), bigramHandler.getBigramMap()); new UnigramHandler(shortcutHandler.getShortcutMap()); parser.parse(unigrams, unigramHandler); return unigramHandler.getFinalDictionary(); final FusionDictionary dict = unigramHandler.getFinalDictionary(); final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap(); for (final String firstWord : bigramMap.keySet()) { if (!dict.hasWord(firstWord)) continue; final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); for (final WeightedString bigram : bigramList) { if (!dict.hasWord(bigram.mWord)) continue; dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency); } } return dict; } /** Loading
tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java +5 −5 Original line number Diff line number Diff line Loading @@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase { final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(new HashMap<String, String>(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); dict.add("foo", 1, null, null); dict.add("fta", 1, null, null); dict.add("ftb", 1, null, null); dict.add("bar", 1, null, null); dict.add("fool", 1, null, null); dict.add("foo", 1, null); dict.add("fta", 1, null); dict.add("ftb", 1, null); dict.add("bar", 1, null); dict.add("fool", 1, null); final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot); assertEquals(4, result.size()); while (!result.isEmpty()) { Loading