Loading java/src/com/android/inputmethod/latin/DictionaryWriter.java +2 −2 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.CollectionUtils; Loading Loading @@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter { @Override public void clear() { final HashMap<String, String> attributes = CollectionUtils.newHashMap(); mFusionDictionary = new FusionDictionary(new Node(), mFusionDictionary = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(attributes, false, false)); } Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +28 −30 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.JniUtils; Loading Loading @@ -548,31 +548,31 @@ public final class BinaryDictDecoder { } /** * Reads a single node from a buffer. * Reads a single node array from a buffer. * * This methods reads the file at the current position. A node is fully expected to start at * the current position. * This will recursively read other nodes into the structure, populating the reverse * This methods reads the file at the current position. A node array is fully expected to start * at the current position. * This will recursively read other node arrays into the structure, populating the reverse * maps on the fly and using them to keep track of already read nodes. * * @param buffer the buffer, correctly positioned at the start of a node. * @param buffer the buffer, correctly positioned at the start of a node array. * @param headerSize the size, in bytes, of the file header. * @param reverseNodeMap a mapping from addresses to already read nodes. * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. * @param reverseGroupMap a mapping from addresses to already read character groups. * @param options file format options. * @return the read node with all his children already read. * @return the read node array with all his children already read. */ private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap, final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) throws IOException { final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>(); final int nodeOrigin = buffer.position() - headerSize; final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>(); final int nodeArrayOrigin = buffer.position() - headerSize; do { // Scan the linked-list node. final int nodeHeadPosition = buffer.position() - headerSize; final int nodeArrayHeadPosition = buffer.position() - headerSize; final int count = readCharGroupCount(buffer); int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); for (int i = count; i > 0; --i) { // Scan the array of CharGroup. CharGroupInfo info = readCharGroup(buffer, groupOffset, options); if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; Loading @@ -589,21 +589,21 @@ public final class BinaryDictDecoder { } } if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { Node children = reverseNodeMap.get(info.mChildrenAddress); PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress); if (null == children) { final int currentPosition = buffer.position(); buffer.position(info.mChildrenAddress + headerSize); children = readNode( buffer, headerSize, reverseNodeMap, reverseGroupMap, options); children = readNodeArray( buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options); buffer.position(currentPosition); } nodeContents.add( nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); } else { nodeContents.add( nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), Loading @@ -624,11 +624,11 @@ public final class BinaryDictDecoder { } while (options.mSupportsDynamicUpdate && buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); final Node node = new Node(nodeContents); node.mCachedAddressBeforeUpdate = nodeOrigin; node.mCachedAddressAfterUpdate = nodeOrigin; reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); return node; final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin; nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin; reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray); return nodeArray; } /** Loading Loading @@ -733,10 +733,10 @@ public final class BinaryDictDecoder { // Read header final FileHeader header = readHeader(reader.getBuffer()); Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, reverseGroupMapping, header.mFormatOptions); final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); if (null != dict) { Loading Loading @@ -803,8 +803,6 @@ public final class BinaryDictDecoder { /** * Calculate bigram frequency from compressed value * * @see #makeBigramFlags * * @param unigramFrequency * @param bigramFrequency compressed frequency * @return approximate bigram frequency Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java +177 −164 File changed.Preview size limit exceeded, changes collapsed. Show changes java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +21 −20 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ public final class BinaryDictIOUtils { } /** * Tours all node without recursive call. * Retrieves all node arrays without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, Loading Loading @@ -116,7 +116,7 @@ public final class BinaryDictIOUtils { if (formatOptions.mSupportsDynamicUpdate) { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { // the node has a forward link. // The node array has a forward link. p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mAddress = forwardLinkAddress; } else { Loading @@ -126,7 +126,7 @@ public final class BinaryDictIOUtils { stack.pop(); } } else { // the node has more groups. // The node array has more groups. p.mAddress = buffer.position(); } Loading @@ -139,14 +139,14 @@ public final class BinaryDictIOUtils { /** * Reads unigrams and bigrams from the binary file. * Doesn't make the memory representation of the dictionary. * Doesn't store a full memory representation of the dictionary. * * @param reader the reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException * @throws UnsupportedFormatException * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, Loading @@ -165,8 +165,8 @@ public final class BinaryDictIOUtils { * @param buffer the buffer to read. * @param word the word we search for. * @return the address of the terminal node. * @throws IOException * @throws UnsupportedFormatException * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ @UsedForTesting public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, Loading Loading @@ -224,9 +224,9 @@ public final class BinaryDictIOUtils { } // If we found the next char group, it is under the file pointer. // But if not, we are at the end of this node so we expect to have // But if not, we are at the end of this node array so we expect to have // a forward link address that we need to consult and possibly resume // search on the next node in the linked list. // search on the next node array in the linked list. if (foundNextCharGroup) break; if (!header.mFormatOptions.mSupportsDynamicUpdate) { return FormatSpec.NOT_VALID_WORD; Loading Loading @@ -365,9 +365,10 @@ public final class BinaryDictIOUtils { } /** * Write a char group to an output stream. * A char group is an in-memory representation of a node in trie. * A char group info is an on-disk representation of a node. * Write a char group to an output stream from a CharGroupInfo. * A char group is an in-memory representation of a node in the patricia trie. * A char group info is a container for low-level information about how the * char group is stored in the binary format. * * @param destination the stream to write. * @param info the char group info to be written. Loading Loading @@ -427,7 +428,7 @@ public final class BinaryDictIOUtils { if (info.mBigrams != null) { // TODO: Consolidate this code with the code that computes the size of the bigram list // in BinaryDictEncoder#computeActualNodeSize // in BinaryDictEncoder#computeActualNodeArraySize for (int i = 0; i < info.mBigrams.size(); ++i) { final int bigramFrequency = info.mBigrams.get(i).mFrequency; Loading Loading @@ -479,14 +480,14 @@ public final class BinaryDictIOUtils { } /** * Write a node to the stream. * Write a node array to the stream. * * @param destination the stream to write. * @param infos groups to be written. * @param infos an array of CharGroupInfo to be written. * @return the size written, in bytes. * @throws IOException */ static int writeNode(final OutputStream destination, final CharGroupInfo[] infos) static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos) throws IOException { int size = getGroupCountSize(infos.length); switch (getGroupCountSize(infos.length)) { Loading Loading @@ -604,12 +605,12 @@ public final class BinaryDictIOUtils { public static int getGroupCountSize(final int count) { if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) { return 1; } else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) { } else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) { return 2; } else { throw new RuntimeException("Can't have more than " + FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count + ")"); + FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found " + count + ")"); } } Loading java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +45 −42 Original line number Diff line number Diff line Loading @@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils { } final int flags = buffer.readUnsignedByte(); if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { // if the group is moved, the parent address is stored in the destination group. // If the group is moved, the parent address is stored in the destination group. // We are guaranteed to process the destination group later, so there is no need to // update anything here. buffer.position(originalPosition); Loading @@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils { } /** * Update parent addresses in a Node that is referred to by nodeOriginAddress. * Update parent addresses in a node array stored at nodeOriginAddress. * * @param buffer the buffer to be modified. * @param nodeOriginAddress the address of a modified Node. * @param nodeOriginAddress the address of the node array to update. * @param newParentAddress the address to be written. * @param formatOptions file format options. */ Loading Loading @@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils { */ private static int moveCharGroup(final OutputStream destination, final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, final int nodeOriginAddress, final int oldGroupAddress, final int nodeArrayOriginAddress, final int oldGroupAddress, final FormatOptions formatOptions) throws IOException { updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); buffer.position(oldGroupAddress); Loading @@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils { buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); int size = FormatSpec.GROUP_FLAGS_SIZE; updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions); size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info }); updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions); size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); return size; } @SuppressWarnings("unused") private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, final int nodeOriginAddress, final int newNodeAddress, final int nodeArrayOriginAddress, final int newNodeArrayAddress, final FormatOptions formatOptions) { buffer.position(nodeOriginAddress); buffer.position(nodeArrayOriginAddress); int jumpCount = 0; while (jumpCount++ < MAX_JUMPS) { final int count = BinaryDictDecoder.readCharGroupCount(buffer); Loading @@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress); return; } buffer.position(forwardLinkAddress); Loading @@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils { } /** * Move a group that is referred to by oldGroupOrigin to the tail of the file. * And set the children address to the byte after the group. * Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the * children address to the byte after the group * * @param nodeOrigin the address of the tail of the file. * @param characters * @param length * @param flags * @param frequency * @param parentAddress * @param shortcutTargets * @param bigrams * @param fileEndAddress the address of the tail of the file. * @param codePoints the characters to put inside the group. * @param length how many code points to read from codePoints. * @param flags the flags for this group. * @param frequency the frequency of this terminal. * @param parentAddress the address of the parent group of this group. * @param shortcutTargets the shortcut targets for this group. * @param bigrams the bigrams for this group. * @param destination the stream representing the tail of the file. * @param buffer the buffer representing the (constant-size) body of the file. * @param oldNodeOrigin * @param oldGroupOrigin * @param formatOptions * @param oldNodeArrayOrigin the origin of the old node array this group was a part of. * @param oldGroupOrigin the old origin where this group used to be stored. * @param formatOptions format options for this dictionary. * @return the size written, in bytes. * @throws IOException * @throws IOException if the file can't be accessed */ private static int moveGroup(final int nodeOrigin, final int[] characters, final int length, final int flags, final int frequency, final int parentAddress, private static int moveGroup(final int fileEndAddress, final int[] codePoints, final int length, final int flags, final int frequency, final int parentAddress, final ArrayList<WeightedString> shortcutTargets, final ArrayList<PendingAttribute> bigrams, final OutputStream destination, final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin, final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin, final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { int size = 0; final int newGroupOrigin = nodeOrigin + 1; final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length); final int newGroupOrigin = fileEndAddress + 1; final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length); final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcutTargets, bigrams); size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, flags, writtenCharacters, frequency, parentAddress, nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, bigrams); moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions); moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, formatOptions); return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } /** * Insert a word into a binary dictionary. * * @param buffer * @param destination * @param word * @param frequency * @param bigramStrings * @param shortcuts * @throws IOException * @throws UnsupportedFormatException * @param buffer the buffer containing the existing dictionary. * @param destination a stream to the underlying file, with the pointer at the end of the file. * @param word the word to insert. * @param frequency the frequency of the new word. * @param bigramStrings bigram list, or null if none. * @param shortcuts shortcut list, or null if none. * @param isBlackListEntry whether this should be a blacklist entry. * @throws IOException if the file can't be accessed. * @throws UnsupportedFormatException if the existing dictionary is in an unexpected format. */ // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. Loading Loading @@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils { currentInfo.mFlags, characters2, currentInfo.mFrequency, newNodeAddress + 1, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mBigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 }); return; } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { if (p > 0) { Loading Loading @@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils { newNodeAddress + written, -1 /* endAddress */, flags, newCharacters, frequency, newNodeAddress + 1, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { suffixInfo, newInfo }); return; } Loading Loading @@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); return; } buffer.position(currentInfo.mChildrenAddress); Loading Loading @@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo }); return; } else { depth--; Loading Loading
java/src/com/android/inputmethod/latin/DictionaryWriter.java +2 −2 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.utils.CollectionUtils; Loading Loading @@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter { @Override public void clear() { final HashMap<String, String> attributes = CollectionUtils.newHashMap(); mFusionDictionary = new FusionDictionary(new Node(), mFusionDictionary = new FusionDictionary(new PtNodeArray(), new FusionDictionary.DictionaryOptions(attributes, false, false)); } Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +28 −30 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.JniUtils; Loading Loading @@ -548,31 +548,31 @@ public final class BinaryDictDecoder { } /** * Reads a single node from a buffer. * Reads a single node array from a buffer. * * This methods reads the file at the current position. A node is fully expected to start at * the current position. * This will recursively read other nodes into the structure, populating the reverse * This methods reads the file at the current position. A node array is fully expected to start * at the current position. * This will recursively read other node arrays into the structure, populating the reverse * maps on the fly and using them to keep track of already read nodes. * * @param buffer the buffer, correctly positioned at the start of a node. * @param buffer the buffer, correctly positioned at the start of a node array. * @param headerSize the size, in bytes, of the file header. * @param reverseNodeMap a mapping from addresses to already read nodes. * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. * @param reverseGroupMap a mapping from addresses to already read character groups. * @param options file format options. * @return the read node with all his children already read. * @return the read node array with all his children already read. */ private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer, final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap, final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options) throws IOException { final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>(); final int nodeOrigin = buffer.position() - headerSize; final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>(); final int nodeArrayOrigin = buffer.position() - headerSize; do { // Scan the linked-list node. final int nodeHeadPosition = buffer.position() - headerSize; final int nodeArrayHeadPosition = buffer.position() - headerSize; final int count = readCharGroupCount(buffer); int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); for (int i = count; i > 0; --i) { // Scan the array of CharGroup. CharGroupInfo info = readCharGroup(buffer, groupOffset, options); if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; Loading @@ -589,21 +589,21 @@ public final class BinaryDictDecoder { } } if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { Node children = reverseNodeMap.get(info.mChildrenAddress); PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress); if (null == children) { final int currentPosition = buffer.position(); buffer.position(info.mChildrenAddress + headerSize); children = readNode( buffer, headerSize, reverseNodeMap, reverseGroupMap, options); children = readNodeArray( buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options); buffer.position(currentPosition); } nodeContents.add( nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); } else { nodeContents.add( nodeArrayContents.add( new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), Loading @@ -624,11 +624,11 @@ public final class BinaryDictDecoder { } while (options.mSupportsDynamicUpdate && buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); final Node node = new Node(nodeContents); node.mCachedAddressBeforeUpdate = nodeOrigin; node.mCachedAddressAfterUpdate = nodeOrigin; reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); return node; final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin; nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin; reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray); return nodeArray; } /** Loading Loading @@ -733,10 +733,10 @@ public final class BinaryDictDecoder { // Read header final FileHeader header = readHeader(reader.getBuffer()); Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, reverseGroupMapping, header.mFormatOptions); final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); if (null != dict) { Loading Loading @@ -803,8 +803,6 @@ public final class BinaryDictDecoder { /** * Calculate bigram frequency from compressed value * * @see #makeBigramFlags * * @param unigramFrequency * @param bigramFrequency compressed frequency * @return approximate bigram frequency Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java +177 −164 File changed.Preview size limit exceeded, changes collapsed. Show changes
java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +21 −20 Original line number Diff line number Diff line Loading @@ -59,7 +59,7 @@ public final class BinaryDictIOUtils { } /** * Tours all node without recursive call. * Retrieves all node arrays without recursive call. */ private static void readUnigramsAndBigramsBinaryInner( final FusionDictionaryBufferInterface buffer, final int headerSize, Loading Loading @@ -116,7 +116,7 @@ public final class BinaryDictIOUtils { if (formatOptions.mSupportsDynamicUpdate) { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { // the node has a forward link. // The node array has a forward link. p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mAddress = forwardLinkAddress; } else { Loading @@ -126,7 +126,7 @@ public final class BinaryDictIOUtils { stack.pop(); } } else { // the node has more groups. // The node array has more groups. p.mAddress = buffer.position(); } Loading @@ -139,14 +139,14 @@ public final class BinaryDictIOUtils { /** * Reads unigrams and bigrams from the binary file. * Doesn't make the memory representation of the dictionary. * Doesn't store a full memory representation of the dictionary. * * @param reader the reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException * @throws UnsupportedFormatException * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, Loading @@ -165,8 +165,8 @@ public final class BinaryDictIOUtils { * @param buffer the buffer to read. * @param word the word we search for. * @return the address of the terminal node. * @throws IOException * @throws UnsupportedFormatException * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ @UsedForTesting public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer, Loading Loading @@ -224,9 +224,9 @@ public final class BinaryDictIOUtils { } // If we found the next char group, it is under the file pointer. // But if not, we are at the end of this node so we expect to have // But if not, we are at the end of this node array so we expect to have // a forward link address that we need to consult and possibly resume // search on the next node in the linked list. // search on the next node array in the linked list. if (foundNextCharGroup) break; if (!header.mFormatOptions.mSupportsDynamicUpdate) { return FormatSpec.NOT_VALID_WORD; Loading Loading @@ -365,9 +365,10 @@ public final class BinaryDictIOUtils { } /** * Write a char group to an output stream. * A char group is an in-memory representation of a node in trie. * A char group info is an on-disk representation of a node. * Write a char group to an output stream from a CharGroupInfo. * A char group is an in-memory representation of a node in the patricia trie. * A char group info is a container for low-level information about how the * char group is stored in the binary format. * * @param destination the stream to write. * @param info the char group info to be written. Loading Loading @@ -427,7 +428,7 @@ public final class BinaryDictIOUtils { if (info.mBigrams != null) { // TODO: Consolidate this code with the code that computes the size of the bigram list // in BinaryDictEncoder#computeActualNodeSize // in BinaryDictEncoder#computeActualNodeArraySize for (int i = 0; i < info.mBigrams.size(); ++i) { final int bigramFrequency = info.mBigrams.get(i).mFrequency; Loading Loading @@ -479,14 +480,14 @@ public final class BinaryDictIOUtils { } /** * Write a node to the stream. * Write a node array to the stream. * * @param destination the stream to write. * @param infos groups to be written. * @param infos an array of CharGroupInfo to be written. * @return the size written, in bytes. * @throws IOException */ static int writeNode(final OutputStream destination, final CharGroupInfo[] infos) static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos) throws IOException { int size = getGroupCountSize(infos.length); switch (getGroupCountSize(infos.length)) { Loading Loading @@ -604,12 +605,12 @@ public final class BinaryDictIOUtils { public static int getGroupCountSize(final int count) { if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) { return 1; } else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) { } else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) { return 2; } else { throw new RuntimeException("Can't have more than " + FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count + ")"); + FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found " + count + ")"); } } Loading
java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +45 −42 Original line number Diff line number Diff line Loading @@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils { } final int flags = buffer.readUnsignedByte(); if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { // if the group is moved, the parent address is stored in the destination group. // If the group is moved, the parent address is stored in the destination group. // We are guaranteed to process the destination group later, so there is no need to // update anything here. buffer.position(originalPosition); Loading @@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils { } /** * Update parent addresses in a Node that is referred to by nodeOriginAddress. * Update parent addresses in a node array stored at nodeOriginAddress. * * @param buffer the buffer to be modified. * @param nodeOriginAddress the address of a modified Node. * @param nodeOriginAddress the address of the node array to update. * @param newParentAddress the address to be written. * @param formatOptions file format options. */ Loading Loading @@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils { */ private static int moveCharGroup(final OutputStream destination, final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, final int nodeOriginAddress, final int oldGroupAddress, final int nodeArrayOriginAddress, final int oldGroupAddress, final FormatOptions formatOptions) throws IOException { updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); buffer.position(oldGroupAddress); Loading @@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils { buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); int size = FormatSpec.GROUP_FLAGS_SIZE; updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions); size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info }); updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions); size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); return size; } @SuppressWarnings("unused") private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, final int nodeOriginAddress, final int newNodeAddress, final int nodeArrayOriginAddress, final int newNodeArrayAddress, final FormatOptions formatOptions) { buffer.position(nodeOriginAddress); buffer.position(nodeArrayOriginAddress); int jumpCount = 0; while (jumpCount++ < MAX_JUMPS) { final int count = BinaryDictDecoder.readCharGroupCount(buffer); Loading @@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils { final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress); return; } buffer.position(forwardLinkAddress); Loading @@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils { } /** * Move a group that is referred to by oldGroupOrigin to the tail of the file. * And set the children address to the byte after the group. * Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the * children address to the byte after the group * * @param nodeOrigin the address of the tail of the file. * @param characters * @param length * @param flags * @param frequency * @param parentAddress * @param shortcutTargets * @param bigrams * @param fileEndAddress the address of the tail of the file. * @param codePoints the characters to put inside the group. * @param length how many code points to read from codePoints. * @param flags the flags for this group. * @param frequency the frequency of this terminal. * @param parentAddress the address of the parent group of this group. * @param shortcutTargets the shortcut targets for this group. * @param bigrams the bigrams for this group. * @param destination the stream representing the tail of the file. * @param buffer the buffer representing the (constant-size) body of the file. * @param oldNodeOrigin * @param oldGroupOrigin * @param formatOptions * @param oldNodeArrayOrigin the origin of the old node array this group was a part of. * @param oldGroupOrigin the old origin where this group used to be stored. * @param formatOptions format options for this dictionary. * @return the size written, in bytes. * @throws IOException * @throws IOException if the file can't be accessed */ private static int moveGroup(final int nodeOrigin, final int[] characters, final int length, final int flags, final int frequency, final int parentAddress, private static int moveGroup(final int fileEndAddress, final int[] codePoints, final int length, final int flags, final int frequency, final int parentAddress, final ArrayList<WeightedString> shortcutTargets, final ArrayList<PendingAttribute> bigrams, final OutputStream destination, final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin, final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin, final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { int size = 0; final int newGroupOrigin = nodeOrigin + 1; final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length); final int newGroupOrigin = fileEndAddress + 1; final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length); final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */, flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcutTargets, bigrams); size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size, flags, writtenCharacters, frequency, parentAddress, nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, bigrams); moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions); moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, formatOptions); return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } /** * Insert a word into a binary dictionary. * * @param buffer * @param destination * @param word * @param frequency * @param bigramStrings * @param shortcuts * @throws IOException * @throws UnsupportedFormatException * @param buffer the buffer containing the existing dictionary. * @param destination a stream to the underlying file, with the pointer at the end of the file. * @param word the word to insert. * @param frequency the frequency of the new word. * @param bigramStrings bigram list, or null if none. * @param shortcuts shortcut list, or null if none. * @param isBlackListEntry whether this should be a blacklist entry. * @throws IOException if the file can't be accessed. * @throws UnsupportedFormatException if the existing dictionary is in an unexpected format. */ // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. Loading Loading @@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils { currentInfo.mFlags, characters2, currentInfo.mFrequency, newNodeAddress + 1, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mBigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 }); return; } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) { if (p > 0) { Loading Loading @@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils { newNodeAddress + written, -1 /* endAddress */, flags, newCharacters, frequency, newNodeAddress + 1, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { suffixInfo, newInfo }); return; } Loading Loading @@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); return; } buffer.position(currentInfo.mChildrenAddress); Loading Loading @@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils { final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo }); return; } else { depth--; Loading