Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit af30cbf0 authored by Jean Chalard's avatar Jean Chalard
Browse files

Rename Node to PtNodeArray

Bug: 10247660
Change-Id: I1a0ac19f58f96adb5efac5fd35c6404831618c99
parent a440aa39
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.CollectionUtils;
@@ -51,7 +51,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter {
    @Override
    public void clear() {
        final HashMap<String, String> attributes = CollectionUtils.newHashMap();
        mFusionDictionary = new FusionDictionary(new Node(),
        mFusionDictionary = new FusionDictionary(new PtNodeArray(),
                new FusionDictionary.DictionaryOptions(attributes, false, false));
    }

+28 −30
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.JniUtils;

@@ -548,31 +548,31 @@ public final class BinaryDictDecoder {
    }

    /**
     * Reads a single node from a buffer.
     * Reads a single node array from a buffer.
     *
     * This method reads the file at the current position. A node is fully expected to start at
     * the current position.
     * This will recursively read other nodes into the structure, populating the reverse
     * This method reads the file at the current position. A node array is fully expected to start
     * at the current position.
     * This will recursively read other node arrays into the structure, populating the reverse
     * maps on the fly and using them to keep track of already read nodes.
     *
     * @param buffer the buffer, correctly positioned at the start of a node.
     * @param buffer the buffer, correctly positioned at the start of a node array.
     * @param headerSize the size, in bytes, of the file header.
     * @param reverseNodeMap a mapping from addresses to already read nodes.
     * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
     * @param reverseGroupMap a mapping from addresses to already read character groups.
     * @param options file format options.
     * @return the read node with all its children already read.
     * @return the read node array with all its children already read.
     */
    private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize,
            final Map<Integer, Node> reverseNodeMap, final Map<Integer, CharGroup> reverseGroupMap,
            final FormatOptions options)
    private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
            final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
            final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
            throws IOException {
        final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
        final int nodeOrigin = buffer.position() - headerSize;
        final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
        final int nodeArrayOrigin = buffer.position() - headerSize;

        do { // Scan the linked-list node.
            final int nodeHeadPosition = buffer.position() - headerSize;
            final int nodeArrayHeadPosition = buffer.position() - headerSize;
            final int count = readCharGroupCount(buffer);
            int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
            int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
            for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
                CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
                if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
@@ -589,21 +589,21 @@ public final class BinaryDictDecoder {
                    }
                }
                if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
                    Node children = reverseNodeMap.get(info.mChildrenAddress);
                    PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
                    if (null == children) {
                        final int currentPosition = buffer.position();
                        buffer.position(info.mChildrenAddress + headerSize);
                        children = readNode(
                                buffer, headerSize, reverseNodeMap, reverseGroupMap, options);
                        children = readNodeArray(
                                buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
                        buffer.position(currentPosition);
                    }
                    nodeContents.add(
                    nodeArrayContents.add(
                            new CharGroup(info.mCharacters, shortcutTargets, bigrams,
                                    info.mFrequency,
                                    0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
                                    0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
                } else {
                    nodeContents.add(
                    nodeArrayContents.add(
                            new CharGroup(info.mCharacters, shortcutTargets, bigrams,
                                    info.mFrequency,
                                    0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
@@ -624,11 +624,11 @@ public final class BinaryDictDecoder {
        } while (options.mSupportsDynamicUpdate &&
                buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);

        final Node node = new Node(nodeContents);
        node.mCachedAddressBeforeUpdate = nodeOrigin;
        node.mCachedAddressAfterUpdate = nodeOrigin;
        reverseNodeMap.put(node.mCachedAddressAfterUpdate, node);
        return node;
        final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
        nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
        nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
        reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
        return nodeArray;
    }

    /**
@@ -733,10 +733,10 @@ public final class BinaryDictDecoder {
        // Read header
        final FileHeader header = readHeader(reader.getBuffer());

        Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
        Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
        Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
        final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping,
                reverseGroupMapping, header.mFormatOptions);
        final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
                reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);

        FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
        if (null != dict) {
@@ -803,8 +803,6 @@ public final class BinaryDictDecoder {
    /**
     * Calculate bigram frequency from compressed value
     *
     * @see #makeBigramFlags
     *
     * @param unigramFrequency
     * @param bigramFrequency compressed frequency
     * @return approximate bigram frequency
+177 −164

File changed.

Preview size limit exceeded, changes collapsed.

+21 −20
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ public final class BinaryDictIOUtils {
    }

    /**
     * Tours all node without recursive call.
     * Retrieves all node arrays without recursive calls.
     */
    private static void readUnigramsAndBigramsBinaryInner(
            final FusionDictionaryBufferInterface buffer, final int headerSize,
@@ -116,7 +116,7 @@ public final class BinaryDictIOUtils {
                if (formatOptions.mSupportsDynamicUpdate) {
                    final int forwardLinkAddress = buffer.readUnsignedInt24();
                    if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
                        // the node has a forward link.
                        // The node array has a forward link.
                        p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
                        p.mAddress = forwardLinkAddress;
                    } else {
@@ -126,7 +126,7 @@ public final class BinaryDictIOUtils {
                    stack.pop();
                }
            } else {
                // the node has more groups.
                // The node array has more groups.
                p.mAddress = buffer.position();
            }

@@ -139,14 +139,14 @@ public final class BinaryDictIOUtils {

    /**
     * Reads unigrams and bigrams from the binary file.
     * Doesn't make the memory representation of the dictionary.
     * Doesn't store a full memory representation of the dictionary.
     *
     * @param reader the reader.
     * @param words the map to store the address as a key and the word as a value.
     * @param frequencies the map to store the address as a key and the frequency as a value.
     * @param bigrams the map to store the address as a key and the list of address as a value.
     * @throws IOException
     * @throws UnsupportedFormatException
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
@@ -165,8 +165,8 @@ public final class BinaryDictIOUtils {
     * @param buffer the buffer to read.
     * @param word the word we search for.
     * @return the address of the terminal node.
     * @throws IOException
     * @throws UnsupportedFormatException
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public static int getTerminalPosition(final FusionDictionaryBufferInterface buffer,
@@ -224,9 +224,9 @@ public final class BinaryDictIOUtils {
                }

                // If we found the next char group, it is under the file pointer.
                // But if not, we are at the end of this node so we expect to have
                // But if not, we are at the end of this node array so we expect to have
                // a forward link address that we need to consult and possibly resume
                // search on the next node in the linked list.
                // search on the next node array in the linked list.
                if (foundNextCharGroup) break;
                if (!header.mFormatOptions.mSupportsDynamicUpdate) {
                    return FormatSpec.NOT_VALID_WORD;
@@ -365,9 +365,10 @@ public final class BinaryDictIOUtils {
    }

    /**
     * Write a char group to an output stream.
     * A char group is an in-memory representation of a node in trie.
     * A char group info is an on-disk representation of a node.
     * Write a char group to an output stream from a CharGroupInfo.
     * A char group is an in-memory representation of a node in the patricia trie.
     * A char group info is a container for low-level information about how the
     * char group is stored in the binary format.
     *
     * @param destination the stream to write.
     * @param info the char group info to be written.
@@ -427,7 +428,7 @@ public final class BinaryDictIOUtils {

        if (info.mBigrams != null) {
            // TODO: Consolidate this code with the code that computes the size of the bigram list
            //        in BinaryDictEncoder#computeActualNodeSize
            //        in BinaryDictEncoder#computeActualNodeArraySize
            for (int i = 0; i < info.mBigrams.size(); ++i) {

                final int bigramFrequency = info.mBigrams.get(i).mFrequency;
@@ -479,14 +480,14 @@ public final class BinaryDictIOUtils {
    }

    /**
     * Write a node to the stream.
     * Write a node array to the stream.
     *
     * @param destination the stream to write.
     * @param infos groups to be written.
     * @param infos an array of CharGroupInfo to be written.
     * @return the size written, in bytes.
     * @throws IOException
     */
    static int writeNode(final OutputStream destination, final CharGroupInfo[] infos)
    static int writeNodes(final OutputStream destination, final CharGroupInfo[] infos)
            throws IOException {
        int size = getGroupCountSize(infos.length);
        switch (getGroupCountSize(infos.length)) {
@@ -604,12 +605,12 @@ public final class BinaryDictIOUtils {
    public static int getGroupCountSize(final int count) {
        if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= count) {
            return 1;
        } else if (FormatSpec.MAX_CHARGROUPS_IN_A_NODE >= count) {
        } else if (FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY >= count) {
            return 2;
        } else {
            throw new RuntimeException("Can't have more than "
                    + FormatSpec.MAX_CHARGROUPS_IN_A_NODE + " groups in a node (found " + count
                    + ")");
                    + FormatSpec.MAX_CHARGROUPS_IN_A_PT_NODE_ARRAY + " groups in a node (found "
                    + count + ")");
        }
    }

+45 −42
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@ public final class DynamicBinaryDictIOUtils {
        }
        final int flags = buffer.readUnsignedByte();
        if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
            // if the group is moved, the parent address is stored in the destination group.
            // If the group is moved, the parent address is stored in the destination group.
            // We are guaranteed to process the destination group later, so there is no need to
            // update anything here.
            buffer.position(originalPosition);
@@ -101,10 +101,10 @@ public final class DynamicBinaryDictIOUtils {
    }

    /**
     * Update parent addresses in a Node that is referred to by nodeOriginAddress.
     * Update parent addresses in a node array stored at nodeOriginAddress.
     *
     * @param buffer the buffer to be modified.
     * @param nodeOriginAddress the address of a modified Node.
     * @param nodeOriginAddress the address of the node array to update.
     * @param newParentAddress the address to be written.
     * @param formatOptions file format options.
     */
@@ -154,7 +154,7 @@ public final class DynamicBinaryDictIOUtils {
     */
    private static int moveCharGroup(final OutputStream destination,
            final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
            final int nodeOriginAddress, final int oldGroupAddress,
            final int nodeArrayOriginAddress, final int oldGroupAddress,
            final FormatOptions formatOptions) throws IOException {
        updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
        buffer.position(oldGroupAddress);
@@ -163,15 +163,16 @@ public final class DynamicBinaryDictIOUtils {
        buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
                & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
        int size = FormatSpec.GROUP_FLAGS_SIZE;
        updateForwardLink(buffer, nodeOriginAddress, buffer.limit(), formatOptions);
        size += BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { info });
        updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
        size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
        return size;
    }

    @SuppressWarnings("unused")
    private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
            final int nodeOriginAddress, final int newNodeAddress,
            final int nodeArrayOriginAddress, final int newNodeArrayAddress,
            final FormatOptions formatOptions) {
        buffer.position(nodeOriginAddress);
        buffer.position(nodeArrayOriginAddress);
        int jumpCount = 0;
        while (jumpCount++ < MAX_JUMPS) {
            final int count = BinaryDictDecoder.readCharGroupCount(buffer);
@@ -179,7 +180,7 @@ public final class DynamicBinaryDictIOUtils {
            final int forwardLinkAddress = buffer.readUnsignedInt24();
            if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
                buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
                BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
                BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
                return;
            }
            buffer.position(forwardLinkAddress);
@@ -190,57 +191,59 @@ public final class DynamicBinaryDictIOUtils {
    }

    /**
     * Move a group that is referred to by oldGroupOrigin to the tail of the file.
     * And set the children address to the byte after the group.
     * Move a group that is referred to by oldGroupOrigin to the tail of the file, and set the
     * children address to the byte after the group.
     *
     * @param nodeOrigin the address of the tail of the file.
     * @param characters
     * @param length
     * @param flags
     * @param frequency
     * @param parentAddress
     * @param shortcutTargets
     * @param bigrams
     * @param fileEndAddress the address of the tail of the file.
     * @param codePoints the characters to put inside the group.
     * @param length how many code points to read from codePoints.
     * @param flags the flags for this group.
     * @param frequency the frequency of this terminal.
     * @param parentAddress the address of the parent group of this group.
     * @param shortcutTargets the shortcut targets for this group.
     * @param bigrams the bigrams for this group.
     * @param destination the stream representing the tail of the file.
     * @param buffer the buffer representing the (constant-size) body of the file.
     * @param oldNodeOrigin
     * @param oldGroupOrigin
     * @param formatOptions
     * @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
     * @param oldGroupOrigin the old origin where this group used to be stored.
     * @param formatOptions format options for this dictionary.
     * @return the size written, in bytes.
     * @throws IOException
     * @throws IOException if the file can't be accessed.
     */
    private static int moveGroup(final int nodeOrigin, final int[] characters, final int length,
            final int flags, final int frequency, final int parentAddress,
    private static int moveGroup(final int fileEndAddress, final int[] codePoints,
            final int length, final int flags, final int frequency, final int parentAddress,
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
            final FusionDictionaryBufferInterface buffer, final int oldNodeOrigin,
            final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
            final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
        int size = 0;
        final int newGroupOrigin = nodeOrigin + 1;
        final int[] writtenCharacters = Arrays.copyOfRange(characters, 0, length);
        final int newGroupOrigin = fileEndAddress + 1;
        final int[] writtenCharacters = Arrays.copyOfRange(codePoints, 0, length);
        final CharGroupInfo tmpInfo = new CharGroupInfo(newGroupOrigin, -1 /* endAddress */,
                flags, writtenCharacters, frequency, parentAddress, FormatSpec.NO_CHILDREN_ADDRESS,
                shortcutTargets, bigrams);
        size = BinaryDictIOUtils.computeGroupSize(tmpInfo, formatOptions);
        final CharGroupInfo newInfo = new CharGroupInfo(newGroupOrigin, newGroupOrigin + size,
                flags, writtenCharacters, frequency, parentAddress,
                nodeOrigin + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
                fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
                bigrams);
        moveCharGroup(destination, buffer, newInfo, oldNodeOrigin, oldGroupOrigin, formatOptions);
        moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
                formatOptions);
        return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
    }

    /**
     * Insert a word into a binary dictionary.
     *
     * @param buffer
     * @param destination
     * @param word
     * @param frequency
     * @param bigramStrings
     * @param shortcuts
     * @throws IOException
     * @throws UnsupportedFormatException
     * @param buffer the buffer containing the existing dictionary.
     * @param destination a stream to the underlying file, with the pointer at the end of the file.
     * @param word the word to insert.
     * @param frequency the frequency of the new word.
     * @param bigramStrings bigram list, or null if none.
     * @param shortcuts shortcut list, or null if none.
     * @param isBlackListEntry whether this should be a blacklist entry.
     * @throws IOException if the file can't be accessed.
     * @throws UnsupportedFormatException if the existing dictionary is in an unexpected format.
     */
    // TODO: Support batch insertion.
    // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@@ -323,7 +326,7 @@ public final class DynamicBinaryDictIOUtils {
                                currentInfo.mFlags, characters2, currentInfo.mFrequency,
                                newNodeAddress + 1, currentInfo.mChildrenAddress,
                                currentInfo.mShortcutTargets, currentInfo.mBigrams);
                        BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo2 });
                        BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo2 });
                        return;
                    } else if (codePoints[wordPos + p] != currentInfo.mCharacters[p]) {
                        if (p > 0) {
@@ -386,7 +389,7 @@ public final class DynamicBinaryDictIOUtils {
                                    newNodeAddress + written, -1 /* endAddress */, flags,
                                    newCharacters, frequency, newNodeAddress + 1,
                                    FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
                            BinaryDictIOUtils.writeNode(destination,
                            BinaryDictIOUtils.writeNodes(destination,
                                    new CharGroupInfo[] { suffixInfo, newInfo });
                            return;
                        }
@@ -438,7 +441,7 @@ public final class DynamicBinaryDictIOUtils {
                        final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
                                characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
                                shortcuts, bigrams);
                        BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[] { newInfo });
                        BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
                        return;
                    }
                    buffer.position(currentInfo.mChildrenAddress);
@@ -482,7 +485,7 @@ public final class DynamicBinaryDictIOUtils {
                final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
                        -1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
                        FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
                BinaryDictIOUtils.writeNode(destination, new CharGroupInfo[]{ newInfo });
                BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[]{ newInfo });
                return;
            } else {
                depth--;
Loading