Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 91cbe356 authored by Jean Chalard's avatar Jean Chalard
Browse files

[FD2] Separate cached address before/after update for nodes.

Bug: 8526576
Change-Id: Ib9f8594a9e12dc75eba296faff2612c4bd7483d3
parent 62f3b4e9
Loading
Loading
Loading
Loading
+59 −37
Original line number Original line Diff line number Diff line
@@ -384,12 +384,13 @@ public final class BinaryDictInputOutput {


    /**
    /**
     * Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
     * Compute the maximum size of a node, assuming 3-byte addresses for everything, and caches
     * it in the 'actualSize' member of the node.
     * it in the 'mCachedSize' member of the node, then returns it.
     *
     *
     * @param node the node to compute the maximum size of.
     * @param node the node to compute the maximum size of.
     * @param options file format options.
     * @param options file format options.
     * @return the size of the node.
     */
     */
    private static void setNodeMaximumSize(final Node node, final FormatOptions options) {
    private static int calculateNodeMaximumSize(final Node node, final FormatOptions options) {
        int size = getGroupCountSize(node);
        int size = getGroupCountSize(node);
        for (CharGroup g : node.mData) {
        for (CharGroup g : node.mData) {
            final int groupSize = getCharGroupMaximumSize(g, options);
            final int groupSize = getCharGroupMaximumSize(g, options);
@@ -400,6 +401,7 @@ public final class BinaryDictInputOutput {
            size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
            size += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
        }
        }
        node.mCachedSize = size;
        node.mCachedSize = size;
        return size;
    }
    }


    /**
    /**
@@ -548,17 +550,17 @@ public final class BinaryDictInputOutput {
        boolean changed = false;
        boolean changed = false;
        int size = getGroupCountSize(node);
        int size = getGroupCountSize(node);
        for (CharGroup group : node.mData) {
        for (CharGroup group : node.mData) {
            if (group.mCachedAddress != node.mCachedAddress + size) {
            if (group.mCachedAddress != node.mCachedAddressBeforeUpdate + size) {
                changed = true;
                changed = true;
                group.mCachedAddress = node.mCachedAddress + size;
                group.mCachedAddress = node.mCachedAddressBeforeUpdate + size;
            }
            }
            int groupSize = getGroupHeaderSize(group, formatOptions);
            int groupSize = getGroupHeaderSize(group, formatOptions);
            if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
            if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
            if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) {
            if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) {
                groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
                groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
            } else if (null != group.mChildren) {
            } else if (null != group.mChildren) {
                final int offsetBasePoint = groupSize + node.mCachedAddress + size;
                final int offsetBasePoint = groupSize + node.mCachedAddressBeforeUpdate + size;
                final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
                final int offset = group.mChildren.mCachedAddressBeforeUpdate - offsetBasePoint;
                if (formatOptions.mSupportsDynamicUpdate) {
                if (formatOptions.mSupportsDynamicUpdate) {
                    groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
                    groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
                } else {
                } else {
@@ -568,7 +570,7 @@ public final class BinaryDictInputOutput {
            groupSize += getShortcutListSize(group.mShortcutTargets);
            groupSize += getShortcutListSize(group.mShortcutTargets);
            if (null != group.mBigrams) {
            if (null != group.mBigrams) {
                for (WeightedString bigram : group.mBigrams) {
                for (WeightedString bigram : group.mBigrams) {
                    final int offsetBasePoint = groupSize + node.mCachedAddress + size
                    final int offsetBasePoint = groupSize + node.mCachedAddressBeforeUpdate + size
                            + FormatSpec.GROUP_FLAGS_SIZE;
                            + FormatSpec.GROUP_FLAGS_SIZE;
                    final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
                    final int addressOfBigram = findAddressOfWord(dict, bigram.mWord);
                    final int offset = addressOfBigram - offsetBasePoint;
                    final int offset = addressOfBigram - offsetBasePoint;
@@ -595,11 +597,13 @@ public final class BinaryDictInputOutput {
     * @param formatOptions file format options.
     * @param formatOptions file format options.
     * @return the byte size of the entire stack.
     * @return the byte size of the entire stack.
     */
     */
    // TODO: rename this method once all it does is copy each node's cached address after
    // update back into its cached address before update.
    private static int stackNodes(final ArrayList<Node> flatNodes,
    private static int stackNodes(final ArrayList<Node> flatNodes,
            final FormatOptions formatOptions) {
            final FormatOptions formatOptions) {
        int nodeOffset = 0;
        int nodeOffset = 0;
        for (Node n : flatNodes) {
        for (final Node n : flatNodes) {
            n.mCachedAddress = nodeOffset;
            n.mCachedAddressBeforeUpdate = n.mCachedAddressAfterUpdate;
            int groupCountSize = getGroupCountSize(n);
            int groupCountSize = getGroupCountSize(n);
            int groupOffset = 0;
            int groupOffset = 0;
            for (CharGroup g : n.mData) {
            for (CharGroup g : n.mData) {
@@ -612,6 +616,10 @@ public final class BinaryDictInputOutput {
            if (nodeSize != n.mCachedSize) {
            if (nodeSize != n.mCachedSize) {
                throw new RuntimeException("Bug : Stored and computed node size differ");
                throw new RuntimeException("Bug : Stored and computed node size differ");
            }
            }
            if (nodeOffset != n.mCachedAddressAfterUpdate) {
                // TODO: remove this test when the code is well tested
                throw new RuntimeException("Bug : Stored and computed node address differ");
            }
            nodeOffset += n.mCachedSize;
            nodeOffset += n.mCachedSize;
        }
        }
        return nodeOffset;
        return nodeOffset;
@@ -627,11 +635,13 @@ public final class BinaryDictInputOutput {
     */
     */
    private static void computeParentAddresses(final ArrayList<Node> flatNodes) {
    private static void computeParentAddresses(final ArrayList<Node> flatNodes) {
        for (final Node node : flatNodes) {
        for (final Node node : flatNodes) {
            for (CharGroup group : node.mData) {
            for (final CharGroup group : node.mData) {
                if (null != group.mChildren) {
                if (null != group.mChildren) {
                    // assign my address to children's parent address
                    // Assign my address to children's parent address
                    // Here BeforeUpdate and AfterUpdate addresses have the same value, so it
                    // does not matter which we use.
                    group.mChildren.mCachedParentAddress = group.mCachedAddress
                    group.mChildren.mCachedParentAddress = group.mCachedAddress
                            - group.mChildren.mCachedAddress;
                            - group.mChildren.mCachedAddressAfterUpdate;
                }
                }
            }
            }
        }
        }
@@ -654,9 +664,13 @@ public final class BinaryDictInputOutput {
     */
     */
    private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
    private static ArrayList<Node> computeAddresses(final FusionDictionary dict,
            final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
            final ArrayList<Node> flatNodes, final FormatOptions formatOptions) {
        // First get the worst sizes and offsets
        // First get the worst possible sizes and offsets
        for (Node n : flatNodes) setNodeMaximumSize(n, formatOptions);
        int offset = 0;
        final int offset = stackNodes(flatNodes, formatOptions);
        for (final Node n : flatNodes) {
            n.mCachedAddressAfterUpdate = offset;
            offset += calculateNodeMaximumSize(n, formatOptions);
        }
        offset = stackNodes(flatNodes, formatOptions);


        MakedictLog.i("Compressing the array addresses. Original size : " + offset);
        MakedictLog.i("Compressing the array addresses. Original size : " + offset);
        MakedictLog.i("(Recursively seen size : " + offset + ")");
        MakedictLog.i("(Recursively seen size : " + offset + ")");
@@ -665,11 +679,14 @@ public final class BinaryDictInputOutput {
        boolean changesDone = false;
        boolean changesDone = false;
        do {
        do {
            changesDone = false;
            changesDone = false;
            for (Node n : flatNodes) {
            int nodeStartOffset = 0;
            for (final Node n : flatNodes) {
                n.mCachedAddressAfterUpdate = nodeStartOffset;
                final int oldNodeSize = n.mCachedSize;
                final int oldNodeSize = n.mCachedSize;
                final boolean changed = computeActualNodeSize(n, dict, formatOptions);
                final boolean changed = computeActualNodeSize(n, dict, formatOptions);
                final int newNodeSize = n.mCachedSize;
                final int newNodeSize = n.mCachedSize;
                if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
                if (oldNodeSize < newNodeSize) throw new RuntimeException("Increased size ?!");
                nodeStartOffset += newNodeSize;
                changesDone |= changed;
                changesDone |= changed;
            }
            }
            stackNodes(flatNodes, formatOptions);
            stackNodes(flatNodes, formatOptions);
@@ -683,7 +700,7 @@ public final class BinaryDictInputOutput {
        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
        MakedictLog.i("Compression complete in " + passes + " passes.");
        MakedictLog.i("Compression complete in " + passes + " passes.");
        MakedictLog.i("After address compression : "
        MakedictLog.i("After address compression : "
                + (lastNode.mCachedAddress + lastNode.mCachedSize));
                + (lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize));


        return flatNodes;
        return flatNodes;
    }
    }
@@ -701,10 +718,12 @@ public final class BinaryDictInputOutput {
    private static void checkFlatNodeArray(final ArrayList<Node> array) {
    private static void checkFlatNodeArray(final ArrayList<Node> array) {
        int offset = 0;
        int offset = 0;
        int index = 0;
        int index = 0;
        for (Node n : array) {
        for (final Node n : array) {
            if (n.mCachedAddress != offset) {
            // BeforeUpdate and AfterUpdate addresses are the same here, so it does not matter
            // which we use.
            if (n.mCachedAddressAfterUpdate != offset) {
                throw new RuntimeException("Wrong address for node " + index
                throw new RuntimeException("Wrong address for node " + index
                        + " : expected " + offset + ", got " + n.mCachedAddress);
                        + " : expected " + offset + ", got " + n.mCachedAddressAfterUpdate);
            }
            }
            ++index;
            ++index;
            offset += n.mCachedSize;
            offset += n.mCachedSize;
@@ -946,7 +965,7 @@ public final class BinaryDictInputOutput {
    private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
    private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
            final Node node, final FormatOptions formatOptions) {
            final Node node, final FormatOptions formatOptions) {
        // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
        // TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
        int index = node.mCachedAddress;
        int index = node.mCachedAddressAfterUpdate;


        final int groupCount = node.mData.size();
        final int groupCount = node.mData.size();
        final int countSize = getGroupCountSize(node);
        final int countSize = getGroupCountSize(node);
@@ -977,7 +996,7 @@ public final class BinaryDictInputOutput {
            if (group.mFrequency >= 0) groupAddress += FormatSpec.GROUP_FREQUENCY_SIZE;
            if (group.mFrequency >= 0) groupAddress += FormatSpec.GROUP_FREQUENCY_SIZE;
            final int childrenOffset = null == group.mChildren
            final int childrenOffset = null == group.mChildren
                    ? FormatSpec.NO_CHILDREN_ADDRESS
                    ? FormatSpec.NO_CHILDREN_ADDRESS
                            : group.mChildren.mCachedAddress - groupAddress;
                            : group.mChildren.mCachedAddressAfterUpdate - groupAddress;
            byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
            byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
            buffer[index++] = flags;
            buffer[index++] = flags;


@@ -985,7 +1004,7 @@ public final class BinaryDictInputOutput {
                index = writeParentAddress(buffer, index, parentAddress, formatOptions);
                index = writeParentAddress(buffer, index, parentAddress, formatOptions);
            } else {
            } else {
                index = writeParentAddress(buffer, index,
                index = writeParentAddress(buffer, index,
                        parentAddress + (node.mCachedAddress - group.mCachedAddress),
                        parentAddress + (node.mCachedAddressAfterUpdate - group.mCachedAddress),
                        formatOptions);
                        formatOptions);
            }
            }


@@ -1055,9 +1074,9 @@ public final class BinaryDictInputOutput {
                    = FormatSpec.NO_FORWARD_LINK_ADDRESS;
                    = FormatSpec.NO_FORWARD_LINK_ADDRESS;
            index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
            index += FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
        }
        }
        if (index != node.mCachedAddress + node.mCachedSize) throw new RuntimeException(
        if (index != node.mCachedAddressAfterUpdate + node.mCachedSize) throw new RuntimeException(
                "Not the same size : written "
                "Not the same size : written "
                + (index - node.mCachedAddress) + " bytes out of a node that should have "
                + (index - node.mCachedAddressAfterUpdate) + " bytes from a node that should have "
                + node.mCachedSize + " bytes");
                + node.mCachedSize + " bytes");
        return index;
        return index;
    }
    }
@@ -1077,25 +1096,27 @@ public final class BinaryDictInputOutput {
        int charGroups = 0;
        int charGroups = 0;
        int maxGroups = 0;
        int maxGroups = 0;
        int maxRuns = 0;
        int maxRuns = 0;
        for (Node n : nodes) {
        for (final Node n : nodes) {
            if (maxGroups < n.mData.size()) maxGroups = n.mData.size();
            if (maxGroups < n.mData.size()) maxGroups = n.mData.size();
            for (CharGroup cg : n.mData) {
            for (final CharGroup cg : n.mData) {
                ++charGroups;
                ++charGroups;
                if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
                if (cg.mChars.length > maxRuns) maxRuns = cg.mChars.length;
                if (cg.mFrequency >= 0) {
                if (cg.mFrequency >= 0) {
                    if (n.mCachedAddress < firstTerminalAddress)
                    if (n.mCachedAddressAfterUpdate < firstTerminalAddress)
                        firstTerminalAddress = n.mCachedAddress;
                        firstTerminalAddress = n.mCachedAddressAfterUpdate;
                    if (n.mCachedAddress > lastTerminalAddress)
                    if (n.mCachedAddressAfterUpdate > lastTerminalAddress)
                        lastTerminalAddress = n.mCachedAddress;
                        lastTerminalAddress = n.mCachedAddressAfterUpdate;
                }
            }
            }
            if (n.mCachedAddressAfterUpdate + n.mCachedSize > size) {
                size = n.mCachedAddressAfterUpdate + n.mCachedSize;
            }
            }
            if (n.mCachedAddress + n.mCachedSize > size) size = n.mCachedAddress + n.mCachedSize;
        }
        }
        final int[] groupCounts = new int[maxGroups + 1];
        final int[] groupCounts = new int[maxGroups + 1];
        final int[] runCounts = new int[maxRuns + 1];
        final int[] runCounts = new int[maxRuns + 1];
        for (Node n : nodes) {
        for (final Node n : nodes) {
            ++groupCounts[n.mData.size()];
            ++groupCounts[n.mData.size()];
            for (CharGroup cg : n.mData) {
            for (final CharGroup cg : n.mData) {
                ++runCounts[cg.mChars.length];
                ++runCounts[cg.mChars.length];
            }
            }
        }
        }
@@ -1205,7 +1226,7 @@ public final class BinaryDictInputOutput {


        // Create a buffer that matches the final dictionary size.
        // Create a buffer that matches the final dictionary size.
        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
        final int bufferSize = lastNode.mCachedAddress + lastNode.mCachedSize;
        final int bufferSize = lastNode.mCachedAddressAfterUpdate + lastNode.mCachedSize;
        final byte[] buffer = new byte[bufferSize];
        final byte[] buffer = new byte[bufferSize];
        int index = 0;
        int index = 0;


@@ -1584,8 +1605,9 @@ public final class BinaryDictInputOutput {
                buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
                buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);


        final Node node = new Node(nodeContents);
        final Node node = new Node(nodeContents);
        node.mCachedAddress = nodeOrigin;
        node.mCachedAddressBeforeUpdate = nodeOrigin;
        reverseNodeMap.put(node.mCachedAddress, node);
        node.mCachedAddressAfterUpdate = nodeOrigin;
        reverseNodeMap.put(node.mCachedAddressAfterUpdate, node);
        return node;
        return node;
    }
    }


+7 −1
Original line number Original line Diff line number Diff line
@@ -46,7 +46,13 @@ public final class FusionDictionary implements Iterable<Word> {
        ArrayList<CharGroup> mData;
        ArrayList<CharGroup> mData;
        // To help with binary generation
        // To help with binary generation
        int mCachedSize = Integer.MIN_VALUE;
        int mCachedSize = Integer.MIN_VALUE;
        int mCachedAddress = Integer.MIN_VALUE;
        // mCachedAddressBeforeUpdate and mCachedAddressAfterUpdate are helpers for binary
        // dictionary generation. They always hold the same value except during dictionary
        // address compression, when the update process needs to know both values at the same
        // time. Each update pass writes the AfterUpdate value, and the code copies it into
        // BeforeUpdate before the next update pass.
        int mCachedAddressBeforeUpdate = Integer.MIN_VALUE;
        int mCachedAddressAfterUpdate = Integer.MIN_VALUE;
        int mCachedParentAddress = 0;
        int mCachedParentAddress = 0;


        public Node() {
        public Node() {