Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8ec0064c authored by Yuichiro Hanada's avatar Yuichiro Hanada
Browse files

Make children addresses and parent addresses use signed addresses.

Signed addresses are used only in version 3 with dynamic update.

bug: 6669677

Change-Id: Iadaeab199b5019d2330b4573c24da74d64f0945e
parent 0d4d1096
Loading
Loading
Loading
Loading
+153 −59
Original line number Original line Diff line number Diff line
@@ -36,7 +36,6 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Iterator;
import java.util.Map;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import java.util.TreeMap;


/**
/**
@@ -412,6 +411,10 @@ public class BinaryDictInputOutput {
        }
        }
    }
    }


    // Largest values representable as unsigned 8-, 16- and 24-bit integers. They are the
    // thresholds used to pick how many bytes an address occupies.
    private static final int UINT8_MAX = 0xFF;
    private static final int UINT16_MAX = 0xFFFF;
    private static final int UINT24_MAX = 0xFFFFFF;

    /**
    /**
     * Compute the size, in bytes, that an address will occupy.
     * Compute the size, in bytes, that an address will occupy.
     *
     *
@@ -423,17 +426,25 @@ public class BinaryDictInputOutput {
     * @return the byte size.
     * @return the byte size.
     */
     */
    private static int getByteSize(final int address) {
    private static int getByteSize(final int address) {
        assert(address < 0x1000000);
        assert(address <= UINT24_MAX);
        if (!hasChildrenAddress(address)) {
        if (!hasChildrenAddress(address)) {
            return 0;
            return 0;
        } else if (Math.abs(address) < 0x100) {
        } else if (Math.abs(address) <= UINT8_MAX) {
            return 1;
            return 1;
        } else if (Math.abs(address) < 0x10000) {
        } else if (Math.abs(address) <= UINT16_MAX) {
            return 2;
            return 2;
        } else {
        } else {
            return 3;
            return 3;
        }
        }
    }
    }

    // Masks covering every bit except the most significant one of an 8-, 16- and 24-bit value.
    // For a sign-and-magnitude address these are the largest storable magnitudes.
    private static final int SINT8_MAX = 0x7F;
    private static final int SINT16_MAX = 0x7FFF;
    private static final int SINT24_MAX = 0x7FFFFF;
    // Most significant bit of an 8-, 16- and 24-bit value. Signed addresses use this bit
    // as the sign flag (set means negative).
    private static final int MSB8 = 0x80;
    private static final int MSB16 = 0x8000;
    private static final int MSB24 = 0x800000;

    // End utility methods.
    // End utility methods.


    // This method is responsible for finding a nice ordering of the nodes that favors run-time
    // This method is responsible for finding a nice ordering of the nodes that favors run-time
@@ -509,14 +520,20 @@ public class BinaryDictInputOutput {
            }
            }
            int groupSize = getGroupHeaderSize(group, formatOptions);
            int groupSize = getGroupHeaderSize(group, formatOptions);
            if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
            if (group.isTerminal()) groupSize += FormatSpec.GROUP_FREQUENCY_SIZE;
            if (null != group.mChildren) {
            if (null == group.mChildren && formatOptions.mSupportsDynamicUpdate) {
                groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
            } else if (null != group.mChildren) {
                final int offsetBasePoint = groupSize + node.mCachedAddress + size;
                final int offsetBasePoint = groupSize + node.mCachedAddress + size;
                final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
                final int offset = group.mChildren.mCachedAddress - offsetBasePoint;
                // assign my address to children's parent address
                // assign my address to children's parent address
                group.mChildren.mCachedParentAddress = group.mCachedAddress
                group.mChildren.mCachedParentAddress = group.mCachedAddress
                        - group.mChildren.mCachedAddress;
                        - group.mChildren.mCachedAddress;
                if (formatOptions.mSupportsDynamicUpdate) {
                    groupSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
                } else {
                    groupSize += getByteSize(offset);
                    groupSize += getByteSize(offset);
                }
                }
            }
            groupSize += getShortcutListSize(group.mShortcutTargets);
            groupSize += getShortcutListSize(group.mShortcutTargets);
            if (null != group.mBigrams) {
            if (null != group.mBigrams) {
                for (WeightedString bigram : group.mBigrams) {
                for (WeightedString bigram : group.mBigrams) {
@@ -669,15 +686,38 @@ public class BinaryDictInputOutput {
        }
        }
    }
    }


    /**
     * Helper method to write a signed address to a buffer, always on three bytes.
     *
     * The address is stored as sign and magnitude: the most significant bit of the first
     * byte is set when the address is negative, and the remaining 23 bits hold its absolute
     * value. When the address denotes "no children", three zero bytes are written instead.
     *
     * @param buffer the buffer to write to.
     * @param index the index in the buffer to write the address to.
     * @param address the address to write.
     * @return the size in bytes the address actually took, which is always 3.
     */
    private static int writeVariableSignedAddress(final byte[] buffer, int index,
            final int address) {
        if (!hasChildrenAddress(address)) {
            buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
        } else {
            final int absAddress = Math.abs(address);
            // The magnitude must stay below the sign bit of the 24-bit field, otherwise its
            // top bit would be indistinguishable from the sign flag once written.
            assert(absAddress < (MSB8 << 16));
            buffer[index++] = (byte)((address < 0 ? MSB8 : 0) | (0xFF & (absAddress >> 16)));
            buffer[index++] = (byte)(0xFF & (absAddress >> 8));
            buffer[index++] = (byte)(0xFF & absAddress);
        }
        return 3;
    }

    private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
    private static byte makeCharGroupFlags(final CharGroup group, final int groupAddress,
            final int childrenOffset) {
            final int childrenOffset, final FormatOptions formatOptions) {
        byte flags = 0;
        byte flags = 0;
        if (group.mChars.length > 1) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
        if (group.mChars.length > 1) flags |= FormatSpec.FLAG_HAS_MULTIPLE_CHARS;
        if (group.mFrequency >= 0) {
        if (group.mFrequency >= 0) {
            flags |= FormatSpec.FLAG_IS_TERMINAL;
            flags |= FormatSpec.FLAG_IS_TERMINAL;
        }
        }
        if (null != group.mChildren) {
        if (null != group.mChildren) {
            switch (getByteSize(childrenOffset)) {
            final int byteSize = formatOptions.mSupportsDynamicUpdate
                    ? FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE : getByteSize(childrenOffset);
            switch (byteSize) {
            case 1:
            case 1:
                flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
                flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
                break;
                break;
@@ -690,6 +730,8 @@ public class BinaryDictInputOutput {
            default:
            default:
                throw new RuntimeException("Node with a strange address");
                throw new RuntimeException("Node with a strange address");
            }
            }
        } else if (formatOptions.mSupportsDynamicUpdate) {
            flags |= FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
        }
        }
        if (null != group.mShortcutTargets) {
        if (null != group.mShortcutTargets) {
            if (DBG && 0 == group.mShortcutTargets.size()) {
            if (DBG && 0 == group.mShortcutTargets.size()) {
@@ -808,6 +850,25 @@ public class BinaryDictInputOutput {
                + (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
                + (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
    }
    }


    /**
     * Writes the parent address of a group as a three-byte sign-and-magnitude value.
     *
     * Nothing is written when the format does not support dynamic update. Otherwise the
     * "no parent" marker is stored as three zero bytes, and any other address is stored with
     * the most significant bit of the first byte as the sign flag followed by the magnitude.
     *
     * @param buffer the buffer to write to.
     * @param index the index in the buffer to write the address to.
     * @param address the parent address to write.
     * @param formatOptions the format options of the dictionary being written.
     * @return the index right after the written bytes, or {@code index} if nothing was written.
     */
    private static final int writeParentAddress(final byte[] buffer, final int index,
            final int address, final FormatOptions formatOptions) {
        // Formats without dynamic update have no parent address field.
        if (!supportsDynamicUpdate(formatOptions)) {
            return index;
        }
        if (FormatSpec.NO_PARENT_ADDRESS == address) {
            buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
            return index + 3;
        }
        final int magnitude = Math.abs(address);
        assert(magnitude <= SINT24_MAX);
        final int signFlag = address < 0 ? MSB8 : 0;
        buffer[index] = (byte)(signFlag | ((magnitude >> 16) & 0xFF));
        buffer[index + 1] = (byte)((magnitude >> 8) & 0xFF);
        buffer[index + 2] = (byte)(magnitude & 0xFF);
        return index + 3;
    }

    /**
    /**
     * Write a node to memory. The node is expected to have its final position cached.
     * Write a node to memory. The node is expected to have its final position cached.
     *
     *
@@ -854,22 +915,15 @@ public class BinaryDictInputOutput {
            final int childrenOffset = null == group.mChildren
            final int childrenOffset = null == group.mChildren
                    ? FormatSpec.NO_CHILDREN_ADDRESS
                    ? FormatSpec.NO_CHILDREN_ADDRESS
                            : group.mChildren.mCachedAddress - groupAddress;
                            : group.mChildren.mCachedAddress - groupAddress;
            byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset);
            byte flags = makeCharGroupFlags(group, groupAddress, childrenOffset, formatOptions);
            buffer[index++] = flags;
            buffer[index++] = flags;


            if (supportsDynamicUpdate(formatOptions)) {
            if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
            if (parentAddress == FormatSpec.NO_PARENT_ADDRESS) {
                    // this node is the root node.
                index = writeParentAddress(buffer, index, parentAddress, formatOptions);
                    buffer[index] = buffer[index + 1] = buffer[index + 2] = 0;
            } else {
            } else {
                    // write parent address. (version 3)
                index = writeParentAddress(buffer, index,
                    final int actualParentAddress = Math.abs(parentAddress
                        parentAddress + (node.mCachedAddress - group.mCachedAddress),
                            + (node.mCachedAddress - group.mCachedAddress));
                        formatOptions);
                    buffer[index] = (byte)((actualParentAddress >> 16) & 0xFF);
                    buffer[index + 1] = (byte)((actualParentAddress >> 8) & 0xFF);
                    buffer[index + 2] = (byte)(actualParentAddress & 0xFF);
                }
                index += 3;
            }
            }


            index = CharEncoding.writeCharArray(group.mChars, buffer, index);
            index = CharEncoding.writeCharArray(group.mChars, buffer, index);
@@ -879,7 +933,13 @@ public class BinaryDictInputOutput {
            if (group.mFrequency >= 0) {
            if (group.mFrequency >= 0) {
                buffer[index++] = (byte) group.mFrequency;
                buffer[index++] = (byte) group.mFrequency;
            }
            }
            final int shift = writeVariableAddress(buffer, index, childrenOffset);

            final int shift;
            if (formatOptions.mSupportsDynamicUpdate) {
                shift = writeVariableSignedAddress(buffer, index, childrenOffset);
            } else {
                shift = writeVariableAddress(buffer, index, childrenOffset);
            }
            index += shift;
            index += shift;
            groupAddress += shift;
            groupAddress += shift;


@@ -1104,6 +1164,58 @@ public class BinaryDictInputOutput {
    // Input methods: Read a binary dictionary to memory.
    // Input methods: Read a binary dictionary to memory.
    // readDictionaryBinary is the public entry point for them.
    // readDictionaryBinary is the public entry point for them.


    /**
     * Returns the number of bytes taken by the children address field of a group.
     *
     * With dynamic update the field always has the fixed signed-address size. Otherwise the
     * size is given by the address-type bits of the group flags.
     *
     * @param optionFlags the flags byte of the group.
     * @param formatOptions the format options of the dictionary being read.
     * @return the size in bytes of the children address field, 0 if there is none.
     */
    private static int getChildrenAddressSize(final int optionFlags,
            final FormatOptions formatOptions) {
        if (formatOptions.mSupportsDynamicUpdate) {
            return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
        }
        final int addressType = optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE;
        if (FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType) {
            return 1;
        }
        if (FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType) {
            return 2;
        }
        if (FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES == addressType) {
            return 3;
        }
        // FLAG_GROUP_ADDRESS_TYPE_NOADDRESS and any unrecognized value: no address field.
        return 0;
    }

    /**
     * Reads the children address field of a group from the buffer.
     *
     * With dynamic update the field is a three-byte sign-and-magnitude value: all zeroes
     * means there is no children address, and a set most significant bit means the
     * magnitude in the remaining 23 bits is negated. Otherwise the field is an unsigned
     * value whose width is given by the address-type bits of the group flags.
     *
     * @param buffer the buffer to read from, positioned at the children address field.
     * @param optionFlags the flags byte of the group.
     * @param options the format options of the dictionary being read.
     * @return the address as stored, or {@code FormatSpec.NO_CHILDREN_ADDRESS} if absent.
     */
    private static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
            final int optionFlags, final FormatOptions options) {
        if (options.mSupportsDynamicUpdate) {
            final int address = buffer.readUnsignedInt24();
            if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
            if ((address & MSB24) != 0) {
                // Sign flag set: strip it and negate the magnitude.
                return -(address & SINT24_MAX);
            } else {
                return address;
            }
        }
        switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
            case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
                return buffer.readUnsignedByte();
            case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
                return buffer.readUnsignedShort();
            case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
                return buffer.readUnsignedInt24();
            case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
            default:
                return FormatSpec.NO_CHILDREN_ADDRESS;
        }
    }

    /**
     * Reads the parent address field of a group from the buffer.
     *
     * The field only exists when the format supports dynamic update. It is a three-byte
     * sign-and-magnitude value whose most significant bit is the sign flag. Nothing is read
     * from the buffer for formats without dynamic update.
     *
     * @param buffer the buffer to read from, positioned at the parent address field.
     * @param formatOptions the format options of the dictionary being read.
     * @return the signed parent address, or {@code FormatSpec.NO_PARENT_ADDRESS} when the
     *         format has no parent address field.
     */
    private static int readParentAddress(final FusionDictionaryBufferInterface buffer,
            final FormatOptions formatOptions) {
        if (!supportsDynamicUpdate(formatOptions)) {
            return FormatSpec.NO_PARENT_ADDRESS;
        }
        final int rawValue = buffer.readUnsignedInt24();
        final int magnitude = rawValue & SINT24_MAX;
        final boolean isNegative = (rawValue & MSB24) != 0;
        return isNegative ? -magnitude : magnitude;
    }

    private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
    private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
    public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
    public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
            final int originalGroupAddress, final FormatOptions options) {
            final int originalGroupAddress, final FormatOptions options) {
@@ -1111,13 +1223,9 @@ public class BinaryDictInputOutput {
        final int flags = buffer.readUnsignedByte();
        final int flags = buffer.readUnsignedByte();
        ++addressPointer;
        ++addressPointer;


        final int parentAddress;
        final int parentAddress = readParentAddress(buffer, options);
        if (supportsDynamicUpdate(options)) {
        if (supportsDynamicUpdate(options)) {
            // read the parent address. (version 3)
            parentAddress = -buffer.readUnsignedInt24();
            addressPointer += 3;
            addressPointer += 3;
        } else {
            parentAddress = FormatSpec.NO_PARENT_ADDRESS;
        }
        }


        final int characters[];
        final int characters[];
@@ -1146,25 +1254,11 @@ public class BinaryDictInputOutput {
        } else {
        } else {
            frequency = CharGroup.NOT_A_TERMINAL;
            frequency = CharGroup.NOT_A_TERMINAL;
        }
        }
        int childrenAddress = addressPointer;
        int childrenAddress = readChildrenAddress(buffer, flags, options);
        switch (flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
        if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
            childrenAddress += addressPointer;
            childrenAddress += buffer.readUnsignedByte();
            addressPointer += 1;
            break;
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
            childrenAddress += buffer.readUnsignedShort();
            addressPointer += 2;
            break;
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
            childrenAddress += buffer.readUnsignedInt24();
            addressPointer += 3;
            break;
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
        default:
            childrenAddress = FormatSpec.NO_CHILDREN_ADDRESS;
            break;
        }
        }
        addressPointer += getChildrenAddressSize(flags, options);
        ArrayList<WeightedString> shortcutTargets = null;
        ArrayList<WeightedString> shortcutTargets = null;
        if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
        if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
            final int pointerBefore = buffer.position();
            final int pointerBefore = buffer.position();
@@ -1250,6 +1344,7 @@ public class BinaryDictInputOutput {


        final String result;
        final String result;
        final int originalPointer = buffer.position();
        final int originalPointer = buffer.position();
        buffer.position(address);


        if (supportsDynamicUpdate(formatOptions)) {
        if (supportsDynamicUpdate(formatOptions)) {
            result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
            result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
@@ -1279,7 +1374,6 @@ public class BinaryDictInputOutput {
                sGetWordBuffer[index--] =
                sGetWordBuffer[index--] =
                        currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
                        currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
            }
            }

            if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
            if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
            currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
            currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
        }
        }
+13 −8
Original line number Original line Diff line number Diff line
@@ -42,11 +42,13 @@ public final class FormatSpec {
     * ps
     * ps
     *
     *
     * f |
     * f |
     * o | IF HAS_LINKEDLIST_NODE (defined in the file header)
     * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
     * r |     forward link address, 3byte
     * r |     forward link address, 3byte
     * w | the address must be positive.
     * w | 1 byte = bbbbbbbb match
     * a |
     * a |   case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte)
     * rdlinkaddress
     * r |   otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte
     * d |
     * linkaddress
     */
     */


    /* Node(CharGroup) layout is as follows:
    /* Node(CharGroup) layout is as follows:
@@ -63,11 +65,13 @@ public final class FormatSpec {
     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
     *
     *
     * p |
     * p |
     * a | IF HAS_PARENT_ADDRESS (defined in the file header)
     * a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header)
     * r |     parent address, 3byte
     * r |     parent address, 3byte
     * e | the address must be negative, so the absolute value of the address is stored.
     * e | 1 byte = bbbbbbbb match
     * n |
     * n |   case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte)
     * taddress
     * t |   otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte
     * a |
     * ddress
     *
     *
     * c | IF FLAG_HAS_MULTIPLE_CHARS
     * c | IF FLAG_HAS_MULTIPLE_CHARS
     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -206,6 +210,7 @@ public final class FormatSpec {


    // This option needs to be the same numeric value as the one in binary_format.h.
    // This option needs to be the same numeric value as the one in binary_format.h.
    static final int NOT_VALID_WORD = -99;
    static final int NOT_VALID_WORD = -99;
    static final int SIGNED_CHILDREN_ADDRESS_SIZE = 3;


    /**
    /**
     * Options about file format.
     * Options about file format.