Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 14087ba5 authored by Yuichiro Hanada
Browse files

Add Ver4DictDecoder.

Bug: 9618601
Change-Id: I43c5840505c6a847aaf4893a400392ccd45903c0
parent 926ee64b
Loading
Loading
Loading
Loading
+2 −2
Original line number Original line Diff line number Diff line
@@ -498,7 +498,7 @@ public final class BinaryDictDecoderUtils {


            // reach the end of the array.
            // reach the end of the array.
            if (options.mSupportsDynamicUpdate) {
            if (options.mSupportsDynamicUpdate) {
                final boolean hasValidForwardLink = dictDecoder.readForwardLinkAndAdvancePosition();
                final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
                if (!hasValidForwardLink) break;
                if (!hasValidForwardLink) break;
            }
            }
        } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
        } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
@@ -550,7 +550,7 @@ public final class BinaryDictDecoderUtils {
     * @return the created (or merged) dictionary.
     * @return the created (or merged) dictionary.
     */
     */
    @UsedForTesting
    @UsedForTesting
    /* package */ static FusionDictionary readDictionaryBinary(final Ver3DictDecoder dictDecoder,
    /* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder,
            final FusionDictionary dict) throws IOException, UnsupportedFormatException {
            final FusionDictionary dict) throws IOException, UnsupportedFormatException {
        // Read header
        // Read header
        final FileHeader fileHeader = dictDecoder.readHeader();
        final FileHeader fileHeader = dictDecoder.readHeader();
+2 −2
Original line number Original line Diff line number Diff line
@@ -368,9 +368,9 @@ public class BinaryDictEncoderUtils {
            if (null != ptNode.mBigrams) {
            if (null != ptNode.mBigrams) {
                for (WeightedString bigram : ptNode.mBigrams) {
                for (WeightedString bigram : ptNode.mBigrams) {
                    final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
                    final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
                            nodeSize + size + FormatSpec.PTNODE_FLAGS_SIZE,
                            nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
                            FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
                            FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
                    nodeSize += getByteSize(offset) + FormatSpec.PTNODE_FLAGS_SIZE;
                    nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
                }
                }
            }
            }
            ptNode.mCachedSize = nodeSize;
            ptNode.mCachedSize = nodeSize;
+2 −2
Original line number Original line Diff line number Diff line
@@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
            if (p.mPosition == p.mNumOfPtNode) {
            if (p.mPosition == p.mNumOfPtNode) {
                if (formatOptions.mSupportsDynamicUpdate) {
                if (formatOptions.mSupportsDynamicUpdate) {
                    final boolean hasValidForwardLinkAddress =
                    final boolean hasValidForwardLinkAddress =
                            dictDecoder.readForwardLinkAndAdvancePosition();
                            dictDecoder.readAndFollowForwardLink();
                    if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
                    if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
                        // The node array has a forward link.
                        // The node array has a forward link.
                        p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
                        p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
@@ -233,7 +233,7 @@ public final class BinaryDictIOUtils {
                }
                }


                final boolean hasValidForwardLinkAddress =
                final boolean hasValidForwardLinkAddress =
                        dictDecoder.readForwardLinkAndAdvancePosition();
                        dictDecoder.readAndFollowForwardLink();
                if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
                if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
                    return FormatSpec.NOT_VALID_WORD;
                    return FormatSpec.NOT_VALID_WORD;
                }
                }
+188 −18
Original line number Original line Diff line number Diff line
@@ -17,9 +17,11 @@
package com.android.inputmethod.latin.makedict;
package com.android.inputmethod.latin.makedict;


import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;


import java.io.File;
import java.io.File;
@@ -30,13 +32,50 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.TreeMap;


/**
/**
 * An interface of binary dictionary decoder.
 * The base class of binary dictionary decoders.
 */
 */
public interface DictDecoder {
public abstract class DictDecoder {
    public FileHeader readHeader() throws IOException, UnsupportedFormatException;

    /**
     * Parses a file header out of the given buffer.
     *
     * The fields are consumed in this order: format version, option flags, header size, and
     * finally the attribute key/value pairs, which are read until the buffer position reaches
     * the header size.
     *
     * NOTE(review): when {@code dictBuffer} is null this calls {@link #openDictBuffer()}, but
     * the local reference is not updated afterwards, so the reads below still receive null.
     * Confirm whether callers are expected to always pass an already opened buffer.
     *
     * @param dictBuffer the buffer to read the header from.
     * @return the header built from the values read.
     * @throws IOException propagated from opening or reading the buffer.
     * @throws UnsupportedFormatException if the version is outside the supported range or the
     *         header size is negative.
     */
    protected FileHeader readHeader(final DictBuffer dictBuffer)
            throws IOException, UnsupportedFormatException {
        if (null == dictBuffer) {
            openDictBuffer();
        }

        final int formatVersion = HeaderReader.readVersion(dictBuffer);
        final boolean isSupportedVersion =
                formatVersion >= FormatSpec.MINIMUM_SUPPORTED_VERSION
                        && formatVersion <= FormatSpec.MAXIMUM_SUPPORTED_VERSION;
        if (!isSupportedVersion) {
            throw new UnsupportedFormatException("Unsupported version : " + formatVersion);
        }

        // TODO: Remove this field.
        final int flags = HeaderReader.readOptionFlags(dictBuffer);
        final int sizeOfHeader = HeaderReader.readHeaderSize(dictBuffer);
        if (sizeOfHeader < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        final HashMap<String, String> headerAttributes =
                HeaderReader.readAttributes(dictBuffer, sizeOfHeader);

        // Decode the individual option bits before assembling the header objects.
        final boolean germanUmlautProcessing =
                0 != (flags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG);
        final boolean frenchLigatureProcessing =
                0 != (flags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG);
        final boolean dynamicUpdate = 0 != (flags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE);

        final FusionDictionary.DictionaryOptions dictionaryOptions =
                new FusionDictionary.DictionaryOptions(headerAttributes,
                        germanUmlautProcessing, frenchLigatureProcessing);
        final FormatOptions formatOptions = new FormatOptions(formatVersion, dynamicUpdate);
        return new FileHeader(sizeOfHeader, dictionaryOptions, formatOptions);
    }

    /**
     * Reads and returns the file header.
     */
    public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException;


    /**
    /**
     * Reads PtNode from nodeAddress.
     * Reads PtNode from nodeAddress.
@@ -44,7 +83,7 @@ public interface DictDecoder {
     * @param formatOptions the format options.
     * @param formatOptions the format options.
     * @return PtNodeInfo.
     * @return PtNodeInfo.
     */
     */
    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
    public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);


    /**
    /**
     * Reads a buffer and returns the memory representation of the dictionary.
     * Reads a buffer and returns the memory representation of the dictionary.
@@ -59,7 +98,7 @@ public interface DictDecoder {
     * @return the created (or merged) dictionary.
     * @return the created (or merged) dictionary.
     */
     */
    @UsedForTesting
    @UsedForTesting
    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
    public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict,
            final boolean deleteDictIfBroken)
            final boolean deleteDictIfBroken)
                    throws FileNotFoundException, IOException, UnsupportedFormatException;
                    throws FileNotFoundException, IOException, UnsupportedFormatException;


@@ -74,7 +113,12 @@ public interface DictDecoder {
     */
     */
    @UsedForTesting
    @UsedForTesting
    public int getTerminalPosition(final String word)
    public int getTerminalPosition(final String word)
            throws IOException, UnsupportedFormatException;
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        return BinaryDictIOUtils.getTerminalPosition(this, word);
    }


    /**
    /**
     * Reads unigrams and bigrams from the binary file.
     * Reads unigrams and bigrams from the binary file.
@@ -86,50 +130,56 @@ public interface DictDecoder {
     * @throws IOException if the file can't be read.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
     */
    @UsedForTesting
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            throws IOException, UnsupportedFormatException;
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
    }


    /**
    /**
     * Sets the position of the buffer to the given value.
     * Sets the position of the buffer to the given value.
     *
     *
     * @param newPos the new position
     * @param newPos the new position
     */
     */
    public void setPosition(final int newPos);
    public abstract void setPosition(final int newPos);


    /**
    /**
     * Gets the position of the buffer.
     * Gets the position of the buffer.
     *
     *
     * @return the position
     * @return the position
     */
     */
    public int getPosition();
    public abstract int getPosition();


    /**
    /**
     * Reads and returns the PtNode count out of a buffer and forwards the pointer.
     * Reads and returns the PtNode count out of a buffer and forwards the pointer.
     */
     */
    public int readPtNodeCount();
    public abstract int readPtNodeCount();


    /**
    /**
     * Reads the forward link and advances the position.
     * Reads the forward link and advances the position.
     *
     *
     * @return if this method advances the position then true else false.
     * @return true if this method moves the file pointer, false otherwise.
     */
     */
    public boolean readForwardLinkAndAdvancePosition();
    public abstract boolean readAndFollowForwardLink();
    public boolean hasNextPtNodeArray();
    public abstract boolean hasNextPtNodeArray();


    /**
    /**
     * Opens the dictionary file and makes DictBuffer.
     * Opens the dictionary file and makes DictBuffer.
     */
     */
    @UsedForTesting
    @UsedForTesting
    public void openDictBuffer() throws FileNotFoundException, IOException;
    public abstract void openDictBuffer() throws FileNotFoundException, IOException;
    @UsedForTesting
    @UsedForTesting
    public boolean isOpenedDictBuffer();
    public abstract boolean isDictBufferOpen();


    // Flags for DictionaryBufferFactory.
    // Constants for DictionaryBufferFactory.
    public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    public static final int USE_BYTEARRAY = 0x02000000;
    public static final int USE_BYTEARRAY = 0x02000000;
    public static final int USE_WRITABLE_BYTEBUFFER = 0x04000000;
    public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000;
    public static final int MASK_DICTBUFFER = 0x0F000000;
    public static final int MASK_DICTBUFFER = 0x0F000000;


    public interface DictionaryBufferFactory {
    public interface DictionaryBufferFactory {
@@ -221,4 +271,124 @@ public interface DictDecoder {
            return null;
            return null;
        }
        }
    }
    }

    /**
     * A utility class for reading a file header.
     */
    protected static class HeaderReader {
        /**
         * Reads the format version by delegating to
         * {@link BinaryDictDecoderUtils#checkFormatVersion}.
         *
         * @param dictBuffer the buffer to read from.
         * @return the value returned by the format version check.
         */
        protected static int readVersion(final DictBuffer dictBuffer)
                throws IOException, UnsupportedFormatException {
            final int formatVersion = BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
            return formatVersion;
        }

        /**
         * Reads the option flags, stored as an unsigned short.
         *
         * @param dictBuffer the buffer to read from.
         * @return the option flags.
         */
        protected static int readOptionFlags(final DictBuffer dictBuffer) {
            final int flags = dictBuffer.readUnsignedShort();
            return flags;
        }

        /**
         * Reads the header size, stored as an int.
         *
         * @param dictBuffer the buffer to read from.
         * @return the header size.
         */
        protected static int readHeaderSize(final DictBuffer dictBuffer) {
            final int size = dictBuffer.readInt();
            return size;
        }

        /**
         * Reads key/value string pairs for as long as the buffer position is before
         * {@code headerSize}, then sets the buffer position to {@code headerSize}.
         *
         * @param dictBuffer the buffer to read from.
         * @param headerSize the position at which the header ends.
         * @return the attributes that were read, keyed by attribute name.
         */
        protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
                final int headerSize) {
            final HashMap<String, String> result = new HashMap<String, String>();
            // The position is re-read after every pair; CharEncoding.readString is relied upon
            // to move it forward, which is what keeps this loop from spinning forever.
            for (int pos = dictBuffer.position(); pos < headerSize;
                    pos = dictBuffer.position()) {
                final String name = CharEncoding.readString(dictBuffer);
                final String content = CharEncoding.readString(dictBuffer);
                result.put(name, content);
            }
            dictBuffer.position(headerSize);
            return result;
        }
    }

    /**
     * A utility class for reading a PtNode.
     */
    protected static class PtNodeReader {
        /**
         * Reads the PtNode option flags, stored as one unsigned byte.
         *
         * @param dictBuffer the buffer to read from.
         * @return the option flags.
         */
        protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
            final int flags = dictBuffer.readUnsignedByte();
            return flags;
        }

        /**
         * Reads the parent address when the format supports dynamic updates.
         *
         * @param dictBuffer the buffer to read from.
         * @param formatOptions the format options.
         * @return the value read by {@code BinaryDictDecoderUtils.readSInt24}, or
         *         {@code FormatSpec.NO_PARENT_ADDRESS} (without reading anything) when dynamic
         *         updates are not supported.
         */
        protected static int readParentAddress(final DictBuffer dictBuffer,
                final FormatOptions formatOptions) {
            if (!BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                return FormatSpec.NO_PARENT_ADDRESS;
            }
            return BinaryDictDecoderUtils.readSInt24(dictBuffer);
        }

        /**
         * Reads the children address.
         *
         * With dynamic update support the address is read with
         * {@code BinaryDictDecoderUtils.readSInt24} and a value of 0 is mapped to
         * {@code FormatSpec.NO_CHILDREN_ADDRESS}. Otherwise the width of the address (one, two
         * or three unsigned bytes, or none) is selected by the address type bits of
         * {@code optionFlags}.
         *
         * @param dictBuffer the buffer to read from.
         * @param optionFlags the option flags of the PtNode.
         * @param formatOptions the format options.
         * @return the address read, or {@code FormatSpec.NO_CHILDREN_ADDRESS}.
         */
        protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                final int rawAddress = BinaryDictDecoderUtils.readSInt24(dictBuffer);
                return 0 == rawAddress ? FormatSpec.NO_CHILDREN_ADDRESS : rawAddress;
            }

            final int addressType = optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE;
            if (addressType == FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE) {
                return dictBuffer.readUnsignedByte();
            } else if (addressType == FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES) {
                return dictBuffer.readUnsignedShort();
            } else if (addressType == FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES) {
                return dictBuffer.readUnsignedInt24();
            }
            // FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS and any unrecognized type: nothing to read.
            return FormatSpec.NO_CHILDREN_ADDRESS;
        }

        /**
         * Reads a shortcut list and appends each target to {@code shortcutTargets}.
         *
         * @param dictBuffer the buffer to read from.
         * @param shortcutTargets the list receiving the shortcut targets.
         * @return the number of bytes the buffer position advanced.
         */
        protected static int readShortcut(final DictBuffer dictBuffer,
                final ArrayList<WeightedString> shortcutTargets) {
            final int startPosition = dictBuffer.position();
            // The list starts with its size, which is not needed here: skip over it.
            dictBuffer.readUnsignedShort();
            int flags;
            do {
                flags = dictBuffer.readUnsignedByte();
                final String target = CharEncoding.readString(dictBuffer);
                final int frequency = flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
                shortcutTargets.add(new WeightedString(target, frequency));
            } while (0 != (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT));
            return dictBuffer.position() - startPosition;
        }

        /**
         * Reads bigram entries and appends them to {@code bigrams}.
         *
         * Each entry is one flag byte followed by an offset of one, two or three bytes. The
         * target address is {@code baseAddress} plus the number of bytes read so far up to and
         * including the entry's flag byte, plus or minus the offset depending on the sign flag.
         * Reading stops after an entry without the "has next" flag, or once
         * {@code FormatSpec.MAX_BIGRAMS_IN_A_PTNODE} entries have been read.
         *
         * @param dictBuffer the buffer to read from.
         * @param bigrams the list receiving the bigram attributes.
         * @param baseAddress the address the offsets are relative to.
         * @return the total number of bytes read.
         * @throws RuntimeException if an entry's flags declare no address type.
         */
        protected static int readBigramAddresses(final DictBuffer dictBuffer,
                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
            int bytesRead = 0;
            for (int entry = 0; entry < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE; ++entry) {
                final int flags = dictBuffer.readUnsignedByte();
                bytesRead += 1;
                final boolean isNegative =
                        0 != (flags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE);
                final int direction = isNegative ? -1 : 1;

                int distance;
                int width;
                switch (flags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
                        distance = dictBuffer.readUnsignedByte();
                        width = 1;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
                        distance = dictBuffer.readUnsignedShort();
                        width = 2;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
                        distance = dictBuffer.readUnsignedInt24();
                        width = 3;
                        break;
                    default:
                        throw new RuntimeException("Has bigrams with no address");
                }

                // The offset is relative to the position right after the flag byte, so the
                // address must be computed before the offset bytes are accounted for.
                final int targetAddress = baseAddress + bytesRead + direction * distance;
                bytesRead += width;

                final int frequency = flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
                bigrams.add(new PendingAttribute(frequency, targetAddress));
                if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
                    break;
                }
            }
            return bytesRead;
        }
    }
}
}
+13 −5
Original line number Original line Diff line number Diff line
@@ -360,19 +360,27 @@ public final class FormatSpec {
     * Returns new dictionary decoder.
     * Returns new dictionary decoder.
     *
     *
     * @param dictFile the dictionary file.
     * @param dictFile the dictionary file.
     * @param bufferType the flag indicating buffer type which is used by the dictionary decoder.
     * @param bufferType The type of buffer, as one of USE_* in DictDecoder.
     * @return new dictionary decoder if the dictionary file exists, otherwise null.
     * @return new dictionary decoder if the dictionary file exists, otherwise null.
     */
     */
    public static DictDecoder getDictDecoder(final File dictFile, final int bufferType) {
    public static DictDecoder getDictDecoder(final File dictFile, final int bufferType) {
        if (!dictFile.isFile()) return null;
        if (dictFile.isDirectory()) {
            return new Ver4DictDecoder(dictFile, bufferType);
        } else if (dictFile.isFile()) {
            return new Ver3DictDecoder(dictFile, bufferType);
            return new Ver3DictDecoder(dictFile, bufferType);
        }
        }
        return null;
    }


    public static DictDecoder getDictDecoder(final File dictFile,
    public static DictDecoder getDictDecoder(final File dictFile,
            final DictionaryBufferFactory factory) {
            final DictionaryBufferFactory factory) {
        if (!dictFile.isFile()) return null;
        if (dictFile.isDirectory()) {
            return new Ver4DictDecoder(dictFile, factory);
        } else if (dictFile.isFile()) {
            return new Ver3DictDecoder(dictFile, factory);
            return new Ver3DictDecoder(dictFile, factory);
        }
        }
        return null;
    }


    public static DictDecoder getDictDecoder(final File dictFile) {
    public static DictDecoder getDictDecoder(final File dictFile) {
        return getDictDecoder(dictFile, DictDecoder.USE_READONLY_BYTEBUFFER);
        return getDictDecoder(dictFile, DictDecoder.USE_READONLY_BYTEBUFFER);
Loading