Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7ba78687 authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Add Ver4DictDecoder."

parents 11ce0545 14087ba5
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -498,7 +498,7 @@ public final class BinaryDictDecoderUtils {

            // reach the end of the array.
            if (options.mSupportsDynamicUpdate) {
                final boolean hasValidForwardLink = dictDecoder.readForwardLinkAndAdvancePosition();
                final boolean hasValidForwardLink = dictDecoder.readAndFollowForwardLink();
                if (!hasValidForwardLink) break;
            }
        } while (options.mSupportsDynamicUpdate && dictDecoder.hasNextPtNodeArray());
@@ -550,7 +550,7 @@ public final class BinaryDictDecoderUtils {
     * @return the created (or merged) dictionary.
     */
    @UsedForTesting
    /* package */ static FusionDictionary readDictionaryBinary(final Ver3DictDecoder dictDecoder,
    /* package */ static FusionDictionary readDictionaryBinary(final DictDecoder dictDecoder,
            final FusionDictionary dict) throws IOException, UnsupportedFormatException {
        // Read header
        final FileHeader fileHeader = dictDecoder.readHeader();
+2 −2
Original line number Diff line number Diff line
@@ -368,9 +368,9 @@ public class BinaryDictEncoderUtils {
            if (null != ptNode.mBigrams) {
                for (WeightedString bigram : ptNode.mBigrams) {
                    final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
                            nodeSize + size + FormatSpec.PTNODE_FLAGS_SIZE,
                            nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
                            FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
                    nodeSize += getByteSize(offset) + FormatSpec.PTNODE_FLAGS_SIZE;
                    nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
                }
            }
            ptNode.mCachedSize = nodeSize;
+2 −2
Original line number Diff line number Diff line
@@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
            if (p.mPosition == p.mNumOfPtNode) {
                if (formatOptions.mSupportsDynamicUpdate) {
                    final boolean hasValidForwardLinkAddress =
                            dictDecoder.readForwardLinkAndAdvancePosition();
                            dictDecoder.readAndFollowForwardLink();
                    if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
                        // The node array has a forward link.
                        p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
@@ -233,7 +233,7 @@ public final class BinaryDictIOUtils {
                }

                final boolean hasValidForwardLinkAddress =
                        dictDecoder.readForwardLinkAndAdvancePosition();
                        dictDecoder.readAndFollowForwardLink();
                if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
                    return FormatSpec.NOT_VALID_WORD;
                }
+188 −18
Original line number Diff line number Diff line
@@ -17,9 +17,11 @@
package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;

import java.io.File;
@@ -30,13 +32,50 @@ import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;

/**
 * An interface of binary dictionary decoder.
 * The base class of binary dictionary decoders.
 */
public interface DictDecoder {
    public FileHeader readHeader() throws IOException, UnsupportedFormatException;
public abstract class DictDecoder {

    protected FileHeader readHeader(final DictBuffer dictBuffer)
            throws IOException, UnsupportedFormatException {
        if (dictBuffer == null) {
            openDictBuffer();
        }

        final int version = HeaderReader.readVersion(dictBuffer);
        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
            throw new UnsupportedFormatException("Unsupported version : " + version);
        }
        // TODO: Remove this field.
        final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);

        final int headerSize = HeaderReader.readHeaderSize(dictBuffer);

        if (headerSize < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
                headerSize);

        final FileHeader header = new FileHeader(headerSize,
                new FusionDictionary.DictionaryOptions(attributes,
                        0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
                        0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
                        new FormatOptions(version,
                                0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
        return header;
    }

    /**
     * Reads and returns the file header.
     */
    public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException;

    /**
     * Reads PtNode from nodeAddress.
@@ -44,7 +83,7 @@ public interface DictDecoder {
     * @param formatOptions the format options.
     * @return PtNodeInfo.
     */
    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);
    public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions);

    /**
     * Reads a buffer and returns the memory representation of the dictionary.
@@ -59,7 +98,7 @@ public interface DictDecoder {
     * @return the created (or merged) dictionary.
     */
    @UsedForTesting
    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
    public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict,
            final boolean deleteDictIfBroken)
                    throws FileNotFoundException, IOException, UnsupportedFormatException;

@@ -74,7 +113,12 @@ public interface DictDecoder {
     */
    @UsedForTesting
    public int getTerminalPosition(final String word)
            throws IOException, UnsupportedFormatException;
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        return BinaryDictIOUtils.getTerminalPosition(this, word);
    }

    /**
     * Reads unigrams and bigrams from the binary file.
@@ -86,50 +130,56 @@ public interface DictDecoder {
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            throws IOException, UnsupportedFormatException;
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
    }

    /**
     * Sets the position of the buffer to the given value.
     *
     * @param newPos the new position
     */
    public void setPosition(final int newPos);
    public abstract void setPosition(final int newPos);

    /**
     * Gets the position of the buffer.
     *
     * @return the position
     */
    public int getPosition();
    public abstract int getPosition();

    /**
     * Reads and returns the PtNode count out of a buffer and forwards the pointer.
     */
    public int readPtNodeCount();
    public abstract int readPtNodeCount();

    /**
     * Reads the forward link and advances the position.
     *
     * @return if this method advances the position then true else false.
     * @return true if this method moves the file pointer, false otherwise.
     */
    public boolean readForwardLinkAndAdvancePosition();
    public boolean hasNextPtNodeArray();
    public abstract boolean readAndFollowForwardLink();
    public abstract boolean hasNextPtNodeArray();

    /**
     * Opens the dictionary file and makes DictBuffer.
     */
    @UsedForTesting
    public void openDictBuffer() throws FileNotFoundException, IOException;
    public abstract void openDictBuffer() throws FileNotFoundException, IOException;
    @UsedForTesting
    public boolean isOpenedDictBuffer();
    public abstract boolean isDictBufferOpen();

    // Flags for DictionaryBufferFactory.
    // Constants for DictionaryBufferFactory.
    public static final int USE_READONLY_BYTEBUFFER = 0x01000000;
    public static final int USE_BYTEARRAY = 0x02000000;
    public static final int USE_WRITABLE_BYTEBUFFER = 0x04000000;
    public static final int USE_WRITABLE_BYTEBUFFER = 0x03000000;
    public static final int MASK_DICTBUFFER = 0x0F000000;

    public interface DictionaryBufferFactory {
@@ -221,4 +271,124 @@ public interface DictDecoder {
            return null;
        }
    }

    /**
     * A utility class for reading a file header.
     */
    protected static class HeaderReader {
        protected static int readVersion(final DictBuffer dictBuffer)
                throws IOException, UnsupportedFormatException {
            return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
        }

        protected static int readOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedShort();
        }

        protected static int readHeaderSize(final DictBuffer dictBuffer) {
            return dictBuffer.readInt();
        }

        protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
                final int headerSize) {
            final HashMap<String, String> attributes = new HashMap<String, String>();
            while (dictBuffer.position() < headerSize) {
                // We can avoid an infinite loop here since dictBuffer.position() is always
                // increased by calling CharEncoding.readString.
                final String key = CharEncoding.readString(dictBuffer);
                final String value = CharEncoding.readString(dictBuffer);
                attributes.put(key, value);
            }
            dictBuffer.position(headerSize);
            return attributes;
        }
    }

    /**
     * A utility class for reading a PtNode.
     */
    protected static class PtNodeReader {
        protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedByte();
        }

        protected static int readParentAddress(final DictBuffer dictBuffer,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                return BinaryDictDecoderUtils.readSInt24(dictBuffer);
            } else {
                return FormatSpec.NO_PARENT_ADDRESS;
            }
        }

        protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
                if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
                return address;
            } else {
                switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
                        return dictBuffer.readUnsignedByte();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
                        return dictBuffer.readUnsignedShort();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
                        return dictBuffer.readUnsignedInt24();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
                    default:
                        return FormatSpec.NO_CHILDREN_ADDRESS;
                }
            }
        }

        // Reads shortcuts and returns the read length.
        protected static int readShortcut(final DictBuffer dictBuffer,
                final ArrayList<WeightedString> shortcutTargets) {
            final int pointerBefore = dictBuffer.position();
            dictBuffer.readUnsignedShort(); // skip the size
            while (true) {
                final int targetFlags = dictBuffer.readUnsignedByte();
                final String word = CharEncoding.readString(dictBuffer);
                shortcutTargets.add(new WeightedString(word,
                        targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
                if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            return dictBuffer.position() - pointerBefore;
        }

        protected static int readBigramAddresses(final DictBuffer dictBuffer,
                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
            int readLength = 0;
            int bigramCount = 0;
            while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                final int bigramFlags = dictBuffer.readUnsignedByte();
                ++readLength;
                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
                        ? 1 : -1;
                int bigramAddress = baseAddress + readLength;
                switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
                        bigramAddress += sign * dictBuffer.readUnsignedByte();
                        readLength += 1;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedShort();
                        readLength += 2;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedInt24();
                        readLength += 3;
                        break;
                    default:
                        throw new RuntimeException("Has bigrams with no address");
                }
                bigrams.add(new PendingAttribute(
                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
                        bigramAddress));
                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
            }
            return readLength;
        }
    }
}
+13 −5
Original line number Diff line number Diff line
@@ -360,19 +360,27 @@ public final class FormatSpec {
     * Returns new dictionary decoder.
     *
     * @param dictFile the dictionary file.
     * @param bufferType the flag indicating buffer type which is used by the dictionary decoder.
     * @param bufferType The type of buffer, as one of USE_* in DictDecoder.
     * @return new dictionary decoder if the dictionary file exists, otherwise null.
     */
    public static DictDecoder getDictDecoder(final File dictFile, final int bufferType) {
        if (!dictFile.isFile()) return null;
        if (dictFile.isDirectory()) {
            return new Ver4DictDecoder(dictFile, bufferType);
        } else if (dictFile.isFile()) {
            return new Ver3DictDecoder(dictFile, bufferType);
        }
        return null;
    }

    public static DictDecoder getDictDecoder(final File dictFile,
            final DictionaryBufferFactory factory) {
        if (!dictFile.isFile()) return null;
        if (dictFile.isDirectory()) {
            return new Ver4DictDecoder(dictFile, factory);
        } else if (dictFile.isFile()) {
            return new Ver3DictDecoder(dictFile, factory);
        }
        return null;
    }

    public static DictDecoder getDictDecoder(final File dictFile) {
        return getDictDecoder(dictFile, DictDecoder.USE_READONLY_BYTEBUFFER);
Loading