Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d794b42f authored by Yuichiro Hanada
Browse files

Add HeaderReaderInterface.

Change-Id: I298f86b70d18cd08b240509b6f757c72e1a59ffe
parent 606a056b
Loading
Loading
Loading
Loading
+13 −15
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.JniUtils;

import java.io.ByteArrayOutputStream;
@@ -250,7 +251,7 @@ public final class BinaryDictDecoder {
        /**
         * Reads a string from a buffer. This is the converse of the above method.
         */
        private static String readString(final FusionDictionaryBufferInterface buffer) {
        static String readString(final FusionDictionaryBufferInterface buffer) {
            final StringBuilder s = new StringBuilder();
            int character = readChar(buffer);
            while (character != FormatSpec.INVALID_CHARACTER) {
@@ -629,7 +630,7 @@ public final class BinaryDictDecoder {
     * @throws UnsupportedFormatException
     * @throws IOException
     */
    private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
    static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
            throws IOException, UnsupportedFormatException {
        final int version = getFormatVersion(buffer);
        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
@@ -643,25 +644,22 @@ public final class BinaryDictDecoder {

    /**
     * Reads a header from a buffer.
     * @param buffer the buffer to read.
     * @param headerReader the header reader
     * @throws IOException
     * @throws UnsupportedFormatException
     */
    public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
    public static FileHeader readHeader(final HeaderReaderInterface headerReader)
            throws IOException, UnsupportedFormatException {
        final int version = checkFormatVersion(buffer);
        final int optionsFlags = buffer.readUnsignedShort();
        final int version = headerReader.readVersion();
        final int optionsFlags = headerReader.readOptionFlags();

        final HashMap<String, String> attributes = new HashMap<String, String>();
        final int headerSize;
        headerSize = buffer.readInt();
        final int headerSize = headerReader.readHeaderSize();

        if (headerSize < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        populateOptions(buffer, headerSize, attributes);
        buffer.position(headerSize);
        final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);

        final FileHeader header = new FileHeader(headerSize,
                new FusionDictionary.DictionaryOptions(attributes,
@@ -711,14 +709,14 @@ public final class BinaryDictDecoder {
        }

        // Read header
        final FileHeader header = readHeader(reader.getBuffer());
        final FileHeader fileHeader = readHeader(reader);

        Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
        Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
        final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize,
                reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions);
        final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
                reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);

        FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
        FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
        if (null != dict) {
            for (final Word w : dict) {
                if (w.mIsBlacklistEntry) {
+29 −24
Original line number Diff line number Diff line
@@ -24,13 +24,13 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
@@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
     * Reads unigrams and bigrams from the binary file.
     * Doesn't store a full memory representation of the dictionary.
     *
     * @param reader the reader.
     * @param dictReader the dict reader.
     * @param words the map to store the address as a key and the word as a value.
     * @param frequencies the map to store the address as a key and the frequency as a value.
     * @param bigrams the map to store the address as a key and the list of address as a value.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader,
    public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
            UnsupportedFormatException {
        // Read header
        final FileHeader header = BinaryDictDecoder.readHeader(reader.getBuffer());
        readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words,
        final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
        readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
                frequencies, bigrams, header.mFormatOptions);
    }

@@ -162,20 +162,20 @@ public final class BinaryDictIOUtils {
     * Gets the address of the last CharGroup of the exact matching word in the dictionary.
     * If no match is found, returns NOT_VALID_WORD.
     *
     * @param reader the reader.
     * @param dictReader the dict reader.
     * @param word the word we search for.
     * @return the address of the terminal node.
     * @throws IOException if the file can't be read.
     * @throws UnsupportedFormatException if the format of the file is not recognized.
     */
    @UsedForTesting
    public static int getTerminalPosition(final BinaryDictReader reader,
    public static int getTerminalPosition(final BinaryDictReader dictReader,
            final String word) throws IOException, UnsupportedFormatException {
        final FusionDictionaryBufferInterface buffer = reader.getBuffer();
        final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
        if (word == null) return FormatSpec.NOT_VALID_WORD;
        if (buffer.position() != 0) buffer.position(0);

        final FileHeader header = BinaryDictDecoder.readHeader(buffer);
        final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
        int wordPos = 0;
        final int wordLen = word.codePointCount(0, word.length());
        for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
@@ -510,20 +510,20 @@ public final class BinaryDictIOUtils {
    /**
     * Find a word using the BinaryDictReader.
     *
     * @param reader the reader
     * @param dictReader the dict reader
     * @param word the word searched
     * @return the found group
     * @throws IOException
     * @throws UnsupportedFormatException
     */
    @UsedForTesting
    public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader,
    public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
            final String word) throws IOException, UnsupportedFormatException {
        int position = getTerminalPosition(reader, word);
        final FusionDictionaryBufferInterface buffer = reader.getBuffer();
        int position = getTerminalPosition(dictReader, word);
        final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
        if (position != FormatSpec.NOT_VALID_WORD) {
            buffer.position(0);
            final FileHeader header = BinaryDictDecoder.readHeader(buffer);
            final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
            buffer.position(position);
            return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
        }
@@ -544,17 +544,22 @@ public final class BinaryDictIOUtils {
            final File file, final long offset, final long length)
            throws FileNotFoundException, IOException, UnsupportedFormatException {
        final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
        final BinaryDictReader dictReader = new BinaryDictReader(file);
        dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
            @Override
            public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
                    throws FileNotFoundException, IOException {
                final FileInputStream inStream = new FileInputStream(file);
                try {
                    inStream.read(buffer);
            final BinaryDictDecoder.ByteBufferWrapper wrapper =
                    new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map(
                            FileChannel.MapMode.READ_ONLY, offset, length));
            return BinaryDictDecoder.readHeader(wrapper);
                    return new ByteArrayWrapper(buffer);
                } finally {
                    inStream.close();
                }
            }
        });
        return BinaryDictDecoder.readHeader(dictReader);
    }

    public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
            final long length) {
+34 −1
Original line number Diff line number Diff line
@@ -17,7 +17,9 @@
package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;

import java.io.File;
@@ -27,8 +29,9 @@ import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;

public class BinaryDictReader {
public class BinaryDictReader implements HeaderReaderInterface {

    public interface FusionDictionaryBufferFactory {
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
@@ -133,4 +136,34 @@ public class BinaryDictReader {
        openBuffer(factory);
        return getBuffer();
    }

    // The implementation of HeaderReaderInterface
    /**
     * Reads the format version from the underlying dictionary buffer.
     *
     * Delegates to {@link BinaryDictDecoder#checkFormatVersion}, so the value returned is the
     * one that helper produced for the buffer currently held by this reader.
     *
     * @return the format version reported by the version check.
     * @throws IOException propagated from the version check.
     * @throws UnsupportedFormatException propagated from the version check.
     */
    @Override
    public int readVersion() throws IOException, UnsupportedFormatException {
        final int formatVersion = BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
        return formatVersion;
    }

    /**
     * Reads the option flags from the underlying dictionary buffer.
     *
     * The flags are consumed as a single unsigned short from the buffer.
     *
     * @return the option flags read from the buffer.
     */
    @Override
    public int readOptionFlags() {
        final int optionFlags = mFusionDictionaryBuffer.readUnsignedShort();
        return optionFlags;
    }

    /**
     * Reads the header size from the underlying dictionary buffer.
     *
     * The size is consumed as a single int from the buffer; no range check is done here.
     *
     * @return the header size read from the buffer.
     */
    @Override
    public int readHeaderSize() {
        final int headerSize = mFusionDictionaryBuffer.readInt();
        return headerSize;
    }

    /**
     * Reads the header attributes from the underlying dictionary buffer.
     *
     * Key/value string pairs are read one after another for as long as the buffer position is
     * below {@code headerSize}. Afterwards the buffer position is set to {@code headerSize}.
     *
     * @param headerSize the position at which attribute reading stops and where the buffer is
     *        left once this method returns.
     * @return a map holding every key/value pair that was read; a key read more than once keeps
     *         the value read last.
     */
    @Override
    public HashMap<String, String> readAttributes(final int headerSize) {
        final FusionDictionaryBufferInterface buffer = mFusionDictionaryBuffer;
        final HashMap<String, String> result = new HashMap<String, String>();
        // Termination relies on CharEncoding.readString moving the buffer position forward on
        // every call, so the position eventually reaches headerSize.
        while (buffer.position() < headerSize) {
            final String attributeKey = CharEncoding.readString(buffer);
            final String attributeValue = CharEncoding.readString(buffer);
            result.put(attributeKey, attributeValue);
        }
        // Leave the buffer exactly at the end of the header, whatever the last read consumed.
        buffer.position(headerSize);
        return result;
    }
}
+26 −26
Original line number Diff line number Diff line
@@ -49,18 +49,18 @@ public final class DynamicBinaryDictIOUtils {
    /**
     * Delete the word from the binary file.
     *
     * @param reader the reader.
     * @param dictReader the dict reader.
     * @param word the word we delete
     * @throws IOException
     * @throws UnsupportedFormatException
     */
    @UsedForTesting
    public static void deleteWord(final BinaryDictReader reader, final String word)
    public static void deleteWord(final BinaryDictReader dictReader, final String word)
            throws IOException, UnsupportedFormatException {
        final FusionDictionaryBufferInterface buffer = reader.getBuffer();
        final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
        buffer.position(0);
        final FileHeader header = BinaryDictDecoder.readHeader(buffer);
        final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word);
        final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
        final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
        if (wordPosition == FormatSpec.NOT_VALID_WORD) return;

        buffer.position(wordPosition);
@@ -236,7 +236,7 @@ public final class DynamicBinaryDictIOUtils {
    /**
     * Insert a word into a binary dictionary.
     *
     * @param reader the reader.
     * @param dictReader the dict reader.
     * @param destination a stream to the underlying file, with the pointer at the end of the file.
     * @param word the word to insert.
     * @param frequency the frequency of the new word.
@@ -249,16 +249,16 @@ public final class DynamicBinaryDictIOUtils {
    // TODO: Support batch insertion.
    // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
    @UsedForTesting
    public static void insertWord(final BinaryDictReader reader, final OutputStream destination,
    public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
            final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
            final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
            final boolean isBlackListEntry)
                    throws IOException, UnsupportedFormatException {
        final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
        final FusionDictionaryBufferInterface buffer = reader.getBuffer();
        final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
        if (bigramStrings != null) {
            for (final WeightedString bigram : bigramStrings) {
                int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord);
                int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
                if (position == FormatSpec.NOT_VALID_WORD) {
                    // TODO: figure out what is the correct thing to do here.
                } else {
@@ -273,7 +273,7 @@ public final class DynamicBinaryDictIOUtils {

        // find the insert position of the word.
        if (buffer.position() != 0) buffer.position(0);
        final FileHeader header = BinaryDictDecoder.readHeader(buffer);
        final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);

        int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
        final int[] codePoints = FusionDictionary.getCodePoints(word);
@@ -289,9 +289,9 @@ public final class DynamicBinaryDictIOUtils {
            for (int i = 0; i < charGroupCount; ++i) {
                address = buffer.position();
                final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
                        buffer.position(), header.mFormatOptions);
                        buffer.position(), fileHeader.mFormatOptions);
                final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
                        header.mFormatOptions);
                        fileHeader.mFormatOptions);
                if (isMovedGroup) continue;
                nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS)
                        ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address;
@@ -311,16 +311,16 @@ public final class DynamicBinaryDictIOUtils {
                        final int newNodeAddress = buffer.limit();
                        final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
                                isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
                                false /* isBlackListEntry */, header.mFormatOptions);
                                false /* isBlackListEntry */, fileHeader.mFormatOptions);
                        int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
                                frequency, nodeParentAddress, shortcuts, bigrams, destination,
                                buffer, nodeOriginAddress, address, header.mFormatOptions);
                                buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);

                        final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
                                currentInfo.mCharacters.length);
                        if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
                            updateParentAddresses(buffer, currentInfo.mChildrenAddress,
                                    newNodeAddress + written + 1, header.mFormatOptions);
                                    newNodeAddress + written + 1, fileHeader.mFormatOptions);
                        }
                        final CharGroupInfo newInfo2 = new CharGroupInfo(
                                newNodeAddress + written + 1, -1 /* endAddress */,
@@ -352,17 +352,17 @@ public final class DynamicBinaryDictIOUtils {
                                    false /* isTerminal */, 0 /* childrenAddressSize*/,
                                    false /* hasShortcut */, false /* hasBigrams */,
                                    false /* isNotAWord */, false /* isBlackListEntry */,
                                    header.mFormatOptions);
                                    fileHeader.mFormatOptions);
                            int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
                                    prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
                                    destination, buffer, nodeOriginAddress, address,
                                    header.mFormatOptions);
                                    fileHeader.mFormatOptions);

                            final int[] suffixCharacters = Arrays.copyOfRange(
                                    currentInfo.mCharacters, p, currentInfo.mCharacters.length);
                            if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
                                updateParentAddresses(buffer, currentInfo.mChildrenAddress,
                                        newNodeAddress + written + 1, header.mFormatOptions);
                                        newNodeAddress + written + 1, fileHeader.mFormatOptions);
                            }
                            final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
                                    suffixCharacters.length > 1,
@@ -371,21 +371,21 @@ public final class DynamicBinaryDictIOUtils {
                                    (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)
                                            != 0,
                                    (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0,
                                    isNotAWord, isBlackListEntry, header.mFormatOptions);
                                    isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
                            final CharGroupInfo suffixInfo = new CharGroupInfo(
                                    newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags,
                                    suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1,
                                    currentInfo.mChildrenAddress, currentInfo.mShortcutTargets,
                                    currentInfo.mBigrams);
                            written += BinaryDictIOUtils.computeGroupSize(suffixInfo,
                                    header.mFormatOptions) + 1;
                                    fileHeader.mFormatOptions) + 1;

                            final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p,
                                    codePoints.length);
                            final int flags = BinaryDictEncoder.makeCharGroupFlags(
                                    newCharacters.length > 1, isTerminal,
                                    0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
                                    isNotAWord, isBlackListEntry, header.mFormatOptions);
                                    isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
                            final CharGroupInfo newInfo = new CharGroupInfo(
                                    newNodeAddress + written, -1 /* endAddress */, flags,
                                    newCharacters, frequency, newNodeAddress + 1,
@@ -407,13 +407,13 @@ public final class DynamicBinaryDictIOUtils {
                        final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
                        final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
                                isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
                                isNotAWord, isBlackListEntry, header.mFormatOptions);
                                isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
                        final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
                                -1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
                                nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
                                bigrams);
                        moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
                                header.mFormatOptions);
                                fileHeader.mFormatOptions);
                        return;
                    }
                    wordPos += currentInfo.mCharacters.length;
@@ -432,12 +432,12 @@ public final class DynamicBinaryDictIOUtils {
                         */
                        final int newNodeAddress = buffer.limit();
                        updateChildrenAddress(buffer, address, newNodeAddress,
                                header.mFormatOptions);
                                fileHeader.mFormatOptions);
                        final int newGroupAddress = newNodeAddress + 1;
                        final boolean hasMultipleChars = (wordLen - wordPos) > 1;
                        final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
                                isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
                                isNotAWord, isBlackListEntry, header.mFormatOptions);
                                isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
                        final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
                        final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags,
                                characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS,
@@ -482,7 +482,7 @@ public final class DynamicBinaryDictIOUtils {
                final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
                final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
                        isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
                        isNotAWord, isBlackListEntry, header.mFormatOptions);
                        isNotAWord, isBlackListEntry, fileHeader.mFormatOptions);
                final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1,
                        -1 /* endAddress */, flags, characters, frequency, nodeParentAddress,
                        FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams);
+32 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict.decoder;

import com.android.inputmethod.latin.makedict.UnsupportedFormatException;

import java.io.IOException;
import java.util.HashMap;

/**
 * An interface to read a binary dictionary file header.
 *
 * Each method reads one part of the header: the format version, the option flags, the header
 * size and the attribute map.
 */
public interface HeaderReaderInterface {
    /**
     * Reads the format version of the dictionary.
     *
     * @return the format version.
     * @throws IOException if the underlying data can't be read.
     * @throws UnsupportedFormatException if the format is not supported.
     */
    int readVersion() throws IOException, UnsupportedFormatException;

    /**
     * Reads the option flags of the dictionary.
     *
     * @return the option flags.
     */
    int readOptionFlags();

    /**
     * Reads the size of the header.
     *
     * @return the header size.
     */
    int readHeaderSize();

    /**
     * Reads the attributes stored in the header.
     *
     * @param headerSize the size of the header, as returned by {@link #readHeaderSize()}.
     * @return the attributes as a map of keys to values.
     */
    HashMap<String, String> readAttributes(int headerSize);
}
Loading