Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ab6a9377 authored by Keisuke Kuroyanagi
Browse files

Use native logic to read Ver4 dict.

Bug: 11281877
Bug: 12810574
Change-Id: Ief371d3ef61818e4e031de4659aee3c9584c7379
parent fc9ca59c
Loading
Loading
Loading
Loading
+46 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -223,4 +224,49 @@ public abstract class AbstractDictDecoder implements DictDecoder {
    public boolean hasValidRawBinaryDictionary() {
        // "Valid" here means exactly that checkHeader() reports SUCCESS; this method performs
        // no other check of its own.
        return checkHeader() == SUCCESS;
    }

    // Placeholder implementations below. These are actually unused.
    /**
     * Placeholder implementation: does nothing and opens no buffer.
     * The declared exceptions are never thrown by this empty body.
     */
    @Override
    public void openDictBuffer() throws FileNotFoundException, IOException,
            UnsupportedFormatException {
    }

    /**
     * Placeholder implementation.
     *
     * @return always false.
     */
    @Override
    public boolean isDictBufferOpen() {
        return false;
    }

    /**
     * Placeholder implementation; both arguments are ignored.
     *
     * @param ptNodePos ignored.
     * @param options ignored.
     * @return always null.
     */
    @Override
    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
        return null;
    }

    /**
     * Placeholder implementation: does nothing.
     *
     * @param newPos ignored.
     */
    @Override
    public void setPosition(int newPos) {
    }

    /**
     * Placeholder implementation.
     *
     * @return always 0.
     */
    @Override
    public int getPosition() {
        return 0;
    }

    /**
     * Placeholder implementation; nothing is read.
     *
     * @return always 0.
     */
    @Override
    public int readPtNodeCount() {
        return 0;
    }

    /**
     * Placeholder implementation; nothing is read or followed.
     *
     * @return always false.
     */
    @Override
    public boolean readAndFollowForwardLink() {
        return false;
    }

    /**
     * Placeholder implementation.
     *
     * @return always false.
     */
    @Override
    public boolean hasNextPtNodeArray() {
        return false;
    }

    /**
     * Placeholder implementation: does nothing.
     *
     * @param formatOptions ignored.
     */
    @Override
    @UsedForTesting
    public void skipPtNode(final FormatOptions formatOptions) {
    }
}
+11 −11
Original line number Diff line number Diff line
@@ -436,25 +436,25 @@ public final class FusionDictionary implements Iterable<WordProperty> {
    /**
     * Helper method to add a new bigram to the dictionary.
     *
     * @param word1 the previous word of the context
     * @param word2 the next word of the context
     * @param word0 the previous word of the context
     * @param word1 the next word of the context
     * @param frequency the bigram frequency
     */
    public void setBigram(final String word1, final String word2, final int frequency) {
        PtNode ptNode = findWordInTree(mRootNodeArray, word1);
        if (ptNode != null) {
            final PtNode ptNode2 = findWordInTree(mRootNodeArray, word2);
            if (ptNode2 == null) {
                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
    public void setBigram(final String word0, final String word1, final int frequency) {
        PtNode ptNode0 = findWordInTree(mRootNodeArray, word0);
        if (ptNode0 != null) {
            final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
            if (ptNode1 == null) {
                add(getCodePoints(word1), 0, null, false /* isNotAWord */,
                        false /* isBlacklistEntry */);
                // The PtNode for the first word may have moved by the above insertion,
                // if word1 and word2 share a common stem that happens not to have been
                // a cutting point until now. In this case, we need to refresh ptNode.
                ptNode = findWordInTree(mRootNodeArray, word1);
                ptNode0 = findWordInTree(mRootNodeArray, word0);
            }
            ptNode.addBigram(word2, frequency);
            ptNode0.addBigram(word1, frequency);
        } else {
            throw new RuntimeException("First word of bigram not found");
            throw new RuntimeException("First word of bigram not found " + word0);
        }
    }

+0 −223
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.CollectionUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;

/**
 * SparseTable is an extensible map from integer to integer.
 * This holds one value for every mBlockSize keys, so it uses 1/mBlockSize'th of the full index
 * memory.
 *
 * One lookup table is shared by mContentTableCount content tables. Room for a block of
 * mBlockSize consecutive keys is appended to every content table the first time a key of that
 * block is set; until then the block's lookup entry is NOT_EXIST.
 */
@UsedForTesting
public class SparseTable {

    /**
     * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize
     * terminals.
     * It contains at index i = j / mBlockSize the index in each ArrayList in mContentTables where
     * the values for terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized
     * integer array.
     */
    private final ArrayList<Integer> mLookupTable;
    private final ArrayList<ArrayList<Integer>> mContentTables;

    private final int mBlockSize;
    private final int mContentTableCount;
    /** Marker stored in the lookup table and in the content tables for "no value". */
    public static final int NOT_EXIST = -1;
    /** Number of bytes used to serialize each table entry. */
    public static final int SIZE_OF_INT_IN_BYTES = 4;

    /**
     * Creates an empty table.
     *
     * @param initialCapacity the number of keys the lookup table must cover; it is rounded up to
     * a whole number of blocks.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     * @param contentTableCount the number of content tables to create.
     */
    @UsedForTesting
    public SparseTable(final int initialCapacity, final int blockSize,
            final int contentTableCount) {
        mBlockSize = blockSize;
        // Ceiling division: a partially used trailing block still needs a lookup entry.
        final int lookupTableSize = initialCapacity / mBlockSize
                + (initialCapacity % mBlockSize > 0 ? 1 : 0);
        mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST));
        mContentTableCount = contentTableCount;
        mContentTables = CollectionUtils.newArrayList();
        for (int i = 0; i < mContentTableCount; ++i) {
            mContentTables.add(new ArrayList<Integer>());
        }
    }

    /**
     * Creates a table backed by the given lists. The lists are used as is, not copied.
     *
     * @param lookupTable the lookup table.
     * @param contentTables the content tables; their number becomes the content table count.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     */
    @UsedForTesting
    public SparseTable(final ArrayList<Integer> lookupTable,
            final ArrayList<ArrayList<Integer>> contentTables, final int blockSize) {
        mBlockSize = blockSize;
        mContentTableCount = contentTables.size();
        mLookupTable = lookupTable;
        mContentTables = contentTables;
    }

    /**
     * Converts an byte array to an int array considering each set of 4 bytes is an int stored in
     * big-endian.
     * The length of byteArray must be a multiple of four.
     * Otherwise, IndexOutOfBoundsException will be raised.
     */
    @UsedForTesting
    private static ArrayList<Integer> convertByteArrayToIntegerArray(final byte[] byteArray) {
        final ArrayList<Integer> integerArray = new ArrayList<Integer>(byteArray.length / 4);
        for (int i = 0; i < byteArray.length; i += 4) {
            int value = 0;
            for (int j = i; j < i + 4; ++j) {
                value <<= 8;
                // Mask to avoid sign extension of the byte.
                value |= byteArray[j] & 0xFF;
            }
            integerArray.add(value);
        }
        return integerArray;
    }

    /**
     * Gets the value stored for a key in one content table.
     *
     * @param contentTableIndex the index of the content table to read.
     * @param index the key.
     * @return the stored value, or NOT_EXIST if contains(index) is false.
     */
    @UsedForTesting
    public int get(final int contentTableIndex, final int index) {
        if (!contains(index)) {
            return NOT_EXIST;
        }
        return mContentTables.get(contentTableIndex).get(
                mLookupTable.get(index / mBlockSize) + (index % mBlockSize));
    }

    /**
     * Gets the values stored for a key in every content table.
     *
     * @param index the key.
     * @return a new list with one value per content table, in content table order.
     */
    @UsedForTesting
    public ArrayList<Integer> getAll(final int index) {
        final ArrayList<Integer> ret = CollectionUtils.newArrayList();
        for (int i = 0; i < mContentTableCount; ++i) {
            ret.add(get(i, index));
        }
        return ret;
    }

    /**
     * Stores a value for a key in one content table.
     * If the block containing the key has no storage yet, a block of NOT_EXIST values is appended
     * to every content table first. The lookup table itself is not grown by this method.
     *
     * @param contentTableIndex the index of the content table to write.
     * @param index the key.
     * @param value the value to store.
     */
    @UsedForTesting
    public void set(final int contentTableIndex, final int index, final int value) {
        if (mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
            mLookupTable.set(index / mBlockSize, mContentTables.get(contentTableIndex).size());
            // Every content table gets the new block so that they all share the same offsets.
            for (int i = 0; i < mContentTableCount; ++i) {
                for (int j = 0; j < mBlockSize; ++j) {
                    mContentTables.get(i).add(NOT_EXIST);
                }
            }
        }
        mContentTables.get(contentTableIndex).set(
                mLookupTable.get(index / mBlockSize) + (index % mBlockSize), value);
    }

    /**
     * Resets the value for a key in one content table to NOT_EXIST.
     *
     * @param indexOfContent the index of the content table to write.
     * @param index the key.
     */
    public void remove(final int indexOfContent, final int index) {
        set(indexOfContent, index, NOT_EXIST);
    }

    /**
     * @return the number of keys covered by the lookup table.
     */
    @UsedForTesting
    public int size() {
        return mLookupTable.size() * mBlockSize;
    }

    @UsedForTesting
    /* package */ int getContentTableSize() {
        // This class always has at least one content table.
        return mContentTables.get(0).size();
    }

    @UsedForTesting
    /* package */ int getLookupTableSize() {
        return mLookupTable.size();
    }

    /**
     * Tells whether storage has been allocated for the block containing a key.
     *
     * @param index the key.
     * @return false if the key is negative, beyond the lookup table, or in a block whose lookup
     * entry is NOT_EXIST; true otherwise.
     */
    public boolean contains(final int index) {
        if (index < 0 || index / mBlockSize >= mLookupTable.size()
                || mLookupTable.get(index / mBlockSize) == NOT_EXIST) {
            return false;
        }
        return true;
    }

    /**
     * Writes the lookup table and the content tables to streams, SIZE_OF_INT_IN_BYTES bytes per
     * entry. The streams are not closed.
     *
     * @param lookupOutStream the stream receiving the lookup table.
     * @param contentOutStreams the streams receiving the content tables, one per content table.
     * @throws RuntimeException if the number of streams differs from the content table count.
     * @throws IOException if writing fails.
     */
    @UsedForTesting
    public void write(final OutputStream lookupOutStream, final OutputStream[] contentOutStreams)
            throws IOException {
        if (contentOutStreams.length != mContentTableCount) {
            throw new RuntimeException(contentOutStreams.length + " streams are given, but the"
                    + " table has " + mContentTableCount + " content tables.");
        }
        for (final int index : mLookupTable) {
            BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, SIZE_OF_INT_IN_BYTES);
        }

        for (int i = 0; i < contentOutStreams.length; ++i) {
            for (final int data : mContentTables.get(i)) {
                BinaryDictEncoderUtils.writeUIntToStream(contentOutStreams[i], data,
                        SIZE_OF_INT_IN_BYTES);
            }
        }
    }

    /**
     * Writes the lookup table and the content tables to files.
     * Every stream that was opened is closed before returning, even if writing or closing
     * another stream failed. When several I/O failures happen, the first one is reported.
     *
     * @param lookupTableFile the file receiving the lookup table.
     * @param contentFiles the files receiving the content tables.
     * @throws IOException if opening, writing or closing a file fails.
     */
    @UsedForTesting
    public void writeToFiles(final File lookupTableFile, final File[] contentFiles)
            throws IOException {
        IOException firstFailure = null;
        FileOutputStream lookupTableOutStream = null;
        final FileOutputStream[] contentTableOutStreams = new FileOutputStream[mContentTableCount];
        try {
            lookupTableOutStream = new FileOutputStream(lookupTableFile);
            for (int i = 0; i < contentFiles.length; ++i) {
                contentTableOutStreams[i] = new FileOutputStream(contentFiles[i]);
            }
            write(lookupTableOutStream, contentTableOutStreams);
        } catch (final IOException e) {
            firstFailure = e;
        } finally {
            // Runs for unchecked exceptions too, so no stream is left open on any path.
            firstFailure = closeRecordingFailure(lookupTableOutStream, firstFailure);
            for (int i = 0; i < contentTableOutStreams.length; ++i) {
                firstFailure = closeRecordingFailure(contentTableOutStreams[i], firstFailure);
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Closes a stream if it is not null, without letting a failure prevent later clean-up.
     *
     * @param stream the stream to close; may be null.
     * @param earlierFailure a failure that already happened, or null.
     * @return earlierFailure if not null, else the exception raised by close() if any, else null.
     */
    private static IOException closeRecordingFailure(final OutputStream stream,
            final IOException earlierFailure) {
        if (stream == null) {
            return earlierFailure;
        }
        try {
            stream.close();
        } catch (final IOException e) {
            return earlierFailure != null ? earlierFailure : e;
        }
        return earlierFailure;
    }

    /**
     * Reads a whole file into a byte array sized from File.length().
     *
     * @param file the file to read.
     * @return the contents of the file.
     * @throws IOException if reading fails or the file ends before the expected length.
     */
    private static byte[] readFileToByteArray(final File file) throws IOException {
        final byte[] contents = new byte[(int) file.length()];
        FileInputStream inStream = null;
        try {
            inStream = new FileInputStream(file);
            // A single read() call may return fewer bytes than requested, so keep reading
            // until the buffer is full.
            int offset = 0;
            while (offset < contents.length) {
                final int readCount = inStream.read(contents, offset, contents.length - offset);
                if (readCount < 0) {
                    throw new IOException("Unexpected end of file " + file + ": read " + offset
                            + " of " + contents.length + " bytes");
                }
                offset += readCount;
            }
        } finally {
            if (inStream != null) {
                inStream.close();
            }
        }
        return contents;
    }

    /**
     * Builds a table from files holding big-endian 4-byte entries.
     *
     * @param lookupTableFile the file holding the lookup table.
     * @param contentFiles the files holding the content tables, one per content table.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     * @return the table read from the files.
     * @throws IOException if reading a file fails.
     */
    @UsedForTesting
    public static SparseTable readFromFiles(final File lookupTableFile, final File[] contentFiles,
            final int blockSize) throws IOException {
        final ArrayList<ArrayList<Integer>> contentTables =
                new ArrayList<ArrayList<Integer>>(contentFiles.length);
        for (int i = 0; i < contentFiles.length; ++i) {
            contentTables.add(convertByteArrayToIntegerArray(readFileToByteArray(contentFiles[i])));
        }
        return new SparseTable(convertByteArrayToIntegerArray(readFileToByteArray(lookupTableFile)),
                contentTables, blockSize);
    }
}
+0 −126
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Reads contents that are addressed through a SparseTable: a lookup table file, one address
 * table file per content and one content file per content, as written by
 * SparseTableContentWriter.
 */
public class SparseTableContentReader {

    /**
     * Callback handed to SparseTableContentReader.read to consume one entry.
     */
    public interface SparseTableContentReaderInterface {
        /**
         * Reads data.
         *
         * @param buffer the DictBuffer. The position of the buffer is set to the head of data.
         */
        public void read(final DictBuffer buffer);
    }

    protected final int mContentCount;
    protected final int mBlockSize;
    protected final File mBaseDir;
    protected final File mLookupTableFile;
    protected final File[] mAddressTableFiles;
    protected final File[] mContentFiles;
    protected DictBuffer mLookupTableBuffer;
    protected final DictBuffer[] mAddressTableBuffers;
    private final DictBuffer[] mContentBuffers;
    protected final DictionaryBufferFactory mFactory;

    /**
     * Sole constructor of SparseTableContentReader. Only computes file locations; no file is
     * opened until openBuffers() is called.
     *
     * @param name the name of SparseTable.
     * @param blockSize the block size of the content table.
     * @param baseDir the directory which contains the files of the content table.
     * @param contentFilenames the file names of content files.
     * @param contentSuffixes the ids of contents. These ids are used for a suffix of a name of
     * address files and content files.
     * @param factory the DictionaryBufferFactory which is used for opening the files.
     */
    public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
            final String[] contentFilenames, final String[] contentSuffixes,
            final DictionaryBufferFactory factory) {
        final int filenameCount = contentFilenames.length;
        final int suffixCount = contentSuffixes.length;
        if (filenameCount != suffixCount) {
            throw new RuntimeException("The length of contentFilenames and the length of"
                    + " contentSuffixes are different " + filenameCount + ", "
                    + suffixCount);
        }
        mContentCount = filenameCount;
        mBlockSize = blockSize;
        mFactory = factory;
        mBaseDir = baseDir;
        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);

        mAddressTableBuffers = new DictBuffer[mContentCount];
        mContentBuffers = new DictBuffer[mContentCount];
        mAddressTableFiles = new File[mContentCount];
        mContentFiles = new File[mContentCount];
        for (int content = 0; content < mContentCount; ++content) {
            final String suffix = contentSuffixes[content];
            final String addressTableName = name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + suffix;
            final String contentName = contentFilenames[content] + suffix;
            mAddressTableFiles[content] = new File(mBaseDir, addressTableName);
            mContentFiles[content] = new File(mBaseDir, contentName);
        }
    }

    /**
     * Obtains from the factory a buffer for the lookup table, then for each content its address
     * table buffer followed by its content buffer.
     */
    public void openBuffers() throws FileNotFoundException, IOException {
        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
        int content = 0;
        while (content < mContentCount) {
            mAddressTableBuffers[content] =
                    mFactory.getDictionaryBuffer(mAddressTableFiles[content]);
            mContentBuffers[content] = mFactory.getDictionaryBuffer(mContentFiles[content]);
            ++content;
        }
    }

    /**
     * Calls the read() callback of the reader with the appropriate buffer appropriately positioned.
     * The callback is not called when the terminal ID is negative or beyond the lookup table, or
     * when the lookup table or the address table holds SparseTable.NOT_EXIST for it.
     *
     * @param contentNumber the index in the original contentFilenames[] array.
     * @param terminalId the terminal ID to read.
     * @param reader the reader on which to call the callback.
     */
    protected void read(final int contentNumber, final int terminalId,
            final SparseTableContentReaderInterface reader) {
        if (terminalId < 0) {
            return;
        }
        // Byte offset, in the lookup table, of the entry for the block holding terminalId.
        final int lookupOffset = (terminalId / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
        if (lookupOffset >= mLookupTableBuffer.limit()) {
            return;
        }

        mLookupTableBuffer.position(lookupOffset);
        final int blockIndex = mLookupTableBuffer.readInt();
        if (blockIndex == SparseTable.NOT_EXIST) {
            return;
        }

        final DictBuffer addressTable = mAddressTableBuffers[contentNumber];
        final int entryIndex = (blockIndex * mBlockSize) + (terminalId % mBlockSize);
        addressTable.position(SparseTable.SIZE_OF_INT_IN_BYTES * entryIndex);
        final int address = addressTable.readInt();
        if (address == SparseTable.NOT_EXIST) {
            return;
        }

        final DictBuffer contentBuffer = mContentBuffers[contentNumber];
        contentBuffer.position(address);
        reader.read(contentBuffer);
    }
}
 No newline at end of file
+0 −93
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * An auxiliary class for writing data associated with SparseTable to files.
 *
 * Content is appended to one file per content through write(); the position of each entry is
 * recorded in a SparseTable, which is written out by closeStreams().
 */
public class SparseTableContentWriter {
    /**
     * Callback handed to SparseTableContentWriter.write to emit one entry.
     */
    public interface SparseTableContentWriterInterface {
        /**
         * Writes data.
         *
         * @param outStream the stream of the content file to write to.
         */
        public void write(final OutputStream outStream) throws IOException;
    }

    private final int mContentCount;
    private final SparseTable mSparseTable;
    private final File mLookupTableFile;
    protected final File mBaseDir;
    private final File[] mAddressTableFiles;
    private final File[] mContentFiles;
    protected final OutputStream[] mContentOutStreams;

    /**
     * Sole constructor of SparseTableContentWriter. Only computes file locations; no file is
     * opened until openStreams() is called.
     *
     * @param name the name of SparseTable.
     * @param initialCapacity the initial capacity of SparseTable.
     * @param blockSize the block size of the content table.
     * @param baseDir the directory which contains the files of the content table.
     * @param contentFilenames the file names of content files.
     * @param contentIds the ids of contents. These ids are used for a suffix of a name of address
     * files and content files.
     * @throws RuntimeException if contentFilenames and contentIds have different lengths.
     */
    public SparseTableContentWriter(final String name, final int initialCapacity,
            final int blockSize, final File baseDir, final String[] contentFilenames,
            final String[] contentIds) {
        if (contentFilenames.length != contentIds.length) {
            throw new RuntimeException("The length of contentFilenames and the length of"
                    + " contentIds are different " + contentFilenames.length + ", "
                    + contentIds.length);
        }
        mContentCount = contentFilenames.length;
        mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount);
        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
        mAddressTableFiles = new File[mContentCount];
        mContentFiles = new File[mContentCount];
        mBaseDir = baseDir;
        for (int i = 0; i < mContentCount; ++i) {
            mAddressTableFiles[i] = new File(mBaseDir,
                    name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
            mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
        }
        mContentOutStreams = new OutputStream[mContentCount];
    }

    /**
     * Opens an output stream on every content file.
     *
     * @throws FileNotFoundException if a content file cannot be opened for writing.
     */
    public void openStreams() throws FileNotFoundException {
        for (int i = 0; i < mContentCount; ++i) {
            mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]);
        }
    }

    /**
     * Appends one entry to a content file and records where it starts.
     *
     * @param contentIndex the index of the content to write to.
     * @param index the key under which the entry's position is recorded in the SparseTable.
     * @param writer the callback that writes the entry to the content stream.
     * @throws IOException if writing or flushing fails.
     */
    protected void write(final int contentIndex, final int index,
            final SparseTableContentWriterInterface writer) throws IOException {
        // The current length of the file is the address of the entry about to be written. This
        // relies on the flush below having pushed every previous entry out to the file.
        mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length());
        writer.write(mContentOutStreams[contentIndex]);
        mContentOutStreams[contentIndex].flush();
    }

    /**
     * Writes the SparseTable to the lookup table file and the address table files, then closes
     * the content streams.
     * Every opened content stream is closed even if writing the table or closing another stream
     * fails; streams that were never opened are skipped. When several I/O failures happen, the
     * first one is reported.
     *
     * @throws IOException if writing the table or closing a stream fails.
     */
    public void closeStreams() throws IOException {
        IOException firstFailure = null;
        try {
            mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles);
        } catch (final IOException e) {
            firstFailure = e;
        } finally {
            // Runs for unchecked exceptions too, so no content stream is left open on any path.
            for (int i = 0; i < mContentCount; ++i) {
                final OutputStream outStream = mContentOutStreams[i];
                if (outStream == null) {
                    continue;
                }
                try {
                    outStream.close();
                } catch (final IOException e) {
                    if (firstFailure == null) {
                        firstFailure = e;
                    }
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}
 No newline at end of file
Loading