Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6422f77e authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Use native logic to read Ver4 dict."

parents fc6d0f87 ab6a9377
Loading
Loading
Loading
Loading
+46 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -223,4 +224,49 @@ public abstract class AbstractDictDecoder implements DictDecoder {
    public boolean hasValidRawBinaryDictionary() {
        // The raw binary dictionary is reported as valid exactly when the header check
        // returns SUCCESS.
        final boolean headerIsValid = (checkHeader() == SUCCESS);
        return headerIsValid;
    }

    // Placeholder implementations below. These are actually unused.
    /**
     * Placeholder implementation: the body is empty, so no dictionary buffer is opened here.
     */
    @Override
    public void openDictBuffer() throws FileNotFoundException, IOException,
            UnsupportedFormatException {
    }

    /**
     * Placeholder implementation.
     *
     * @return always false.
     */
    @Override
    public boolean isDictBufferOpen() {
        return false;
    }

    /**
     * Placeholder implementation; both arguments are ignored.
     *
     * @param ptNodePos ignored.
     * @param options ignored.
     * @return always null.
     */
    @Override
    public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions options) {
        return null;
    }

    /**
     * Placeholder implementation: the requested position is ignored and nothing is changed.
     *
     * @param newPos ignored.
     */
    @Override
    public void setPosition(int newPos) {
    }

    /**
     * Placeholder implementation.
     *
     * @return always 0.
     */
    @Override
    public int getPosition() {
        return 0;
    }

    /**
     * Placeholder implementation.
     *
     * @return always 0.
     */
    @Override
    public int readPtNodeCount() {
        return 0;
    }

    /**
     * Placeholder implementation: nothing is read and no link is followed.
     *
     * @return always false.
     */
    @Override
    public boolean readAndFollowForwardLink() {
        return false;
    }

    /**
     * Placeholder implementation.
     *
     * @return always false.
     */
    @Override
    public boolean hasNextPtNodeArray() {
        return false;
    }

    /**
     * Placeholder implementation: the body is empty, so nothing is skipped.
     *
     * @param formatOptions ignored.
     */
    @Override
    @UsedForTesting
    public void skipPtNode(final FormatOptions formatOptions) {
    }
}
+11 −11
Original line number Diff line number Diff line
@@ -436,25 +436,25 @@ public final class FusionDictionary implements Iterable<WordProperty> {
    /**
     * Helper method to add a new bigram to the dictionary.
     *
     * @param word1 the previous word of the context
     * @param word2 the next word of the context
     * @param word0 the previous word of the context
     * @param word1 the next word of the context
     * @param frequency the bigram frequency
     */
    public void setBigram(final String word1, final String word2, final int frequency) {
        PtNode ptNode = findWordInTree(mRootNodeArray, word1);
        if (ptNode != null) {
            final PtNode ptNode2 = findWordInTree(mRootNodeArray, word2);
            if (ptNode2 == null) {
                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
    public void setBigram(final String word0, final String word1, final int frequency) {
        PtNode ptNode0 = findWordInTree(mRootNodeArray, word0);
        if (ptNode0 != null) {
            final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1);
            if (ptNode1 == null) {
                add(getCodePoints(word1), 0, null, false /* isNotAWord */,
                        false /* isBlacklistEntry */);
                // The PtNode for the first word may have moved by the above insertion,
                // if word0 and word1 share a common stem that happens not to have been
                // a cutting point until now. In this case, we need to refresh ptNode0.
                ptNode = findWordInTree(mRootNodeArray, word1);
                ptNode0 = findWordInTree(mRootNodeArray, word0);
            }
            ptNode.addBigram(word2, frequency);
            ptNode0.addBigram(word1, frequency);
        } else {
            throw new RuntimeException("First word of bigram not found");
            throw new RuntimeException("First word of bigram not found " + word0);
        }
    }

+0 −223
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.utils.CollectionUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;

/**
 * SparseTable is a map from integer keys to integer values that is stored block by block.
 *
 * Keys are grouped into blocks of mBlockSize consecutive keys. The lookup table holds a single
 * entry per block, and room for the values of a block is only appended to the content tables the
 * first time a key of that block is set. Several content tables share the same lookup table; each
 * of them stores one value per key.
 */
@UsedForTesting
public class SparseTable {

    /**
     * mLookupTable is indexed by terminal ID, containing exactly one entry for every mBlockSize
     * terminals.
     * It contains at index i = j / mBlockSize the index in each ArrayList in mContentsTables where
     * the values for terminals with IDs j to j + mBlockSize - 1 are stored as an mBlockSize-sized
     * integer array.
     */
    private final ArrayList<Integer> mLookupTable;
    private final ArrayList<ArrayList<Integer>> mContentTables;

    private final int mBlockSize;
    private final int mContentTableCount;
    public static final int NOT_EXIST = -1;
    public static final int SIZE_OF_INT_IN_BYTES = 4;

    /**
     * Creates an empty table.
     *
     * @param initialCapacity the number of keys the lookup table must be able to address.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     * @param contentTableCount the number of content tables to create.
     */
    @UsedForTesting
    public SparseTable(final int initialCapacity, final int blockSize,
            final int contentTableCount) {
        mBlockSize = blockSize;
        // Round up so that a partially filled last block still gets a lookup table entry.
        final int lookupTableSize = initialCapacity / mBlockSize
                + (initialCapacity % mBlockSize > 0 ? 1 : 0);
        mLookupTable = new ArrayList<Integer>(Collections.nCopies(lookupTableSize, NOT_EXIST));
        mContentTableCount = contentTableCount;
        mContentTables = CollectionUtils.newArrayList();
        for (int i = 0; i < mContentTableCount; ++i) {
            mContentTables.add(new ArrayList<Integer>());
        }
    }

    /**
     * Creates a table backed by the given lists. The lists are used as is, not copied.
     *
     * @param lookupTable the lookup table, one entry per block.
     * @param contentTables the content tables.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     */
    @UsedForTesting
    public SparseTable(final ArrayList<Integer> lookupTable,
            final ArrayList<ArrayList<Integer>> contentTables, final int blockSize) {
        mBlockSize = blockSize;
        mContentTableCount = contentTables.size();
        mLookupTable = lookupTable;
        mContentTables = contentTables;
    }

    /**
     * Converts an byte array to an int array considering each set of 4 bytes is an int stored in
     * big-endian.
     * The length of byteArray must be a multiple of four.
     * Otherwise, IndexOutOfBoundsException will be raised.
     */
    @UsedForTesting
    private static ArrayList<Integer> convertByteArrayToIntegerArray(final byte[] byteArray) {
        final ArrayList<Integer> integerArray = new ArrayList<Integer>(byteArray.length / 4);
        for (int i = 0; i < byteArray.length; i += 4) {
            int value = 0;
            for (int j = i; j < i + 4; ++j) {
                value <<= 8;
                // Mask to treat the byte as unsigned before merging it in.
                value |= byteArray[j] & 0xFF;
            }
            integerArray.add(value);
        }
        return integerArray;
    }

    /**
     * Returns the value stored for a key in one content table.
     *
     * @param contentTableIndex the index of the content table to read.
     * @param index the key.
     * @return the stored value, or NOT_EXIST if the block containing the key was never allocated.
     */
    @UsedForTesting
    public int get(final int contentTableIndex, final int index) {
        if (!contains(index)) {
            return NOT_EXIST;
        }
        final int blockStart = mLookupTable.get(index / mBlockSize);
        return mContentTables.get(contentTableIndex).get(blockStart + (index % mBlockSize));
    }

    /**
     * Returns the values stored for a key in every content table, in content table order.
     *
     * @param index the key.
     * @return a new list with one value per content table.
     */
    @UsedForTesting
    public ArrayList<Integer> getAll(final int index) {
        final ArrayList<Integer> ret = CollectionUtils.newArrayList();
        for (int i = 0; i < mContentTableCount; ++i) {
            ret.add(get(i, index));
        }
        return ret;
    }

    /**
     * Stores a value for a key in one content table, allocating the block of the key in every
     * content table if it does not exist yet.
     *
     * @param contentTableIndex the index of the content table to write.
     * @param index the key. It must be non-negative and addressable by the lookup table.
     * @param value the value to store.
     * @throws IndexOutOfBoundsException if index is negative or beyond the lookup table.
     */
    @UsedForTesting
    public void set(final int contentTableIndex, final int index, final int value) {
        if (index < 0) {
            // Without this check a small negative index maps to block 0 with a negative offset
            // and could silently overwrite a slot that belongs to another block.
            throw new IndexOutOfBoundsException("Negative index: " + index);
        }
        final int blockIndex = index / mBlockSize;
        if (mLookupTable.get(blockIndex) == NOT_EXIST) {
            mLookupTable.set(blockIndex, mContentTables.get(contentTableIndex).size());
            // All content tables grow together so that they share the same block offsets.
            for (int i = 0; i < mContentTableCount; ++i) {
                mContentTables.get(i).addAll(Collections.nCopies(mBlockSize, NOT_EXIST));
            }
        }
        mContentTables.get(contentTableIndex).set(
                mLookupTable.get(blockIndex) + (index % mBlockSize), value);
    }

    /**
     * Marks a key as absent in one content table by storing NOT_EXIST for it.
     *
     * @param indexOfContent the index of the content table to update.
     * @param index the key.
     */
    public void remove(final int indexOfContent, final int index) {
        set(indexOfContent, index, NOT_EXIST);
    }

    /**
     * @return the number of keys addressable by the lookup table.
     */
    @UsedForTesting
    public int size() {
        return mLookupTable.size() * mBlockSize;
    }

    @UsedForTesting
    /* package */ int getContentTableSize() {
        // This class always has at least one content table.
        return mContentTables.get(0).size();
    }

    @UsedForTesting
    /* package */ int getLookupTableSize() {
        return mLookupTable.size();
    }

    /**
     * Tells whether the block containing a key has been allocated.
     *
     * @param index the key.
     * @return false if the key is negative, beyond the lookup table, or in an unallocated block.
     */
    public boolean contains(final int index) {
        if (index < 0) {
            return false;
        }
        final int blockIndex = index / mBlockSize;
        return blockIndex < mLookupTable.size() && mLookupTable.get(blockIndex) != NOT_EXIST;
    }

    /**
     * Writes the lookup table and every content table to the given streams, one
     * SIZE_OF_INT_IN_BYTES-sized integer per entry.
     *
     * @param lookupOutStream the stream receiving the lookup table.
     * @param contentOutStreams the streams receiving the content tables, one per table.
     * @throws IOException if writing to a stream fails.
     * @throws RuntimeException if the number of streams differs from the number of tables.
     */
    @UsedForTesting
    public void write(final OutputStream lookupOutStream, final OutputStream[] contentOutStreams)
            throws IOException {
        if (contentOutStreams.length != mContentTableCount) {
            throw new RuntimeException(contentOutStreams.length + " streams are given, but the"
                    + " table has " + mContentTableCount + " content tables.");
        }
        for (final int index : mLookupTable) {
            BinaryDictEncoderUtils.writeUIntToStream(lookupOutStream, index, SIZE_OF_INT_IN_BYTES);
        }

        for (int i = 0; i < contentOutStreams.length; ++i) {
            for (final int data : mContentTables.get(i)) {
                BinaryDictEncoderUtils.writeUIntToStream(contentOutStreams[i], data,
                        SIZE_OF_INT_IN_BYTES);
            }
        }
    }

    /**
     * Writes the lookup table and every content table to files.
     *
     * @param lookupTableFile the file receiving the lookup table.
     * @param contentFiles the files receiving the content tables, one per table.
     * @throws IOException if a file cannot be opened, written or closed.
     * @throws RuntimeException if the number of files differs from the number of tables.
     */
    @UsedForTesting
    public void writeToFiles(final File lookupTableFile, final File[] contentFiles)
            throws IOException {
        if (contentFiles.length != mContentTableCount) {
            // Fail before creating any file rather than half way through the write.
            throw new RuntimeException(contentFiles.length + " files are given, but the"
                    + " table has " + mContentTableCount + " content tables.");
        }
        FileOutputStream lookupTableOutStream = null;
        final FileOutputStream[] contentTableOutStreams = new FileOutputStream[mContentTableCount];
        try {
            lookupTableOutStream = new FileOutputStream(lookupTableFile);
            for (int i = 0; i < contentFiles.length; ++i) {
                contentTableOutStreams[i] = new FileOutputStream(contentFiles[i]);
            }
            write(lookupTableOutStream, contentTableOutStreams);
        } finally {
            // Close every stream even if closing one of them fails, then report the first
            // failure.
            IOException firstCloseError = closeAndKeepFirstError(lookupTableOutStream, null);
            for (final FileOutputStream contentTableOutStream : contentTableOutStreams) {
                firstCloseError = closeAndKeepFirstError(contentTableOutStream, firstCloseError);
            }
            if (firstCloseError != null) {
                throw firstCloseError;
            }
        }
    }

    /**
     * Closes a stream if it is not null and returns the error to report: the previous error if
     * there is one, otherwise the error raised by this close, otherwise null.
     */
    private static IOException closeAndKeepFirstError(final OutputStream stream,
            final IOException previousError) {
        if (stream == null) {
            return previousError;
        }
        try {
            stream.close();
        } catch (final IOException e) {
            return previousError != null ? previousError : e;
        }
        return previousError;
    }

    /**
     * Reads the whole content of a file.
     *
     * @param file the file to read.
     * @return the bytes of the file.
     * @throws IOException if the file cannot be read entirely or does not fit in a byte array.
     */
    private static byte[] readFileToByteArray(final File file) throws IOException {
        final long fileLength = file.length();
        if (fileLength > Integer.MAX_VALUE) {
            throw new IOException("File is too large to read into a byte array: " + file);
        }
        final byte[] contents = new byte[(int) fileLength];
        final FileInputStream inStream = new FileInputStream(file);
        try {
            // A single read() may return fewer bytes than requested, so loop until full.
            int offset = 0;
            while (offset < contents.length) {
                final int readCount = inStream.read(contents, offset, contents.length - offset);
                if (readCount < 0) {
                    throw new IOException("Unexpected end of file while reading " + file);
                }
                offset += readCount;
            }
        } finally {
            inStream.close();
        }
        return contents;
    }

    /**
     * Builds a SparseTable from files holding big-endian 4-byte integers.
     *
     * @param lookupTableFile the file holding the lookup table.
     * @param contentFiles the files holding the content tables, one per table.
     * @param blockSize the number of consecutive keys sharing one lookup table entry.
     * @return the table read from the files.
     * @throws IOException if a file cannot be read.
     */
    @UsedForTesting
    public static SparseTable readFromFiles(final File lookupTableFile, final File[] contentFiles,
            final int blockSize) throws IOException {
        final ArrayList<ArrayList<Integer>> contentTables =
                new ArrayList<ArrayList<Integer>>(contentFiles.length);
        for (int i = 0; i < contentFiles.length; ++i) {
            contentTables.add(convertByteArrayToIntegerArray(readFileToByteArray(contentFiles[i])));
        }
        return new SparseTable(convertByteArrayToIntegerArray(readFileToByteArray(lookupTableFile)),
                contentTables, blockSize);
    }
}
+0 −126
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Helper for reading content entries that are addressed through a SparseTable, i.e. the data
 * written by SparseTableContentWriter.
 */
public class SparseTableContentReader {

    /**
     * Callback handed to SparseTableContentReader.read to consume one content entry.
     */
    public interface SparseTableContentReaderInterface {
        /**
         * Reads data.
         *
         * @param buffer the DictBuffer, already positioned at the head of the data.
         */
        public void read(final DictBuffer buffer);
    }

    protected final int mContentCount;
    protected final int mBlockSize;
    protected final File mBaseDir;
    protected final File mLookupTableFile;
    protected final File[] mAddressTableFiles;
    protected final File[] mContentFiles;
    protected DictBuffer mLookupTableBuffer;
    protected final DictBuffer[] mAddressTableBuffers;
    private final DictBuffer[] mContentBuffers;
    protected final DictionaryBufferFactory mFactory;

    /**
     * Sole constructor of SparseTableContentReader.
     *
     * @param name the name of SparseTable.
     * @param blockSize the block size of the content table.
     * @param baseDir the directory which contains the files of the content table.
     * @param contentFilenames the file names of content files.
     * @param contentSuffixes the ids of contents. Each id is appended to the name of the matching
     * address file and content file.
     * @param factory the DictionaryBufferFactory which is used for opening the files.
     */
    public SparseTableContentReader(final String name, final int blockSize, final File baseDir,
            final String[] contentFilenames, final String[] contentSuffixes,
            final DictionaryBufferFactory factory) {
        final int filenameCount = contentFilenames.length;
        final int suffixCount = contentSuffixes.length;
        if (filenameCount != suffixCount) {
            throw new RuntimeException("The length of contentFilenames and the length of"
                    + " contentSuffixes are different " + filenameCount + ", "
                    + suffixCount);
        }
        mContentCount = filenameCount;
        mBlockSize = blockSize;
        mBaseDir = baseDir;
        mFactory = factory;
        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);

        mAddressTableBuffers = new DictBuffer[filenameCount];
        mContentBuffers = new DictBuffer[filenameCount];
        mAddressTableFiles = new File[filenameCount];
        mContentFiles = new File[filenameCount];
        // Every address table file name starts with the same prefix; only the suffix differs.
        final String addressTablePrefix = name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX;
        for (int contentIndex = 0; contentIndex < filenameCount; ++contentIndex) {
            final String suffix = contentSuffixes[contentIndex];
            mAddressTableFiles[contentIndex] = new File(baseDir, addressTablePrefix + suffix);
            mContentFiles[contentIndex] = new File(baseDir, contentFilenames[contentIndex] + suffix);
        }
    }

    /**
     * Opens the lookup table buffer, then the address table buffer and the content buffer of
     * each content, through the factory given at construction time.
     */
    public void openBuffers() throws FileNotFoundException, IOException {
        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
        for (int contentIndex = 0; contentIndex < mContentCount; ++contentIndex) {
            final File addressTableFile = mAddressTableFiles[contentIndex];
            mAddressTableBuffers[contentIndex] = mFactory.getDictionaryBuffer(addressTableFile);
            final File contentFile = mContentFiles[contentIndex];
            mContentBuffers[contentIndex] = mFactory.getDictionaryBuffer(contentFile);
        }
    }

    /**
     * Calls the read() callback of the reader with the appropriate buffer appropriately positioned.
     * The callback is not called when the terminal ID is negative or beyond the lookup table, or
     * when the lookup table or the address table holds SparseTable.NOT_EXIST for it.
     *
     * @param contentNumber the index in the original contentFilenames[] array.
     * @param terminalId the terminal ID to read.
     * @param reader the reader on which to call the callback.
     */
    protected void read(final int contentNumber, final int terminalId,
            final SparseTableContentReaderInterface reader) {
        if (terminalId < 0) {
            return;
        }
        final int lookupOffset = (terminalId / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
        if (lookupOffset >= mLookupTableBuffer.limit()) {
            return;
        }

        // Step 1: the lookup table gives the index of the block in the address table.
        mLookupTableBuffer.position(lookupOffset);
        final int indexInAddressTable = mLookupTableBuffer.readInt();
        if (SparseTable.NOT_EXIST == indexInAddressTable) {
            return;
        }

        // Step 2: the address table gives the position of the data in the content buffer.
        final DictBuffer addressTableBuffer = mAddressTableBuffers[contentNumber];
        final int slotInAddressTable = (indexInAddressTable * mBlockSize)
                + (terminalId % mBlockSize);
        addressTableBuffer.position(SparseTable.SIZE_OF_INT_IN_BYTES * slotInAddressTable);
        final int address = addressTableBuffer.readInt();
        if (SparseTable.NOT_EXIST == address) {
            return;
        }

        // Step 3: hand the positioned content buffer to the callback.
        final DictBuffer contentBuffer = mContentBuffers[contentNumber];
        contentBuffer.position(address);
        reader.read(contentBuffer);
    }
}
 No newline at end of file
+0 −93
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * An auxiliary class for writing data associated with SparseTable to files.
 *
 * Content is appended to one file per content; the position of each entry in its content file is
 * recorded in a SparseTable, which is written to the lookup table file and the address table
 * files when the streams are closed.
 */
public class SparseTableContentWriter {
    /**
     * Callback handed to SparseTableContentWriter.write to produce one content entry.
     */
    public interface SparseTableContentWriterInterface {
        /**
         * Writes data.
         *
         * @param outStream the stream of the content file to write the entry to.
         */
        public void write(final OutputStream outStream) throws IOException;
    }

    private final int mContentCount;
    private final SparseTable mSparseTable;
    private final File mLookupTableFile;
    protected final File mBaseDir;
    private final File[] mAddressTableFiles;
    private final File[] mContentFiles;
    protected final OutputStream[] mContentOutStreams;

    /**
     * Sole constructor of SparseTableContentWriter.
     *
     * @param name the name of SparseTable.
     * @param initialCapacity the initial capacity of SparseTable.
     * @param blockSize the block size of the content table.
     * @param baseDir the directory which contains the files of the content table.
     * @param contentFilenames the file names of content files.
     * @param contentIds the ids of contents. These ids are used for a suffix of a name of address
     * files and content files.
     */
    public SparseTableContentWriter(final String name, final int initialCapacity,
            final int blockSize, final File baseDir, final String[] contentFilenames,
            final String[] contentIds) {
        if (contentFilenames.length != contentIds.length) {
            throw new RuntimeException("The length of contentFilenames and the length of"
                    + " contentIds are different " + contentFilenames.length + ", "
                    + contentIds.length);
        }
        mContentCount = contentFilenames.length;
        mSparseTable = new SparseTable(initialCapacity, blockSize, mContentCount);
        mLookupTableFile = new File(baseDir, name + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
        mAddressTableFiles = new File[mContentCount];
        mContentFiles = new File[mContentCount];
        mBaseDir = baseDir;
        for (int i = 0; i < mContentCount; ++i) {
            mAddressTableFiles[i] = new File(mBaseDir,
                    name + FormatSpec.CONTENT_TABLE_FILE_SUFFIX + contentIds[i]);
            mContentFiles[i] = new File(mBaseDir, contentFilenames[i] + contentIds[i]);
        }
        mContentOutStreams = new OutputStream[mContentCount];
    }

    /**
     * Opens an output stream on every content file.
     * If one of the files cannot be opened, the streams opened so far are closed again.
     *
     * @throws FileNotFoundException if a content file cannot be opened for writing.
     */
    public void openStreams() throws FileNotFoundException {
        boolean allOpened = false;
        try {
            for (int i = 0; i < mContentCount; ++i) {
                mContentOutStreams[i] = new FileOutputStream(mContentFiles[i]);
            }
            allOpened = true;
        } finally {
            if (!allOpened) {
                // Do not leak the streams that were opened before the failure.
                for (int i = 0; i < mContentCount; ++i) {
                    closeQuietly(mContentOutStreams[i]);
                    mContentOutStreams[i] = null;
                }
            }
        }
    }

    /**
     * Appends one entry to a content file and records where it starts.
     * The current length of the content file is stored in the SparseTable as the value of the
     * given index, then the writer is called and the stream is flushed so that the file length
     * is up to date for the next entry.
     *
     * @param contentIndex the index of the content to write to.
     * @param index the key under which the position of the entry is recorded.
     * @param writer the callback that writes the entry.
     * @throws IOException if writing or flushing fails.
     */
    protected void write(final int contentIndex, final int index,
            final SparseTableContentWriterInterface writer) throws IOException {
        mSparseTable.set(contentIndex, index, (int) mContentFiles[contentIndex].length());
        writer.write(mContentOutStreams[contentIndex]);
        mContentOutStreams[contentIndex].flush();
    }

    /**
     * Writes the SparseTable to the lookup table file and the address table files, then closes
     * the content streams.
     * The content streams are closed even when writing the table or closing another stream
     * fails; the first failure is the one reported.
     *
     * @throws IOException if writing the table or closing a stream fails.
     */
    public void closeStreams() throws IOException {
        IOException firstError = null;
        try {
            mSparseTable.writeToFiles(mLookupTableFile, mAddressTableFiles);
        } catch (final IOException e) {
            firstError = e;
        } finally {
            // Runs for unchecked exceptions too, so the content streams are never leaked.
            for (int i = 0; i < mContentCount; ++i) {
                final OutputStream contentOutStream = mContentOutStreams[i];
                if (contentOutStream == null) {
                    continue;
                }
                try {
                    contentOutStream.close();
                } catch (final IOException e) {
                    if (firstError == null) {
                        firstError = e;
                    }
                }
            }
        }
        if (firstError != null) {
            throw firstError;
        }
    }

    /**
     * Closes a stream if it is not null, ignoring any failure to close it.
     */
    private static void closeQuietly(final OutputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (final IOException ignored) {
            // Only used while reporting a more relevant failure to open another file.
        }
    }
}
 No newline at end of file
Loading