Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +13 −15 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; import com.android.inputmethod.latin.utils.JniUtils; import java.io.ByteArrayOutputStream; Loading Loading @@ -250,7 +251,7 @@ public final class BinaryDictDecoder { /** * Reads a string from a buffer. This is the converse of the above method. */ private static String readString(final FusionDictionaryBufferInterface buffer) { static String readString(final FusionDictionaryBufferInterface buffer) { final StringBuilder s = new StringBuilder(); int character = readChar(buffer); while (character != FormatSpec.INVALID_CHARACTER) { Loading Loading @@ -629,7 +630,7 @@ public final class BinaryDictDecoder { * @throws UnsupportedFormatException * @throws IOException */ private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) throws IOException, UnsupportedFormatException { final int version = getFormatVersion(buffer); if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION Loading @@ -643,25 +644,22 @@ public final class BinaryDictDecoder { /** * Reads a header from a buffer. * @param buffer the buffer to read. 
* @param headerReader the header reader * @throws IOException * @throws UnsupportedFormatException */ public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer) public static FileHeader readHeader(final HeaderReaderInterface headerReader) throws IOException, UnsupportedFormatException { final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); final int version = headerReader.readVersion(); final int optionsFlags = headerReader.readOptionFlags(); final HashMap<String, String> attributes = new HashMap<String, String>(); final int headerSize; headerSize = buffer.readInt(); final int headerSize = headerReader.readHeaderSize(); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } populateOptions(buffer, headerSize, attributes); buffer.position(headerSize); final HashMap<String, String> attributes = headerReader.readAttributes(headerSize); final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, Loading Loading @@ -711,14 +709,14 @@ public final class BinaryDictDecoder { } // Read header final FileHeader header = readHeader(reader.getBuffer()); final FileHeader fileHeader = readHeader(reader); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); if (null != dict) { for (final Word w : dict) { if (w.mIsBlacklistEntry) { Loading 
java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +29 −24 Original line number Diff line number Diff line Loading @@ -24,13 +24,13 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayWrapper; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.Map; Loading Loading @@ -141,20 +141,20 @@ public final class BinaryDictIOUtils { * Reads unigrams and bigrams from the binary file. * Doesn't store a full memory representation of the dictionary. * * @param reader the reader. * @param dictReader the dict reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. 
*/ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final FileHeader header = BinaryDictDecoder.readHeader(reader.getBuffer()); readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words, final FileHeader header = BinaryDictDecoder.readHeader(dictReader); readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } Loading @@ -162,20 +162,20 @@ public final class BinaryDictIOUtils { * Gets the address of the last CharGroup of the exact matching word in the dictionary. * If no match is found, returns NOT_VALID_WORD. * * @param reader the reader. * @param dictReader the dict reader. * @param word the word we search for. * @return the address of the terminal node. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. 
*/ @UsedForTesting public static int getTerminalPosition(final BinaryDictReader reader, public static int getTerminalPosition(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (word == null) return FormatSpec.NOT_VALID_WORD; if (buffer.position() != 0) buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); int wordPos = 0; final int wordLen = word.codePointCount(0, word.length()); for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { Loading Loading @@ -510,20 +510,20 @@ public final class BinaryDictIOUtils { /** * Find a word using the BinaryDictReader. * * @param reader the reader * @param dictReader the dict reader * @param word the word searched * @return the found group * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader, public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { int position = getTerminalPosition(reader, word); final FusionDictionaryBufferInterface buffer = reader.getBuffer(); int position = getTerminalPosition(dictReader, word); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (position != FormatSpec.NOT_VALID_WORD) { buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); buffer.position(position); return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions); } Loading @@ -544,17 +544,22 @@ public final class BinaryDictIOUtils { final File file, final long offset, final long length) throws 
FileNotFoundException, IOException, UnsupportedFormatException { final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; final BinaryDictReader dictReader = new BinaryDictReader(file); dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() { @Override public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file) throws FileNotFoundException, IOException { final FileInputStream inStream = new FileInputStream(file); try { inStream.read(buffer); final BinaryDictDecoder.ByteBufferWrapper wrapper = new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, offset, length)); return BinaryDictDecoder.readHeader(wrapper); return new ByteArrayWrapper(buffer); } finally { inStream.close(); } } }); return BinaryDictDecoder.readHeader(dictReader); } public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, final long length) { Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java +34 −1 Original line number Diff line number Diff line Loading @@ -17,7 +17,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; import com.android.inputmethod.latin.utils.ByteArrayWrapper; import java.io.File; Loading @@ -27,8 +29,9 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.HashMap; public class BinaryDictReader { public class BinaryDictReader implements HeaderReaderInterface { public interface FusionDictionaryBufferFactory { public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) Loading Loading @@ -133,4 +136,34 @@ public class 
BinaryDictReader { openBuffer(factory); return getBuffer(); } // The implementation of HeaderReaderInterface @Override public int readVersion() throws IOException, UnsupportedFormatException { return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer); } @Override public int readOptionFlags() { return mFusionDictionaryBuffer.readUnsignedShort(); } @Override public int readHeaderSize() { return mFusionDictionaryBuffer.readInt(); } @Override public HashMap<String, String> readAttributes(final int headerSize) { final HashMap<String, String> attributes = new HashMap<String, String>(); while (mFusionDictionaryBuffer.position() < headerSize) { // We can avoid infinite loop here since mFusionDictonary.position() is always increased // by calling CharEncoding.readString. final String key = CharEncoding.readString(mFusionDictionaryBuffer); final String value = CharEncoding.readString(mFusionDictionaryBuffer); attributes.put(key, value); } mFusionDictionaryBuffer.position(headerSize); return attributes; } } java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +26 −26 Original line number Diff line number Diff line Loading @@ -49,18 +49,18 @@ public final class DynamicBinaryDictIOUtils { /** * Delete the word from the binary file. * * @param reader the reader. * @param dictReader the dict reader. 
* @param word the word we delete * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting public static void deleteWord(final BinaryDictReader reader, final String word) public static void deleteWord(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word); if (wordPosition == FormatSpec.NOT_VALID_WORD) return; buffer.position(wordPosition); Loading Loading @@ -236,7 +236,7 @@ public final class DynamicBinaryDictIOUtils { /** * Insert a word into a binary dictionary. * * @param reader the reader. * @param dictReader the dict reader. * @param destination a stream to the underlying file, with the pointer at the end of the file. * @param word the word to insert. * @param frequency the frequency of the new word. Loading @@ -249,16 +249,16 @@ public final class DynamicBinaryDictIOUtils { // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. 
@UsedForTesting public static void insertWord(final BinaryDictReader reader, final OutputStream destination, public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination, final String word, final int frequency, final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (bigramStrings != null) { for (final WeightedString bigram : bigramStrings) { int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord); int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord); if (position == FormatSpec.NOT_VALID_WORD) { // TODO: figure out what is the correct thing to do here. } else { Loading @@ -273,7 +273,7 @@ public final class DynamicBinaryDictIOUtils { // find the insert position of the word. 
if (buffer.position() != 0) buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); final int[] codePoints = FusionDictionary.getCodePoints(word); Loading @@ -289,9 +289,9 @@ public final class DynamicBinaryDictIOUtils { for (int i = 0; i < charGroupCount; ++i) { address = buffer.position(); final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, buffer.position(), header.mFormatOptions); buffer.position(), fileHeader.mFormatOptions); final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, header.mFormatOptions); fileHeader.mFormatOptions); if (isMovedGroup) continue; nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; Loading @@ -311,16 +311,16 @@ public final class DynamicBinaryDictIOUtils { final int newNodeAddress = buffer.limit(); final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, false /* isBlackListEntry */, header.mFormatOptions); false /* isBlackListEntry */, fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, frequency, nodeParentAddress, shortcuts, bigrams, destination, buffer, nodeOriginAddress, address, header.mFormatOptions); buffer, nodeOriginAddress, address, fileHeader.mFormatOptions); final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { updateParentAddresses(buffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions); } final CharGroupInfo newInfo2 = new CharGroupInfo( newNodeAddress + written 
+ 1, -1 /* endAddress */, Loading Loading @@ -352,17 +352,17 @@ public final class DynamicBinaryDictIOUtils { false /* isTerminal */, 0 /* childrenAddressSize*/, false /* hasShortcut */, false /* hasBigrams */, false /* isNotAWord */, false /* isBlackListEntry */, header.mFormatOptions); fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, destination, buffer, nodeOriginAddress, address, header.mFormatOptions); fileHeader.mFormatOptions); final int[] suffixCharacters = Arrays.copyOfRange( currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { updateParentAddresses(buffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions); } final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( suffixCharacters.length > 1, Loading @@ -371,21 +371,21 @@ public final class DynamicBinaryDictIOUtils { (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0, (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo suffixInfo = new CharGroupInfo( newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mBigrams); written += BinaryDictIOUtils.computeGroupSize(suffixInfo, header.mFormatOptions) + 1; fileHeader.mFormatOptions) + 1; final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, codePoints.length); final int flags = BinaryDictEncoder.makeCharGroupFlags( newCharacters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, 
isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo( newNodeAddress + written, -1 /* endAddress */, flags, newCharacters, frequency, newNodeAddress + 1, Loading @@ -407,13 +407,13 @@ public final class DynamicBinaryDictIOUtils { final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, currentInfo.mCharacters, frequency, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, bigrams); moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, header.mFormatOptions); fileHeader.mFormatOptions); return; } wordPos += currentInfo.mCharacters.length; Loading @@ -432,12 +432,12 @@ public final class DynamicBinaryDictIOUtils { */ final int newNodeAddress = buffer.limit(); updateChildrenAddress(buffer, address, newNodeAddress, header.mFormatOptions); fileHeader.mFormatOptions); final int newGroupAddress = newNodeAddress + 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1; final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, Loading Loading @@ -482,7 +482,7 @@ public final class DynamicBinaryDictIOUtils { final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int flags = 
BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); Loading java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReaderInterface.java 0 → 100644 +32 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin.makedict.decoder; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import java.io.IOException; import java.util.HashMap; /** * An interface to read a binary dictionary file header. */ public interface HeaderReaderInterface { public int readVersion() throws IOException, UnsupportedFormatException; public int readOptionFlags(); public int readHeaderSize(); public HashMap<String, String> readAttributes(final int headerSize); } Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +13 −15 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; import com.android.inputmethod.latin.utils.JniUtils; import java.io.ByteArrayOutputStream; Loading Loading @@ -250,7 +251,7 @@ public final class BinaryDictDecoder { /** * Reads a string from a buffer. This is the converse of the above method. */ private static String readString(final FusionDictionaryBufferInterface buffer) { static String readString(final FusionDictionaryBufferInterface buffer) { final StringBuilder s = new StringBuilder(); int character = readChar(buffer); while (character != FormatSpec.INVALID_CHARACTER) { Loading Loading @@ -629,7 +630,7 @@ public final class BinaryDictDecoder { * @throws UnsupportedFormatException * @throws IOException */ private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) throws IOException, UnsupportedFormatException { final int version = getFormatVersion(buffer); if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION Loading @@ -643,25 +644,22 @@ public final class BinaryDictDecoder { /** * Reads a header from a buffer. * @param buffer the buffer to read. 
* @param headerReader the header reader * @throws IOException * @throws UnsupportedFormatException */ public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer) public static FileHeader readHeader(final HeaderReaderInterface headerReader) throws IOException, UnsupportedFormatException { final int version = checkFormatVersion(buffer); final int optionsFlags = buffer.readUnsignedShort(); final int version = headerReader.readVersion(); final int optionsFlags = headerReader.readOptionFlags(); final HashMap<String, String> attributes = new HashMap<String, String>(); final int headerSize; headerSize = buffer.readInt(); final int headerSize = headerReader.readHeaderSize(); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } populateOptions(buffer, headerSize, attributes); buffer.position(headerSize); final HashMap<String, String> attributes = headerReader.readAttributes(headerSize); final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, Loading Loading @@ -711,14 +709,14 @@ public final class BinaryDictDecoder { } // Read header final FileHeader header = readHeader(reader.getBuffer()); final FileHeader fileHeader = readHeader(reader); Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); final PtNodeArray root = readNodeArray(reader.getBuffer(), header.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, header.mFormatOptions); final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize, reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); if (null != dict) { for (final Word w : dict) { if (w.mIsBlacklistEntry) { Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +29 −24 Original line number Diff line number Diff line Loading @@ -24,13 +24,13 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayWrapper; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.Map; Loading Loading @@ -141,20 +141,20 @@ public final class BinaryDictIOUtils { * Reads unigrams and bigrams from the binary file. * Doesn't store a full memory representation of the dictionary. * * @param reader the reader. * @param dictReader the dict reader. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. 
*/ public static void readUnigramsAndBigramsBinary(final BinaryDictReader reader, public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { // Read header final FileHeader header = BinaryDictDecoder.readHeader(reader.getBuffer()); readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words, final FileHeader header = BinaryDictDecoder.readHeader(dictReader); readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } Loading @@ -162,20 +162,20 @@ public final class BinaryDictIOUtils { * Gets the address of the last CharGroup of the exact matching word in the dictionary. * If no match is found, returns NOT_VALID_WORD. * * @param reader the reader. * @param dictReader the dict reader. * @param word the word we search for. * @return the address of the terminal node. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. 
*/ @UsedForTesting public static int getTerminalPosition(final BinaryDictReader reader, public static int getTerminalPosition(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (word == null) return FormatSpec.NOT_VALID_WORD; if (buffer.position() != 0) buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); int wordPos = 0; final int wordLen = word.codePointCount(0, word.length()); for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { Loading Loading @@ -510,20 +510,20 @@ public final class BinaryDictIOUtils { /** * Find a word using the BinaryDictReader. * * @param reader the reader * @param dictReader the dict reader * @param word the word searched * @return the found group * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader reader, public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { int position = getTerminalPosition(reader, word); final FusionDictionaryBufferInterface buffer = reader.getBuffer(); int position = getTerminalPosition(dictReader, word); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (position != FormatSpec.NOT_VALID_WORD) { buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); buffer.position(position); return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions); } Loading @@ -544,17 +544,22 @@ public final class BinaryDictIOUtils { final File file, final long offset, final long length) throws 
FileNotFoundException, IOException, UnsupportedFormatException { final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; final BinaryDictReader dictReader = new BinaryDictReader(file); dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() { @Override public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file) throws FileNotFoundException, IOException { final FileInputStream inStream = new FileInputStream(file); try { inStream.read(buffer); final BinaryDictDecoder.ByteBufferWrapper wrapper = new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, offset, length)); return BinaryDictDecoder.readHeader(wrapper); return new ByteArrayWrapper(buffer); } finally { inStream.close(); } } }); return BinaryDictDecoder.readHeader(dictReader); } public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, final long length) { Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java +34 −1 Original line number Diff line number Diff line Loading @@ -17,7 +17,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; import com.android.inputmethod.latin.utils.ByteArrayWrapper; import java.io.File; Loading @@ -27,8 +29,9 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.HashMap; public class BinaryDictReader { public class BinaryDictReader implements HeaderReaderInterface { public interface FusionDictionaryBufferFactory { public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) Loading Loading @@ -133,4 +136,34 @@ public class BinaryDictReader { openBuffer(factory); return getBuffer(); } // The implementation of HeaderReaderInterface @Override public int readVersion() throws IOException, UnsupportedFormatException { return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer); } @Override public int readOptionFlags() { return mFusionDictionaryBuffer.readUnsignedShort(); } @Override public int readHeaderSize() { return mFusionDictionaryBuffer.readInt(); } @Override public HashMap<String, String> readAttributes(final int headerSize) { final HashMap<String, String> attributes = new HashMap<String, String>(); while (mFusionDictionaryBuffer.position() < headerSize) { // We can avoid infinite loop here since mFusionDictonary.position() is always increased // by calling CharEncoding.readString. 
final String key = CharEncoding.readString(mFusionDictionaryBuffer); final String value = CharEncoding.readString(mFusionDictionaryBuffer); attributes.put(key, value); } mFusionDictionaryBuffer.position(headerSize); return attributes; } }
java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +26 −26 Original line number Diff line number Diff line Loading @@ -49,18 +49,18 @@ public final class DynamicBinaryDictIOUtils { /** * Delete the word from the binary file. * * @param reader the reader. * @param dictReader the dict reader. * @param word the word we delete * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting public static void deleteWord(final BinaryDictReader reader, final String word) public static void deleteWord(final BinaryDictReader dictReader, final String word) throws IOException, UnsupportedFormatException { final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(reader, word); final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word); if (wordPosition == FormatSpec.NOT_VALID_WORD) return; buffer.position(wordPosition); Loading Loading @@ -236,7 +236,7 @@ public final class DynamicBinaryDictIOUtils { /** * Insert a word into a binary dictionary. * * @param reader the reader. * @param dictReader the dict reader. * @param destination a stream to the underlying file, with the pointer at the end of the file. * @param word the word to insert. * @param frequency the frequency of the new word. Loading @@ -249,16 +249,16 @@ public final class DynamicBinaryDictIOUtils { // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. 
@UsedForTesting public static void insertWord(final BinaryDictReader reader, final OutputStream destination, public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination, final String word, final int frequency, final ArrayList<WeightedString> bigramStrings, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); final FusionDictionaryBufferInterface buffer = reader.getBuffer(); final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); if (bigramStrings != null) { for (final WeightedString bigram : bigramStrings) { int position = BinaryDictIOUtils.getTerminalPosition(reader, bigram.mWord); int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord); if (position == FormatSpec.NOT_VALID_WORD) { // TODO: figure out what is the correct thing to do here. } else { Loading @@ -273,7 +273,7 @@ public final class DynamicBinaryDictIOUtils { // find the insert position of the word. 
if (buffer.position() != 0) buffer.position(0); final FileHeader header = BinaryDictDecoder.readHeader(buffer); final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); final int[] codePoints = FusionDictionary.getCodePoints(word); Loading @@ -289,9 +289,9 @@ public final class DynamicBinaryDictIOUtils { for (int i = 0; i < charGroupCount; ++i) { address = buffer.position(); final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, buffer.position(), header.mFormatOptions); buffer.position(), fileHeader.mFormatOptions); final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, header.mFormatOptions); fileHeader.mFormatOptions); if (isMovedGroup) continue; nodeParentAddress = (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) ? FormatSpec.NO_PARENT_ADDRESS : currentInfo.mParentAddress + address; Loading @@ -311,16 +311,16 @@ public final class DynamicBinaryDictIOUtils { final int newNodeAddress = buffer.limit(); final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, false /* isBlackListEntry */, header.mFormatOptions); false /* isBlackListEntry */, fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, frequency, nodeParentAddress, shortcuts, bigrams, destination, buffer, nodeOriginAddress, address, header.mFormatOptions); buffer, nodeOriginAddress, address, fileHeader.mFormatOptions); final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { updateParentAddresses(buffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions); } final CharGroupInfo newInfo2 = new CharGroupInfo( newNodeAddress + written 
+ 1, -1 /* endAddress */, Loading Loading @@ -352,17 +352,17 @@ public final class DynamicBinaryDictIOUtils { false /* isTerminal */, 0 /* childrenAddressSize*/, false /* hasShortcut */, false /* hasBigrams */, false /* isNotAWord */, false /* isBlackListEntry */, header.mFormatOptions); fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, destination, buffer, nodeOriginAddress, address, header.mFormatOptions); fileHeader.mFormatOptions); final int[] suffixCharacters = Arrays.copyOfRange( currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { updateParentAddresses(buffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, header.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions); } final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( suffixCharacters.length > 1, Loading @@ -371,21 +371,21 @@ public final class DynamicBinaryDictIOUtils { (currentInfo.mFlags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0, (currentInfo.mFlags & FormatSpec.FLAG_HAS_BIGRAMS) != 0, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo suffixInfo = new CharGroupInfo( newNodeAddress + written + 1, -1 /* endAddress */, suffixFlags, suffixCharacters, currentInfo.mFrequency, newNodeAddress + 1, currentInfo.mChildrenAddress, currentInfo.mShortcutTargets, currentInfo.mBigrams); written += BinaryDictIOUtils.computeGroupSize(suffixInfo, header.mFormatOptions) + 1; fileHeader.mFormatOptions) + 1; final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, codePoints.length); final int flags = BinaryDictEncoder.makeCharGroupFlags( newCharacters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, 
isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo( newNodeAddress + written, -1 /* endAddress */, flags, newCharacters, frequency, newNodeAddress + 1, Loading @@ -407,13 +407,13 @@ public final class DynamicBinaryDictIOUtils { final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, currentInfo.mCharacters, frequency, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, bigrams); moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, header.mFormatOptions); fileHeader.mFormatOptions); return; } wordPos += currentInfo.mCharacters.length; Loading @@ -432,12 +432,12 @@ public final class DynamicBinaryDictIOUtils { */ final int newNodeAddress = buffer.limit(); updateChildrenAddress(buffer, address, newNodeAddress, header.mFormatOptions); fileHeader.mFormatOptions); final int newGroupAddress = newNodeAddress + 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1; final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final CharGroupInfo newInfo = new CharGroupInfo(newGroupAddress, -1, flags, characters, frequency, address, FormatSpec.NO_CHILDREN_ADDRESS, Loading Loading @@ -482,7 +482,7 @@ public final class DynamicBinaryDictIOUtils { final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int flags = 
BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); isNotAWord, isBlackListEntry, fileHeader.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, -1 /* endAddress */, flags, characters, frequency, nodeParentAddress, FormatSpec.NO_CHILDREN_ADDRESS, shortcuts, bigrams); Loading
java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReaderInterface.java 0 → 100644 +32 −0 Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict.decoder;

import com.android.inputmethod.latin.makedict.UnsupportedFormatException;

import java.io.IOException;
import java.util.HashMap;

/**
 * An interface to read a binary dictionary file header.
 *
 * <p>BinaryDictDecoder.readHeader calls these methods in the order they are declared here:
 * {@link #readVersion()}, {@link #readOptionFlags()}, {@link #readHeaderSize()} and then
 * {@link #readAttributes(int)} with the size it just read. BinaryDictReader implements this
 * interface on top of its dictionary buffer, so each call there consumes data from the
 * buffer's current position.
 */
public interface HeaderReaderInterface {
    /**
     * Reads the format version of the dictionary.
     *
     * <p>BinaryDictReader implements this by delegating to
     * BinaryDictDecoder.checkFormatVersion.
     *
     * @return the format version.
     * @throws IOException declared for implementations; presumably raised when the underlying
     *         data cannot be read (not visible from this interface).
     * @throws UnsupportedFormatException declared for implementations; presumably raised when
     *         the version is not a supported one (not visible from this interface).
     */
    public int readVersion() throws IOException, UnsupportedFormatException;

    /**
     * Reads the option flags of the dictionary.
     *
     * <p>BinaryDictReader reads them as an unsigned short.
     *
     * @return the option flags.
     */
    public int readOptionFlags();

    /**
     * Reads the size of the header.
     *
     * <p>BinaryDictReader reads it as an int. This method does not validate the value;
     * BinaryDictDecoder.readHeader throws UnsupportedFormatException when it is negative.
     *
     * @return the header size as stored in the file.
     */
    public int readHeaderSize();

    /**
     * Reads the header attributes as key/value string pairs.
     *
     * <p>BinaryDictReader keeps reading pairs while its buffer position is below
     * {@code headerSize} and then sets the position to {@code headerSize}.
     *
     * @param headerSize the header size; BinaryDictDecoder.readHeader passes the value returned
     *        by {@link #readHeaderSize()}.
     * @return a map from attribute key to attribute value.
     */
    public HashMap<String, String> readAttributes(final int headerSize);
}