Loading java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java 0 → 100644 +206 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.TreeMap; /** * A base class of the binary dictionary decoder. */ public abstract class AbstractDictDecoder implements DictDecoder { protected FileHeader readHeader(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { if (dictBuffer == null) { openDictBuffer(); } final int version = HeaderReader.readVersion(dictBuffer); if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { throw new UnsupportedFormatException("Unsupported version : " + version); } // TODO: Remove this field. 
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); final int headerSize = HeaderReader.readHeaderSize(dictBuffer); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer, headerSize); final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); return header; } @Override @UsedForTesting public int getTerminalPosition(final String word) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } return BinaryDictIOUtils.getTerminalPosition(this, word); } @Override @UsedForTesting public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, final TreeMap<Integer, Integer> frequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); } /** * A utility class for reading a file header. 
*/ protected static class HeaderReader { protected static int readVersion(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); } protected static int readOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedShort(); } protected static int readHeaderSize(final DictBuffer dictBuffer) { return dictBuffer.readInt(); } protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, final int headerSize) { final HashMap<String, String> attributes = new HashMap<String, String>(); while (dictBuffer.position() < headerSize) { // We can avoid an infinite loop here since dictBuffer.position() is always // increased by calling CharEncoding.readString. final String key = CharEncoding.readString(dictBuffer); final String value = CharEncoding.readString(dictBuffer); attributes.put(key, value); } dictBuffer.position(headerSize); return attributes; } } /** * A utility class for reading a PtNode. 
*/ protected static class PtNodeReader { protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } protected static int readParentAddress(final DictBuffer dictBuffer, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { return BinaryDictDecoderUtils.readSInt24(dictBuffer); } else { return FormatSpec.NO_PARENT_ADDRESS; } } protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; return address; } else { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: return dictBuffer.readUnsignedByte(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: return dictBuffer.readUnsignedShort(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: return dictBuffer.readUnsignedInt24(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: default: return FormatSpec.NO_CHILDREN_ADDRESS; } } } // Reads shortcuts and returns the read length. 
protected static int readShortcut(final DictBuffer dictBuffer, final ArrayList<WeightedString> shortcutTargets) { final int pointerBefore = dictBuffer.position(); dictBuffer.readUnsignedShort(); // skip the size while (true) { final int targetFlags = dictBuffer.readUnsignedByte(); final String word = CharEncoding.readString(dictBuffer); shortcutTargets.add(new WeightedString(word, targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return dictBuffer.position() - pointerBefore; } protected static int readBigramAddresses(final DictBuffer dictBuffer, final ArrayList<PendingAttribute> bigrams, final int baseAddress) { int readLength = 0; int bigramCount = 0; while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { final int bigramFlags = dictBuffer.readUnsignedByte(); ++readLength; final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) ? 1 : -1; int bigramAddress = baseAddress + readLength; switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: bigramAddress += sign * dictBuffer.readUnsignedByte(); readLength += 1; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: bigramAddress += sign * dictBuffer.readUnsignedShort(); readLength += 2; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: bigramAddress += sign * dictBuffer.readUnsignedInt24(); readLength += 3; break; default: throw new RuntimeException("Has bigrams with no address"); } bigrams.add(new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, bigramAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return readLength; } } } java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +15 −180 Original line number Diff line number Diff line Loading @@ -17,11 +17,9 @@ package com.android.inputmethod.latin.makedict; import 
com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; Loading @@ -32,50 +30,17 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.HashMap; import java.util.TreeMap; /** * The base class of binary dictionary decoders. * An interface of binary dictionary decoders. */ public abstract class DictDecoder { protected FileHeader readHeader(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { if (dictBuffer == null) { openDictBuffer(); } final int version = HeaderReader.readVersion(dictBuffer); if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { throw new UnsupportedFormatException("Unsupported version : " + version); } // TODO: Remove this field. 
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); final int headerSize = HeaderReader.readHeaderSize(dictBuffer); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer, headerSize); final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); return header; } public interface DictDecoder { /** * Reads and returns the file header. */ public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException; public FileHeader readHeader() throws IOException, UnsupportedFormatException; /** * Reads PtNode from nodeAddress. Loading @@ -83,7 +48,7 @@ public abstract class DictDecoder { * @param formatOptions the format options. * @return PtNodeInfo. */ public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); /** * Reads a buffer and returns the memory representation of the dictionary. Loading @@ -98,7 +63,7 @@ public abstract class DictDecoder { * @return the created (or merged) dictionary. 
*/ @UsedForTesting public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict, public FusionDictionary readDictionaryBinary(final FusionDictionary dict, final boolean deleteDictIfBroken) throws FileNotFoundException, IOException, UnsupportedFormatException; Loading @@ -113,12 +78,7 @@ public abstract class DictDecoder { */ @UsedForTesting public int getTerminalPosition(final String word) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } return BinaryDictIOUtils.getTerminalPosition(this, word); } throws IOException, UnsupportedFormatException; /** * Reads unigrams and bigrams from the binary file. Loading @@ -134,47 +94,42 @@ public abstract class DictDecoder { public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, final TreeMap<Integer, Integer> frequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); } throws IOException, UnsupportedFormatException; /** * Sets the position of the buffer to the given value. * * @param newPos the new position */ public abstract void setPosition(final int newPos); public void setPosition(final int newPos); /** * Gets the position of the buffer. * * @return the position */ public abstract int getPosition(); public int getPosition(); /** * Reads and returns the PtNode count out of a buffer and forwards the pointer. */ public abstract int readPtNodeCount(); public int readPtNodeCount(); /** * Reads the forward link and advances the position. * * @return true if this method moves the file pointer, false otherwise. */ public abstract boolean readAndFollowForwardLink(); public abstract boolean hasNextPtNodeArray(); public boolean readAndFollowForwardLink(); public boolean hasNextPtNodeArray(); /** * Opens the dictionary file and makes DictBuffer. 
*/ @UsedForTesting public abstract void openDictBuffer() throws FileNotFoundException, IOException; public void openDictBuffer() throws FileNotFoundException, IOException; @UsedForTesting public abstract boolean isDictBufferOpen(); public boolean isDictBufferOpen(); // Constants for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; Loading Loading @@ -272,125 +227,5 @@ public abstract class DictDecoder { } } /** * A utility class for reading a file header. */ protected static class HeaderReader { protected static int readVersion(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); } protected static int readOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedShort(); } protected static int readHeaderSize(final DictBuffer dictBuffer) { return dictBuffer.readInt(); } protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, final int headerSize) { final HashMap<String, String> attributes = new HashMap<String, String>(); while (dictBuffer.position() < headerSize) { // We can avoid an infinite loop here since dictBuffer.position() is always // increased by calling CharEncoding.readString. final String key = CharEncoding.readString(dictBuffer); final String value = CharEncoding.readString(dictBuffer); attributes.put(key, value); } dictBuffer.position(headerSize); return attributes; } } /** * A utility class for reading a PtNode. 
*/ protected static class PtNodeReader { protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } protected static int readParentAddress(final DictBuffer dictBuffer, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { return BinaryDictDecoderUtils.readSInt24(dictBuffer); } else { return FormatSpec.NO_PARENT_ADDRESS; } } protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; return address; } else { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: return dictBuffer.readUnsignedByte(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: return dictBuffer.readUnsignedShort(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: return dictBuffer.readUnsignedInt24(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: default: return FormatSpec.NO_CHILDREN_ADDRESS; } } } // Reads shortcuts and returns the read length. 
protected static int readShortcut(final DictBuffer dictBuffer, final ArrayList<WeightedString> shortcutTargets) { final int pointerBefore = dictBuffer.position(); dictBuffer.readUnsignedShort(); // skip the size while (true) { final int targetFlags = dictBuffer.readUnsignedByte(); final String word = CharEncoding.readString(dictBuffer); shortcutTargets.add(new WeightedString(word, targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return dictBuffer.position() - pointerBefore; } protected static int readBigramAddresses(final DictBuffer dictBuffer, final ArrayList<PendingAttribute> bigrams, final int baseAddress) { int readLength = 0; int bigramCount = 0; while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { final int bigramFlags = dictBuffer.readUnsignedByte(); ++readLength; final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) ? 1 : -1; int bigramAddress = baseAddress + readLength; switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: bigramAddress += sign * dictBuffer.readUnsignedByte(); readLength += 1; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: bigramAddress += sign * dictBuffer.readUnsignedShort(); readLength += 2; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: bigramAddress += sign * dictBuffer.readUnsignedInt24(); readLength += 3; break; default: throw new RuntimeException("Has bigrams with no address"); } bigrams.add(new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, bigramAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return readLength; } } public abstract void skipPtNode(final FormatOptions formatOptions); public void skipPtNode(final FormatOptions formatOptions); } java/src/com/android/inputmethod/latin/makedict/DictUpdater.java +1 −1 Original line 
number Diff line number Diff line Loading @@ -24,7 +24,7 @@ import java.util.ArrayList; /** * An interface of a binary dictionary updater. */ public interface DictUpdater { public interface DictUpdater extends DictDecoder { /** * Deletes the word from the binary dictionary. Loading java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +2 −2 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of DictDecoder for version 3 binary dictionary. */ @UsedForTesting public class Ver3DictDecoder extends DictDecoder { public class Ver3DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver3DictDecoder.class.getSimpleName(); static { Loading @@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder { // TODO: implement something sensical instead of just a phony method private static native int doNothing(); protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { private static int readFrequency(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } Loading java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +2 −3 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of binary dictionary decoder for version 4 binary dictionary. 
*/ @UsedForTesting public class Ver4DictDecoder extends DictDecoder { public class Ver4DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver4DictDecoder.class.getSimpleName(); private static final int FILETYPE_TRIE = 1; Loading Loading @@ -157,8 +157,7 @@ public class Ver4DictDecoder extends DictDecoder { new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); return frequencyBuffer.readUnsignedByte(); Loading Loading
// java/src/com/android/inputmethod/latin/makedict/AbstractDictDecoder.java 0 → 100644 +206 −0
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;

/**
 * A base class of the binary dictionary decoder.
 *
 * Provides the header-reading logic and the static reader helpers that concrete decoders
 * build on, plus default implementations of the {@link DictDecoder} methods that only need
 * to make sure the buffer is open before delegating to {@link BinaryDictIOUtils}.
 */
public abstract class AbstractDictDecoder implements DictDecoder {
    /**
     * Reads the file header from the given buffer.
     *
     * Reads, in order: the format version, the option flags, the header size and the
     * attribute map, then assembles them into a {@link FileHeader}.
     *
     * @param dictBuffer the buffer to read the header from. Must not be null.
     * @return the decoded file header.
     * @throws IOException if opening or reading the buffer fails.
     * @throws UnsupportedFormatException if the version is outside
     *         [{@link FormatSpec#MINIMUM_SUPPORTED_VERSION},
     *         {@link FormatSpec#MAXIMUM_SUPPORTED_VERSION}] or the header size is negative.
     * @throws NullPointerException if {@code dictBuffer} is null.
     */
    protected FileHeader readHeader(final DictBuffer dictBuffer)
            throws IOException, UnsupportedFormatException {
        if (dictBuffer == null) {
            // The call is kept so the decoder still ends up with an open buffer, exactly as
            // before. It cannot rescue this invocation, though: openDictBuffer() returns
            // nothing and the final parameter stays null, so the reads below could only
            // dereference null. Fail here with an explicit message instead.
            openDictBuffer();
            throw new NullPointerException("dictBuffer is null: openDictBuffer() cannot replace"
                    + " the buffer passed to readHeader(DictBuffer); open the buffer before"
                    + " calling this method.");
        }

        final int version = HeaderReader.readVersion(dictBuffer);
        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
            throw new UnsupportedFormatException("Unsupported version : " + version);
        }
        // TODO: Remove this field.
        final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);

        final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
        if (headerSize < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
                headerSize);

        final FileHeader header = new FileHeader(headerSize,
                new FusionDictionary.DictionaryOptions(attributes,
                        0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
                        0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
                new FormatOptions(version,
                        0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
        return header;
    }

    /**
     * Opens the dictionary buffer if it is not open yet, then delegates to
     * {@link BinaryDictIOUtils#getTerminalPosition}.
     *
     * @param word the word to look up.
     * @return the value returned by {@code BinaryDictIOUtils.getTerminalPosition}.
     */
    @Override @UsedForTesting
    public int getTerminalPosition(final String word)
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        return BinaryDictIOUtils.getTerminalPosition(this, word);
    }

    /**
     * Opens the dictionary buffer if it is not open yet, then delegates to
     * {@link BinaryDictIOUtils#readUnigramsAndBigramsBinary}, which receives the three maps.
     *
     * @param words the map handed to the delegate for words.
     * @param frequencies the map handed to the delegate for frequencies.
     * @param bigrams the map handed to the delegate for pending bigram attributes.
     */
    @Override @UsedForTesting
    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
            final TreeMap<Integer, Integer> frequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
            throws IOException, UnsupportedFormatException {
        if (!isDictBufferOpen()) {
            openDictBuffer();
        }
        BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
    }

    /**
     * A utility class for reading a file header.
     */
    protected static class HeaderReader {
        /** Returns the result of {@link BinaryDictDecoderUtils#checkFormatVersion}. */
        protected static int readVersion(final DictBuffer dictBuffer)
                throws IOException, UnsupportedFormatException {
            return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
        }

        /** Reads the option flags as an unsigned short. */
        protected static int readOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedShort();
        }

        /** Reads the header size as an int. */
        protected static int readHeaderSize(final DictBuffer dictBuffer) {
            return dictBuffer.readInt();
        }

        /**
         * Reads key/value string pairs until the buffer position reaches {@code headerSize},
         * then sets the buffer position to exactly {@code headerSize}.
         *
         * @param dictBuffer the buffer, positioned at the first attribute key.
         * @param headerSize the position at which the header ends.
         * @return the attributes that were read; a repeated key keeps its last value.
         */
        protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
                final int headerSize) {
            final HashMap<String, String> attributes = new HashMap<String, String>();
            while (dictBuffer.position() < headerSize) {
                // We can avoid an infinite loop here since dictBuffer.position() is always
                // increased by calling CharEncoding.readString.
                final String key = CharEncoding.readString(dictBuffer);
                final String value = CharEncoding.readString(dictBuffer);
                attributes.put(key, value);
            }
            // Land exactly on the end of the header, even if the last pair read past it.
            dictBuffer.position(headerSize);
            return attributes;
        }
    }

    /**
     * A utility class for reading a PtNode.
     */
    protected static class PtNodeReader {
        /** Reads the PtNode option flags as an unsigned byte. */
        protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
            return dictBuffer.readUnsignedByte();
        }

        /**
         * Reads the parent address.
         *
         * @return a signed 24-bit value read from the buffer when the format supports dynamic
         *         updates; otherwise {@link FormatSpec#NO_PARENT_ADDRESS}, with nothing read.
         */
        protected static int readParentAddress(final DictBuffer dictBuffer,
                final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                return BinaryDictDecoderUtils.readSInt24(dictBuffer);
            } else {
                return FormatSpec.NO_PARENT_ADDRESS;
            }
        }

        /**
         * Reads the children address.
         *
         * With dynamic update support, a signed 24-bit value is read and 0 is mapped to
         * {@link FormatSpec#NO_CHILDREN_ADDRESS}. Otherwise the width of the address (one, two
         * or three bytes, or none) is selected by the address-type bits of {@code optionFlags}.
         *
         * @param dictBuffer the buffer to read from.
         * @param optionFlags the option flags of the PtNode.
         * @param formatOptions the format options.
         * @return the address read, or {@link FormatSpec#NO_CHILDREN_ADDRESS}.
         */
        protected static int readChildrenAddress(final DictBuffer dictBuffer,
                final int optionFlags, final FormatOptions formatOptions) {
            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
                final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
                if (address == 0) {
                    return FormatSpec.NO_CHILDREN_ADDRESS;
                }
                return address;
            } else {
                switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
                        return dictBuffer.readUnsignedByte();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
                        return dictBuffer.readUnsignedShort();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
                        return dictBuffer.readUnsignedInt24();
                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
                    default:
                        return FormatSpec.NO_CHILDREN_ADDRESS;
                }
            }
        }

        /**
         * Reads shortcuts and returns the read length.
         *
         * Skips an unsigned short (the size), then reads (flag byte, string) entries until an
         * entry whose flags lack {@link FormatSpec#FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT}. Each
         * entry is appended to {@code shortcutTargets} with the flag byte masked by
         * {@link FormatSpec#FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY}.
         *
         * @param dictBuffer the buffer, positioned at the shortcut list.
         * @param shortcutTargets the list that receives the shortcut targets.
         * @return the difference between the buffer position after and before the read.
         */
        protected static int readShortcut(final DictBuffer dictBuffer,
                final ArrayList<WeightedString> shortcutTargets) {
            final int pointerBefore = dictBuffer.position();
            dictBuffer.readUnsignedShort(); // skip the size
            while (true) {
                final int targetFlags = dictBuffer.readUnsignedByte();
                final String word = CharEncoding.readString(dictBuffer);
                shortcutTargets.add(new WeightedString(word,
                        targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
                if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
                    break;
                }
            }
            return dictBuffer.position() - pointerBefore;
        }

        /**
         * Reads bigram entries and returns the number of bytes read.
         *
         * Each entry is a flag byte followed by a one-, two- or three-byte unsigned offset.
         * The stored address is {@code baseAddress} plus the bytes read so far (including this
         * entry's flag byte), plus or minus the offset depending on
         * {@link FormatSpec#FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE}. Reading stops at the first
         * entry without {@link FormatSpec#FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT}, or after
         * {@link FormatSpec#MAX_BIGRAMS_IN_A_PTNODE} entries.
         *
         * @param dictBuffer the buffer, positioned at the bigram list.
         * @param bigrams the list that receives the pending bigram attributes.
         * @param baseAddress the address that the offsets are relative to.
         * @return the number of bytes read.
         * @throws RuntimeException if an entry's address-type bits match no known width.
         */
        protected static int readBigramAddresses(final DictBuffer dictBuffer,
                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
            int readLength = 0;
            int bigramCount = 0;
            while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
                final int bigramFlags = dictBuffer.readUnsignedByte();
                ++readLength;
                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
                        ? 1 : -1;
                // The offset is applied from the position right after the flag byte.
                int bigramAddress = baseAddress + readLength;
                switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
                        bigramAddress += sign * dictBuffer.readUnsignedByte();
                        readLength += 1;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedShort();
                        readLength += 2;
                        break;
                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
                        bigramAddress += sign * dictBuffer.readUnsignedInt24();
                        readLength += 3;
                        break;
                    default:
                        throw new RuntimeException("Has bigrams with no address");
                }
                bigrams.add(new PendingAttribute(
                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
                        bigramAddress));
                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) {
                    break;
                }
            }
            return readLength;
        }
    }
}
java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +15 −180 Original line number Diff line number Diff line Loading @@ -17,11 +17,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; Loading @@ -32,50 +30,17 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.HashMap; import java.util.TreeMap; /** * The base class of binary dictionary decoders. * An interface of binary dictionary decoders. */ public abstract class DictDecoder { protected FileHeader readHeader(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { if (dictBuffer == null) { openDictBuffer(); } final int version = HeaderReader.readVersion(dictBuffer); if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { throw new UnsupportedFormatException("Unsupported version : " + version); } // TODO: Remove this field. 
final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer); final int headerSize = HeaderReader.readHeaderSize(dictBuffer); if (headerSize < 0) { throw new UnsupportedFormatException("header size can't be negative."); } final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer, headerSize); final FileHeader header = new FileHeader(headerSize, new FusionDictionary.DictionaryOptions(attributes, 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), new FormatOptions(version, 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); return header; } public interface DictDecoder { /** * Reads and returns the file header. */ public abstract FileHeader readHeader() throws IOException, UnsupportedFormatException; public FileHeader readHeader() throws IOException, UnsupportedFormatException; /** * Reads PtNode from nodeAddress. Loading @@ -83,7 +48,7 @@ public abstract class DictDecoder { * @param formatOptions the format options. * @return PtNodeInfo. */ public abstract PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); public PtNodeInfo readPtNode(final int ptNodePos, final FormatOptions formatOptions); /** * Reads a buffer and returns the memory representation of the dictionary. Loading @@ -98,7 +63,7 @@ public abstract class DictDecoder { * @return the created (or merged) dictionary. 
*/ @UsedForTesting public abstract FusionDictionary readDictionaryBinary(final FusionDictionary dict, public FusionDictionary readDictionaryBinary(final FusionDictionary dict, final boolean deleteDictIfBroken) throws FileNotFoundException, IOException, UnsupportedFormatException; Loading @@ -113,12 +78,7 @@ public abstract class DictDecoder { */ @UsedForTesting public int getTerminalPosition(final String word) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } return BinaryDictIOUtils.getTerminalPosition(this, word); } throws IOException, UnsupportedFormatException; /** * Reads unigrams and bigrams from the binary file. Loading @@ -134,47 +94,42 @@ public abstract class DictDecoder { public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, final TreeMap<Integer, Integer> frequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { if (!isDictBufferOpen()) { openDictBuffer(); } BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); } throws IOException, UnsupportedFormatException; /** * Sets the position of the buffer to the given value. * * @param newPos the new position */ public abstract void setPosition(final int newPos); public void setPosition(final int newPos); /** * Gets the position of the buffer. * * @return the position */ public abstract int getPosition(); public int getPosition(); /** * Reads and returns the PtNode count out of a buffer and forwards the pointer. */ public abstract int readPtNodeCount(); public int readPtNodeCount(); /** * Reads the forward link and advances the position. * * @return true if this method moves the file pointer, false otherwise. */ public abstract boolean readAndFollowForwardLink(); public abstract boolean hasNextPtNodeArray(); public boolean readAndFollowForwardLink(); public boolean hasNextPtNodeArray(); /** * Opens the dictionary file and makes DictBuffer. 
*/ @UsedForTesting public abstract void openDictBuffer() throws FileNotFoundException, IOException; public void openDictBuffer() throws FileNotFoundException, IOException; @UsedForTesting public abstract boolean isDictBufferOpen(); public boolean isDictBufferOpen(); // Constants for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; Loading Loading @@ -272,125 +227,5 @@ public abstract class DictDecoder { } } /** * A utility class for reading a file header. */ protected static class HeaderReader { protected static int readVersion(final DictBuffer dictBuffer) throws IOException, UnsupportedFormatException { return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer); } protected static int readOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedShort(); } protected static int readHeaderSize(final DictBuffer dictBuffer) { return dictBuffer.readInt(); } protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer, final int headerSize) { final HashMap<String, String> attributes = new HashMap<String, String>(); while (dictBuffer.position() < headerSize) { // We can avoid an infinite loop here since dictBuffer.position() is always // increased by calling CharEncoding.readString. final String key = CharEncoding.readString(dictBuffer); final String value = CharEncoding.readString(dictBuffer); attributes.put(key, value); } dictBuffer.position(headerSize); return attributes; } } /** * A utility class for reading a PtNode. 
*/ protected static class PtNodeReader { protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } protected static int readParentAddress(final DictBuffer dictBuffer, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { return BinaryDictDecoderUtils.readSInt24(dictBuffer); } else { return FormatSpec.NO_PARENT_ADDRESS; } } protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags, final FormatOptions formatOptions) { if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer); if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; return address; } else { switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) { case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE: return dictBuffer.readUnsignedByte(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES: return dictBuffer.readUnsignedShort(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES: return dictBuffer.readUnsignedInt24(); case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS: default: return FormatSpec.NO_CHILDREN_ADDRESS; } } } // Reads shortcuts and returns the read length. 
protected static int readShortcut(final DictBuffer dictBuffer, final ArrayList<WeightedString> shortcutTargets) { final int pointerBefore = dictBuffer.position(); dictBuffer.readUnsignedShort(); // skip the size while (true) { final int targetFlags = dictBuffer.readUnsignedByte(); final String word = CharEncoding.readString(dictBuffer); shortcutTargets.add(new WeightedString(word, targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY)); if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return dictBuffer.position() - pointerBefore; } protected static int readBigramAddresses(final DictBuffer dictBuffer, final ArrayList<PendingAttribute> bigrams, final int baseAddress) { int readLength = 0; int bigramCount = 0; while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { final int bigramFlags = dictBuffer.readUnsignedByte(); ++readLength; final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE) ? 1 : -1; int bigramAddress = baseAddress + readLength; switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) { case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE: bigramAddress += sign * dictBuffer.readUnsignedByte(); readLength += 1; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES: bigramAddress += sign * dictBuffer.readUnsignedShort(); readLength += 2; break; case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES: bigramAddress += sign * dictBuffer.readUnsignedInt24(); readLength += 3; break; default: throw new RuntimeException("Has bigrams with no address"); } bigrams.add(new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, bigramAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } return readLength; } } public abstract void skipPtNode(final FormatOptions formatOptions); public void skipPtNode(final FormatOptions formatOptions); }
java/src/com/android/inputmethod/latin/makedict/DictUpdater.java +1 −1 Original line number Diff line number Diff line Loading @@ -24,7 +24,7 @@ import java.util.ArrayList; /** * An interface of a binary dictionary updater. */ public interface DictUpdater { public interface DictUpdater extends DictDecoder { /** * Deletes the word from the binary dictionary. Loading
java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +2 −2 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of DictDecoder for version 3 binary dictionary. */ @UsedForTesting public class Ver3DictDecoder extends DictDecoder { public class Ver3DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver3DictDecoder.class.getSimpleName(); static { Loading @@ -47,7 +47,7 @@ public class Ver3DictDecoder extends DictDecoder { // TODO: implement something sensical instead of just a phony method private static native int doNothing(); protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { private static int readFrequency(final DictBuffer dictBuffer) { return dictBuffer.readUnsignedByte(); } Loading
java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +2 −3 Original line number Diff line number Diff line Loading @@ -37,7 +37,7 @@ import java.util.Arrays; * An implementation of binary dictionary decoder for version 4 binary dictionary. */ @UsedForTesting public class Ver4DictDecoder extends DictDecoder { public class Ver4DictDecoder extends AbstractDictDecoder { private static final String TAG = Ver4DictDecoder.class.getSimpleName(); private static final int FILETYPE_TRIE = 1; Loading Loading @@ -157,8 +157,7 @@ public class Ver4DictDecoder extends DictDecoder { new File[] { contentFile }, FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); return frequencyBuffer.readUnsignedByte(); Loading