Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +15 −4 Original line number Diff line number Diff line Loading @@ -126,8 +126,14 @@ public class BinaryDictEncoderUtils { */ private static int getPtNodeMaximumSize(final PtNode ptNode, final FormatOptions options) { int size = getNodeHeaderSize(ptNode, options); // If terminal, one byte for the frequency if (ptNode.isTerminal()) size += FormatSpec.PTNODE_FREQUENCY_SIZE; if (ptNode.isTerminal()) { // If terminal, one byte for the frequency or four bytes for the terminal id. if (options.mHasTerminalId) { size += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { size += FormatSpec.PTNODE_FREQUENCY_SIZE; } } size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address size += getShortcutListSize(ptNode.mShortcutTargets); if (null != ptNode.mBigrams) { Loading Loading @@ -345,7 +351,13 @@ public class BinaryDictEncoderUtils { changed = true; } int nodeSize = getNodeHeaderSize(ptNode, formatOptions); if (ptNode.isTerminal()) nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; if (ptNode.isTerminal()) { if (formatOptions.mHasTerminalId) { nodeSize += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; } } if (formatOptions.mSupportsDynamicUpdate) { nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; } else if (null != ptNode.mChildren) { Loading Loading @@ -787,7 +799,6 @@ public class BinaryDictEncoderUtils { + FormatSpec.MAX_TERMINAL_FREQUENCY + " : " + ptNode.mFrequency); } dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict); } if (formatOptions.mSupportsDynamicUpdate) { Loading java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +13 −1 Original line number Diff line number Diff line Loading @@ -198,9 +198,12 @@ public final class FormatSpec { public static final int MAGIC_NUMBER = 0x9BC13AFE; static final int MINIMUM_SUPPORTED_VERSION = 2; static final int MAXIMUM_SUPPORTED_VERSION = 3; static final int MAXIMUM_SUPPORTED_VERSION = 4; static final int NOT_A_VERSION_NUMBER = -1; static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3; static final int FIRST_VERSION_WITH_TERMINAL_ID = 4; static final int VERSION3 = 3; static final int VERSION4 = 4; // These options need to be the same numeric values as the one in the native reading code. static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; Loading Loading @@ -251,11 +254,17 @@ public final class FormatSpec { static final int PTNODE_TERMINATOR_SIZE = 1; static final int PTNODE_FLAGS_SIZE = 1; static final int PTNODE_FREQUENCY_SIZE = 1; static final int PTNODE_TERMINAL_ID_SIZE = 4; static final int PTNODE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1; static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2; // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; static final int NO_FORWARD_LINK_ADDRESS = 0; Loading @@ -264,6 +273,7 @@ public final class FormatSpec { static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127 static final int MAX_PTNODES_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767 static final int MAX_BIGRAMS_IN_A_PTNODE = 10000; static final int MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE = 0xFFFF; static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_BIGRAM_FREQUENCY = 15; Loading @@ -287,6 +297,7 @@ public final class FormatSpec { public static final class FormatOptions { public final int mVersion; public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; @UsedForTesting public FormatOptions(final int version) { this(version, false); Loading @@ -300,6 +311,7 @@ public final class FormatSpec { + FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior."); } mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); } } Loading java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +6 −0 Original line number Diff line number Diff line Loading @@ -111,6 +111,7 @@ public final class FusionDictionary implements Iterable<Word> { ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mBigrams; int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal. PtNodeArray mChildren; boolean mIsNotAWord; // Only a shortcut boolean mIsBlacklistEntry; Loading @@ -129,6 +130,7 @@ public final class FusionDictionary implements Iterable<Word> { final boolean isNotAWord, final boolean isBlacklistEntry) { mChars = chars; mFrequency = frequency; mTerminalId = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; Loading Loading @@ -156,6 +158,10 @@ public final class FusionDictionary implements Iterable<Word> { mChildren.mData.add(n); } public int getTerminalId() { return mTerminalId; } public boolean isTerminal() { return NOT_A_TERMINAL != mFrequency; } Loading java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java +2 −2 Original line number Diff line number Diff line Loading @@ -68,7 +68,7 @@ public class Ver3DictEncoder implements DictEncoder { @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { if (formatOptions.mVersion > 3) { if (formatOptions.mVersion > FormatSpec.VERSION3) { throw new UnsupportedFormatException( "The given format options has wrong version number : " + formatOptions.mVersion); Loading Loading @@ -200,7 +200,7 @@ public class Ver3DictEncoder implements DictEncoder { mPosition += shortcutShift; } final int shortcutByteSize = mPosition - indexOfShortcutByteSize; if (shortcutByteSize > 0xFFFF) { if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) { throw new RuntimeException("Shortcut list too large"); } BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize, Loading java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java 0 → 100644 +269 −0 Original line number Diff line number Diff line /* /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; /** * An implementation of DictEncoder for version 4 binary dictionary. */ @UsedForTesting public class Ver4DictEncoder implements DictEncoder { private final File mDictPlacedDir; private byte[] mTrieBuf; private byte[] mFreqBuf; private int mTriePos; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { mDictPlacedDir = dictPlacedDir; } private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) throws FileNotFoundException, IOException { final FileHeader header = new FileHeader(0, dictOptions, formatOptions); final String filename = header.getId() + "." + header.getVersion(); final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } if (!trieFile.exists()) trieFile.createNewFile(); if (!freqFile.exists()) freqFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); } private void close() throws IOException { try { if (mTrieOutStream != null) { mTrieOutStream.close(); } if (mFreqOutStream != null) { mFreqOutStream.close(); } } finally { mTrieOutStream = null; mFreqOutStream = null; } } @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { if (formatOptions.mVersion != FormatSpec.VERSION4) { throw new UnsupportedFormatException("File header has a wrong version number : " + formatOptions.mVersion); } if (!mDictPlacedDir.isDirectory()) { throw new UnsupportedFormatException("Given path is not a directory."); } if (mTrieOutStream == null) { openStreams(formatOptions, dict.mOptions); } BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions); MakedictLog.i("Flattening the tree..."); ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); int terminalCount = 0; for (final PtNodeArray array : flatNodes) { for (final PtNode node : array.mData) { if (node.isTerminal()) node.mTerminalId = terminalCount++; } } MakedictLog.i("Computing addresses..."); BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; mTrieBuf = new byte[bufferSize]; mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; MakedictLog.i("Writing file..."); for (PtNodeArray nodeArray : flatNodes) { BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions); } if (MakedictLog.DBG) { BinaryDictEncoderUtils.showStatistics(flatNodes); MakedictLog.i("has " + terminalCount + " terminals."); } mTrieOutStream.write(mTrieBuf); mFreqOutStream.write(mFreqBuf); MakedictLog.i("Done"); close(); } @Override public void setPosition(int position) { if (mTrieBuf == null || position < 0 || position >- mTrieBuf.length) return; mTriePos = position; } @Override public int getPosition() { return mTriePos; } @Override public void writePtNodeCount(int ptNodeCount) { final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); // ptNodeCount must fit on one byte or two bytes. // Please see comments in FormatSpec if (countSize != 1 && countSize != 2) { throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize); } mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, ptNodeCount, countSize); } private void writePtNodeFlags(final PtNode ptNode, final int parentAddress, final FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mTriePos, childrenPos, formatOptions), FormatSpec.PTNODE_FLAGS_SIZE); } private void writeParentPosition(int parentPos, final PtNode ptNode, final FormatOptions formatOptions) { if (parentPos != FormatSpec.NO_PARENT_ADDRESS) { parentPos -= ptNode.mCachedAddressAfterUpdate; } mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos, formatOptions); } private void writeCharacters(final int[] characters, final boolean hasSeveralChars) { mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos); if (hasSeveralChars) { mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; } } private void writeTerminalId(final int terminalId) { mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId, FormatSpec.PTNODE_TERMINAL_ID_SIZE); } private void writeFrequency(final int frequency, final int terminalId) { final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE; BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE); } private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); if (formatOptions.mSupportsDynamicUpdate) { mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf, mTriePos, childrenPos); } else { mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, childrenPos); } } private void writeShortcuts(ArrayList<WeightedString> shortcuts) { if (null == shortcuts || shortcuts.isEmpty()) return; final int indexOfShortcutByteSize = mTriePos; mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE; final Iterator<WeightedString> shortcutIterator = shortcuts.iterator(); while (shortcutIterator.hasNext()) { final WeightedString target = shortcutIterator.next(); final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency); mTrieBuf[mTriePos++] = (byte)shortcutFlags; final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos, target.mWord); mTriePos += shortcutShift; } final int shortcutByteSize = mTriePos - indexOfShortcutByteSize; if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) { throw new RuntimeException("Shortcut list too large : " + shortcutByteSize); } BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize, shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); } private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) { if (bigrams == null) return; final Iterator<WeightedString> bigramIterator = bigrams.iterator(); while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); final PtNode target = FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; final int unigramFrequencyForThisWord = target.mFrequency; final int offset = addressOfBigram - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); mTrieBuf[mTriePos++] = (byte) bigramFlags; mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, Math.abs(offset)); } } @Override public void writeForwardLinkAddress(int forwardLinkAddress) { mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE); } @Override public void writePtNode(final PtNode ptNode, final int parentPosition, final FormatOptions formatOptions, final FusionDictionary dict) { writePtNodeFlags(ptNode, parentPosition, formatOptions); writeParentPosition(parentPosition, ptNode, formatOptions); writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); if (ptNode.isTerminal()) { writeTerminalId(ptNode.mTerminalId); writeFrequency(ptNode.mFrequency, ptNode.mTerminalId); } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } } Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +15 −4 Original line number Diff line number Diff line Loading @@ -126,8 +126,14 @@ public class BinaryDictEncoderUtils { */ private static int getPtNodeMaximumSize(final PtNode ptNode, final FormatOptions options) { int size = getNodeHeaderSize(ptNode, options); // If terminal, one byte for the frequency if (ptNode.isTerminal()) size += FormatSpec.PTNODE_FREQUENCY_SIZE; if (ptNode.isTerminal()) { // If terminal, one byte for the frequency or four bytes for the terminal id. if (options.mHasTerminalId) { size += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { size += FormatSpec.PTNODE_FREQUENCY_SIZE; } } size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address size += getShortcutListSize(ptNode.mShortcutTargets); if (null != ptNode.mBigrams) { Loading Loading @@ -345,7 +351,13 @@ public class BinaryDictEncoderUtils { changed = true; } int nodeSize = getNodeHeaderSize(ptNode, formatOptions); if (ptNode.isTerminal()) nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; if (ptNode.isTerminal()) { if (formatOptions.mHasTerminalId) { nodeSize += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE; } } if (formatOptions.mSupportsDynamicUpdate) { nodeSize += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; } else if (null != ptNode.mChildren) { Loading Loading @@ -787,7 +799,6 @@ public class BinaryDictEncoderUtils { + FormatSpec.MAX_TERMINAL_FREQUENCY + " : " + ptNode.mFrequency); } dictEncoder.writePtNode(ptNode, parentPosition, formatOptions, dict); } if (formatOptions.mSupportsDynamicUpdate) { Loading
java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +13 −1 Original line number Diff line number Diff line Loading @@ -198,9 +198,12 @@ public final class FormatSpec { public static final int MAGIC_NUMBER = 0x9BC13AFE; static final int MINIMUM_SUPPORTED_VERSION = 2; static final int MAXIMUM_SUPPORTED_VERSION = 3; static final int MAXIMUM_SUPPORTED_VERSION = 4; static final int NOT_A_VERSION_NUMBER = -1; static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3; static final int FIRST_VERSION_WITH_TERMINAL_ID = 4; static final int VERSION3 = 3; static final int VERSION4 = 4; // These options need to be the same numeric values as the one in the native reading code. static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; Loading Loading @@ -251,11 +254,17 @@ public final class FormatSpec { static final int PTNODE_TERMINATOR_SIZE = 1; static final int PTNODE_FLAGS_SIZE = 1; static final int PTNODE_FREQUENCY_SIZE = 1; static final int PTNODE_TERMINAL_ID_SIZE = 4; static final int PTNODE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_ATTRIBUTE_FLAGS_SIZE = 1; static final int PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; static final int PTNODE_SHORTCUT_LIST_SIZE_SIZE = 2; // These values are used only by version 4 or later. static final String TRIE_FILE_EXTENSION = ".trie"; static final String FREQ_FILE_EXTENSION = ".freq"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; static final int NO_FORWARD_LINK_ADDRESS = 0; Loading @@ -264,6 +273,7 @@ public final class FormatSpec { static final int MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT = 0x7F; // 127 static final int MAX_PTNODES_IN_A_PT_NODE_ARRAY = 0x7FFF; // 32767 static final int MAX_BIGRAMS_IN_A_PTNODE = 10000; static final int MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE = 0xFFFF; static final int MAX_TERMINAL_FREQUENCY = 255; static final int MAX_BIGRAM_FREQUENCY = 15; Loading @@ -287,6 +297,7 @@ public final class FormatSpec { public static final class FormatOptions { public final int mVersion; public final boolean mSupportsDynamicUpdate; public final boolean mHasTerminalId; @UsedForTesting public FormatOptions(final int version) { this(version, false); Loading @@ -300,6 +311,7 @@ public final class FormatSpec { + FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior."); } mSupportsDynamicUpdate = supportsDynamicUpdate; mHasTerminalId = (version >= FIRST_VERSION_WITH_TERMINAL_ID); } } Loading
java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +6 −0 Original line number Diff line number Diff line Loading @@ -111,6 +111,7 @@ public final class FusionDictionary implements Iterable<Word> { ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mBigrams; int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal. PtNodeArray mChildren; boolean mIsNotAWord; // Only a shortcut boolean mIsBlacklistEntry; Loading @@ -129,6 +130,7 @@ public final class FusionDictionary implements Iterable<Word> { final boolean isNotAWord, final boolean isBlacklistEntry) { mChars = chars; mFrequency = frequency; mTerminalId = frequency; mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; Loading Loading @@ -156,6 +158,10 @@ public final class FusionDictionary implements Iterable<Word> { mChildren.mData.add(n); } public int getTerminalId() { return mTerminalId; } public boolean isTerminal() { return NOT_A_TERMINAL != mFrequency; } Loading
java/src/com/android/inputmethod/latin/makedict/Ver3DictEncoder.java +2 −2 Original line number Diff line number Diff line Loading @@ -68,7 +68,7 @@ public class Ver3DictEncoder implements DictEncoder { @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { if (formatOptions.mVersion > 3) { if (formatOptions.mVersion > FormatSpec.VERSION3) { throw new UnsupportedFormatException( "The given format options has wrong version number : " + formatOptions.mVersion); Loading Loading @@ -200,7 +200,7 @@ public class Ver3DictEncoder implements DictEncoder { mPosition += shortcutShift; } final int shortcutByteSize = mPosition - indexOfShortcutByteSize; if (shortcutByteSize > 0xFFFF) { if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) { throw new RuntimeException("Shortcut list too large"); } BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize, Loading
java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java 0 → 100644 +269 −0 Original line number Diff line number Diff line /* /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; /** * An implementation of DictEncoder for version 4 binary dictionary. */ @UsedForTesting public class Ver4DictEncoder implements DictEncoder { private final File mDictPlacedDir; private byte[] mTrieBuf; private byte[] mFreqBuf; private int mTriePos; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { mDictPlacedDir = dictPlacedDir; } private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) throws FileNotFoundException, IOException { final FileHeader header = new FileHeader(0, dictOptions, formatOptions); final String filename = header.getId() + "." + header.getVersion(); final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); } if (!trieFile.exists()) trieFile.createNewFile(); if (!freqFile.exists()) freqFile.createNewFile(); mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); } private void close() throws IOException { try { if (mTrieOutStream != null) { mTrieOutStream.close(); } if (mFreqOutStream != null) { mFreqOutStream.close(); } } finally { mTrieOutStream = null; mFreqOutStream = null; } } @Override public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions) throws IOException, UnsupportedFormatException { if (formatOptions.mVersion != FormatSpec.VERSION4) { throw new UnsupportedFormatException("File header has a wrong version number : " + formatOptions.mVersion); } if (!mDictPlacedDir.isDirectory()) { throw new UnsupportedFormatException("Given path is not a directory."); } if (mTrieOutStream == null) { openStreams(formatOptions, dict.mOptions); } BinaryDictEncoderUtils.writeDictionaryHeader(mTrieOutStream, dict, formatOptions); MakedictLog.i("Flattening the tree..."); ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray); int terminalCount = 0; for (final PtNodeArray array : flatNodes) { for (final PtNode node : array.mData) { if (node.isTerminal()) node.mTerminalId = terminalCount++; } } MakedictLog.i("Computing addresses..."); BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions); if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; mTrieBuf = new byte[bufferSize]; mFreqBuf = new byte[terminalCount * FormatSpec.FREQUENCY_AND_FLAGS_SIZE]; MakedictLog.i("Writing file..."); for (PtNodeArray nodeArray : flatNodes) { BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions); } if (MakedictLog.DBG) { BinaryDictEncoderUtils.showStatistics(flatNodes); MakedictLog.i("has " + terminalCount + " terminals."); } mTrieOutStream.write(mTrieBuf); mFreqOutStream.write(mFreqBuf); MakedictLog.i("Done"); close(); } @Override public void setPosition(int position) { if (mTrieBuf == null || position < 0 || position >- mTrieBuf.length) return; mTriePos = position; } @Override public int getPosition() { return mTriePos; } @Override public void writePtNodeCount(int ptNodeCount) { final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount); // ptNodeCount must fit on one byte or two bytes. // Please see comments in FormatSpec if (countSize != 1 && countSize != 2) { throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize); } mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, ptNodeCount, countSize); } private void writePtNodeFlags(final PtNode ptNode, final int parentAddress, final FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, BinaryDictEncoderUtils.makePtNodeFlags(ptNode, mTriePos, childrenPos, formatOptions), FormatSpec.PTNODE_FLAGS_SIZE); } private void writeParentPosition(int parentPos, final PtNode ptNode, final FormatOptions formatOptions) { if (parentPos != FormatSpec.NO_PARENT_ADDRESS) { parentPos -= ptNode.mCachedAddressAfterUpdate; } mTriePos = BinaryDictEncoderUtils.writeParentAddress(mTrieBuf, mTriePos, parentPos, formatOptions); } private void writeCharacters(final int[] characters, final boolean hasSeveralChars) { mTriePos = CharEncoding.writeCharArray(characters, mTrieBuf, mTriePos); if (hasSeveralChars) { mTrieBuf[mTriePos++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR; } } private void writeTerminalId(final int terminalId) { mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, terminalId, FormatSpec.PTNODE_TERMINAL_ID_SIZE); } private void writeFrequency(final int frequency, final int terminalId) { final int freqPos = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE; BinaryDictEncoderUtils.writeUIntToBuffer(mFreqBuf, freqPos, frequency, FormatSpec.FREQUENCY_AND_FLAGS_SIZE); } private void writeChildrenPosition(PtNode ptNode, FormatOptions formatOptions) { final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions); if (formatOptions.mSupportsDynamicUpdate) { mTriePos += BinaryDictEncoderUtils.writeSignedChildrenPosition(mTrieBuf, mTriePos, childrenPos); } else { mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, childrenPos); } } private void writeShortcuts(ArrayList<WeightedString> shortcuts) { if (null == shortcuts || shortcuts.isEmpty()) return; final int indexOfShortcutByteSize = mTriePos; mTriePos += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE; final Iterator<WeightedString> shortcutIterator = shortcuts.iterator(); while (shortcutIterator.hasNext()) { final WeightedString target = shortcutIterator.next(); final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency); mTrieBuf[mTriePos++] = (byte)shortcutFlags; final int shortcutShift = CharEncoding.writeString(mTrieBuf, mTriePos, target.mWord); mTriePos += shortcutShift; } final int shortcutByteSize = mTriePos - indexOfShortcutByteSize; if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) { throw new RuntimeException("Shortcut list too large : " + shortcutByteSize); } BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, indexOfShortcutByteSize, shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); } private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) { if (bigrams == null) return; final Iterator<WeightedString> bigramIterator = bigrams.iterator(); while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); final PtNode target = FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; final int unigramFrequencyForThisWord = target.mFrequency; final int offset = addressOfBigram - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); mTrieBuf[mTriePos++] = (byte) bigramFlags; mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, Math.abs(offset)); } } @Override public void writeForwardLinkAddress(int forwardLinkAddress) { mTriePos = BinaryDictEncoderUtils.writeUIntToBuffer(mTrieBuf, mTriePos, forwardLinkAddress, FormatSpec.FORWARD_LINK_ADDRESS_SIZE); } @Override public void writePtNode(final PtNode ptNode, final int parentPosition, final FormatOptions formatOptions, final FusionDictionary dict) { writePtNodeFlags(ptNode, parentPosition, formatOptions); writeParentPosition(parentPosition, ptNode, formatOptions); writeCharacters(ptNode.mChars, ptNode.hasSeveralChars()); if (ptNode.isTerminal()) { writeTerminalId(ptNode.mTerminalId); writeFrequency(ptNode.mFrequency, ptNode.mTerminalId); } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } }