Loading java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +8 −6 Original line number Diff line number Diff line Loading @@ -385,6 +385,7 @@ public class BinaryDictEncoderUtils { nodeSize + size, ptNode.mChildren)); } nodeSize += getShortcutListSize(ptNode.mShortcutTargets); if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) { if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, Loading @@ -393,6 +394,7 @@ public class BinaryDictEncoderUtils { nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; } } } ptNode.mCachedSize = nodeSize; size += nodeSize; } Loading java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +4 −0 Original line number Diff line number Diff line Loading @@ -265,8 +265,12 @@ public final class FormatSpec { static final String FREQ_FILE_EXTENSION = ".freq"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; Loading java/src/com/android/inputmethod/latin/makedict/SparseTable.java +44 −0 Original line number Diff line number Diff line Loading @@ -18,6 +18,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; Loading Loading @@ -147,4 +150,45 @@ public class SparseTable { BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); } } @UsedForTesting public void writeToFiles(final File lookupTableFile, final File contentFile) throws IOException { FileOutputStream lookupTableOutStream = null; FileOutputStream contentOutStream = null; try { lookupTableOutStream = new FileOutputStream(lookupTableFile); contentOutStream = new FileOutputStream(contentFile); write(lookupTableOutStream, contentOutStream); } finally { if (lookupTableOutStream != null) { lookupTableOutStream.close(); } if (contentOutStream != null) { contentOutStream.close(); } } } private static byte[] readFileToByteArray(final File file) throws IOException { final byte[] contents = new byte[(int) file.length()]; FileInputStream inStream = null; try { inStream = new FileInputStream(file); inStream.read(contents); } finally { if (inStream != null) { inStream.close(); } } return contents; } @UsedForTesting public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile, final int blockSize) throws IOException { final byte[] lookupTable = readFileToByteArray(lookupTableFile); final byte[] content = readFileToByteArray(contentFile); return new SparseTable(lookupTable, content, blockSize); } } java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +32 −2 Original line number Diff line number Diff line Loading @@ -42,12 +42,15 @@ public class Ver4DictDecoder extends DictDecoder { private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_FREQUENCY = 2; private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; private static final int FILETYPE_BIGRAM = 4; private final File mDictDirectory; private final DictionaryBufferFactory mBufferFactory; private DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; private DictBuffer mBigramBuffer; private SparseTable mBigramAddressTable; @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { Loading Loading @@ -82,6 +85,9 @@ public class Ver4DictDecoder extends DictDecoder { } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); } else if (fileType == FILETYPE_BIGRAM) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION); } else { throw new RuntimeException("Unsupported kind of file : " + fileType); } Loading @@ -94,6 +100,8 @@ public class Ver4DictDecoder extends DictDecoder { mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM)); loadBigramAddressSparseTable(); } @Override Loading @@ -118,6 +126,15 @@ public class Ver4DictDecoder extends DictDecoder { return header; } private void loadBigramAddressSparseTable() throws IOException { final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); Loading Loading @@ -191,8 +208,21 @@ public class Ver4DictDecoder extends DictDecoder { final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, addressPointer); final int posOfBigrams = mBigramAddressTable.get(terminalId); mBigramBuffer.position(posOfBigrams); while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, // remaining bigram entries are ignored. final int bigramFlags = mBigramBuffer.readUnsignedByte(); final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); mTerminalAddressTableBuffer.position( targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); bigrams.add(new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, targetAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { MakedictLog.d("too many bigrams in a node."); } Loading java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +55 −23 Original line number Diff line number Diff line Loading @@ -26,6 +26,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; Loading @@ -43,9 +44,13 @@ public class Ver4DictEncoder implements DictEncoder { private byte[] mTrieBuf; private int mTriePos; private int mHeaderSize; private SparseTable mBigramAddressTable; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; private OutputStream mTerminalAddressTableOutStream; private OutputStream mBigramOutStream; private File mDictDir; private String mBaseFilename; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { Loading @@ -55,12 +60,14 @@ public class Ver4DictEncoder implements DictEncoder { private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) throws FileNotFoundException, IOException { final FileHeader header = new FileHeader(0, dictOptions, formatOptions); final String filename = header.getId() + "." + header.getVersion(); final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); mBaseFilename = header.getId() + "." + header.getVersion(); mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); final File bigramFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); Loading @@ -71,6 +78,7 @@ public class Ver4DictEncoder implements DictEncoder { mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); mBigramOutStream = new FileOutputStream(bigramFile); } private void close() throws IOException { Loading @@ -84,10 +92,14 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } if (mBigramOutStream != null) { mBigramOutStream.close(); } } finally { mTrieOutStream = null; mFreqOutStream = null; mTerminalAddressTableOutStream = null; mBigramOutStream = null; } } Loading Loading @@ -123,6 +135,10 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); mBigramAddressTable = new SparseTable(terminalCount, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); writeBigrams(flatNodes, dict); writeBigramAddressSparseTable(); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; Loading Loading @@ -230,25 +246,42 @@ public class Ver4DictEncoder implements DictEncoder { shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); } private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) { if (bigrams == null) return; private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict) throws IOException { final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream(); final Iterator<WeightedString> bigramIterator = bigrams.iterator(); for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { final int startPos = bigramBuffer.size(); mBigramAddressTable.set(ptNode.mTerminalId, startPos); final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator(); while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); final PtNode target = FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; final int unigramFrequencyForThisWord = target.mFrequency; final int offset = addressOfBigram - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); mTrieBuf[mTriePos++] = (byte) bigramFlags; mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, Math.abs(offset)); final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags( bigramIterator.hasNext(), 0, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags, FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId, FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } } } bigramBuffer.writeTo(mBigramOutStream); } private void writeBigramAddressSparseTable() throws IOException { final File lookupIndexFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile); } @Override public void writeForwardLinkAddress(int forwardLinkAddress) { Loading @@ -267,7 +300,6 @@ public class Ver4DictEncoder implements DictEncoder { } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes, Loading Loading
java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java +8 −6 Original line number Diff line number Diff line Loading @@ -385,6 +385,7 @@ public class BinaryDictEncoderUtils { nodeSize + size, ptNode.mChildren)); } nodeSize += getShortcutListSize(ptNode.mShortcutTargets); if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) { if (null != ptNode.mBigrams) { for (WeightedString bigram : ptNode.mBigrams) { final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray, Loading @@ -393,6 +394,7 @@ public class BinaryDictEncoderUtils { nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE; } } } ptNode.mCachedSize = nodeSize; size += nodeSize; } Loading
java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +4 −0 Original line number Diff line number Diff line Loading @@ -265,8 +265,12 @@ public final class FormatSpec { static final String FREQ_FILE_EXTENSION = ".freq"; // tat = Terminal Address Table static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; static final String BIGRAM_FILE_EXTENSION = ".bigram"; static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index"; static final int FREQUENCY_AND_FLAGS_SIZE = 2; static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; static final int NO_PARENT_ADDRESS = 0; Loading
java/src/com/android/inputmethod/latin/makedict/SparseTable.java +44 −0 Original line number Diff line number Diff line Loading @@ -18,6 +18,9 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; Loading Loading @@ -147,4 +150,45 @@ public class SparseTable { BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4); } } @UsedForTesting public void writeToFiles(final File lookupTableFile, final File contentFile) throws IOException { FileOutputStream lookupTableOutStream = null; FileOutputStream contentOutStream = null; try { lookupTableOutStream = new FileOutputStream(lookupTableFile); contentOutStream = new FileOutputStream(contentFile); write(lookupTableOutStream, contentOutStream); } finally { if (lookupTableOutStream != null) { lookupTableOutStream.close(); } if (contentOutStream != null) { contentOutStream.close(); } } } private static byte[] readFileToByteArray(final File file) throws IOException { final byte[] contents = new byte[(int) file.length()]; FileInputStream inStream = null; try { inStream = new FileInputStream(file); inStream.read(contents); } finally { if (inStream != null) { inStream.close(); } } return contents; } @UsedForTesting public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile, final int blockSize) throws IOException { final byte[] lookupTable = readFileToByteArray(lookupTableFile); final byte[] content = readFileToByteArray(contentFile); return new SparseTable(lookupTable, content, blockSize); } }
java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +32 −2 Original line number Diff line number Diff line Loading @@ -42,12 +42,15 @@ public class Ver4DictDecoder extends DictDecoder { private static final int FILETYPE_TRIE = 1; private static final int FILETYPE_FREQUENCY = 2; private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3; private static final int FILETYPE_BIGRAM = 4; private final File mDictDirectory; private final DictionaryBufferFactory mBufferFactory; private DictBuffer mDictBuffer; private DictBuffer mFrequencyBuffer; private DictBuffer mTerminalAddressTableBuffer; private DictBuffer mBigramBuffer; private SparseTable mBigramAddressTable; @UsedForTesting /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { Loading Loading @@ -82,6 +85,9 @@ public class Ver4DictDecoder extends DictDecoder { } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); } else if (fileType == FILETYPE_BIGRAM) { return new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION); } else { throw new RuntimeException("Unsupported kind of file : " + fileType); } Loading @@ -94,6 +100,8 @@ public class Ver4DictDecoder extends DictDecoder { mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY)); mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer( getFile(FILETYPE_TERMINAL_ADDRESS_TABLE)); mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM)); loadBigramAddressSparseTable(); } @Override Loading @@ -118,6 +126,15 @@ public class Ver4DictDecoder extends DictDecoder { return header; } private void loadBigramAddressSparseTable() throws IOException { final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDirectory, mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); } protected static class PtNodeReader extends DictDecoder.PtNodeReader { protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) { frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1); Loading Loading @@ -191,8 +208,21 @@ public class Ver4DictDecoder extends DictDecoder { final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams, addressPointer); final int posOfBigrams = mBigramAddressTable.get(terminalId); mBigramBuffer.position(posOfBigrams); while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, // remaining bigram entries are ignored. final int bigramFlags = mBigramBuffer.readUnsignedByte(); final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); mTerminalAddressTableBuffer.position( targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); bigrams.add(new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, targetAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { MakedictLog.d("too many bigrams in a node."); } Loading
java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +55 −23 Original line number Diff line number Diff line Loading @@ -26,6 +26,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; Loading @@ -43,9 +44,13 @@ public class Ver4DictEncoder implements DictEncoder { private byte[] mTrieBuf; private int mTriePos; private int mHeaderSize; private SparseTable mBigramAddressTable; private OutputStream mTrieOutStream; private OutputStream mFreqOutStream; private OutputStream mTerminalAddressTableOutStream; private OutputStream mBigramOutStream; private File mDictDir; private String mBaseFilename; @UsedForTesting public Ver4DictEncoder(final File dictPlacedDir) { Loading @@ -55,12 +60,14 @@ public class Ver4DictEncoder implements DictEncoder { private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions) throws FileNotFoundException, IOException { final FileHeader header = new FileHeader(0, dictOptions, formatOptions); final String filename = header.getId() + "." + header.getVersion(); final File mDictDir = new File(mDictPlacedDir, filename); final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION); mBaseFilename = header.getId() + "." + header.getVersion(); mDictDir = new File(mDictPlacedDir, mBaseFilename); final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION); final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION); final File terminalAddressTableFile = new File(mDictDir, filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); final File bigramFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION); if (!mDictDir.isDirectory()) { if (mDictDir.exists()) mDictDir.delete(); mDictDir.mkdirs(); Loading @@ -71,6 +78,7 @@ public class Ver4DictEncoder implements DictEncoder { mTrieOutStream = new FileOutputStream(trieFile); mFreqOutStream = new FileOutputStream(freqFile); mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile); mBigramOutStream = new FileOutputStream(bigramFile); } private void close() throws IOException { Loading @@ -84,10 +92,14 @@ public class Ver4DictEncoder implements DictEncoder { if (mTerminalAddressTableOutStream != null) { mTerminalAddressTableOutStream.close(); } if (mBigramOutStream != null) { mBigramOutStream.close(); } } finally { mTrieOutStream = null; mFreqOutStream = null; mTerminalAddressTableOutStream = null; mBigramOutStream = null; } } Loading Loading @@ -123,6 +135,10 @@ public class Ver4DictEncoder implements DictEncoder { if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes); writeTerminalData(flatNodes, terminalCount); mBigramAddressTable = new SparseTable(terminalCount, FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE); writeBigrams(flatNodes, dict); writeBigramAddressSparseTable(); final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1); final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize; Loading Loading @@ -230,25 +246,42 @@ public class Ver4DictEncoder implements DictEncoder { shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE); } private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) { if (bigrams == null) return; private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict) throws IOException { final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream(); final Iterator<WeightedString> bigramIterator = bigrams.iterator(); for (final PtNodeArray nodeArray : flatNodes) { for (final PtNode ptNode : nodeArray.mData) { if (ptNode.mBigrams != null) { final int startPos = bigramBuffer.size(); mBigramAddressTable.set(ptNode.mTerminalId, startPos); final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator(); while (bigramIterator.hasNext()) { final WeightedString bigram = bigramIterator.next(); final PtNode target = FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord); final int addressOfBigram = target.mCachedAddressAfterUpdate; final int unigramFrequencyForThisWord = target.mFrequency; final int offset = addressOfBigram - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); mTrieBuf[mTriePos++] = (byte) bigramFlags; mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf, mTriePos, Math.abs(offset)); final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags( bigramIterator.hasNext(), 0, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags, FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId, FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE); } } } } bigramBuffer.writeTo(mBigramOutStream); } private void writeBigramAddressSparseTable() throws IOException { final File lookupIndexFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION); final File contentFile = new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION); mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile); } @Override public void writeForwardLinkAddress(int forwardLinkAddress) { Loading @@ -267,7 +300,6 @@ public class Ver4DictEncoder implements DictEncoder { } writeChildrenPosition(ptNode, formatOptions); writeShortcuts(ptNode.mShortcutTargets); writeBigrams(ptNode.mBigrams, dict); } private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes, Loading