Loading native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +60 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading Loading @@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod return siblingPos; } const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints, const int codePointCount) const { const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, false /* forceLowerCaseSearch */); if (ptNodePos == NOT_A_DICT_POS) { AKLOGE("getWordProperty was called for invalid word."); return WordProperty(); } const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); std::vector<int> codePointVector(ptNodeParams.getCodePoints(), ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); // Fetch bigram information. std::vector<WordProperty::BigramProperty> bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); while (bigramsIt.hasNext()) { // Fetch the next bigram information and forward the iterator. bigramsIt.next(); // Skip the entry if the entry has been deleted. This never happens for ver2 dicts. if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { int word1Probability = NOT_A_PROBABILITY; int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability); std::vector<int> word1(bigramWord1CodePoints, bigramWord1CodePoints + word1CodePointCount); bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */)); } } // Fetch shortcut information. std::vector<WordProperty::ShortcutProperty> shortcuts; int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); if (shortcutPos != NOT_A_DICT_POS) { int shortcutTargetCodePoints[MAX_WORD_LENGTH]; ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos); bool hasNext = true; while (hasNext) { const ShortcutListReadingUtils::ShortcutFlags shortcutFlags = ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos); hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags); const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget( mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos); std::vector<int> shortcutTarget(shortcutTargetCodePoints, shortcutTargetCodePoints + shortcutTargetLength); const int shortcutProbability = ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags); shortcuts.push_back( WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability)); } } return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &bigrams, &shortcuts); } } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +1 −4 Original line number Diff line number Diff line Loading @@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const { // getWordProperty is not supported. return WordProperty(); } const int codePointCount) const; int getNextWordAndNextToken(const int token, int *const outCodePoints) { // getNextWordAndNextToken is not supported. Loading tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +36 −0 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map.Entry; import java.util.Random; import java.util.Set; Loading Loading @@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { Log.d(TAG, result); } } public void testVer2DictGetWordProperty() { final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; final ArrayList<String> words = sWords; final HashMap<String, List<String>> shortcuts = sShortcuts; final String dictName = "testGetWordProperty"; final String dictVersion = Long.toString(System.currentTimeMillis()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, sEmptyBigrams); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); file.delete(); timeWritingDictToFile(file, dict, formatOptions); final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 0 /* offset */, file.length(), true /* useFullEditDistance */, Locale.ENGLISH, dictName, false /* isUpdatable */); for (final String word : words) { final WordProperty wordProperty = binaryDictionary.getWordProperty(word); assertEquals(word, wordProperty.mWord); assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); if (shortcuts.containsKey(word)) { assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size()); final List<String> shortcutList = shortcuts.get(word); assertTrue(wordProperty.mHasShortcuts); for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutList.contains(shortcutTarget.mWord)); assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability()); shortcutList.remove(shortcutTarget.mWord); } assertTrue(shortcutList.isEmpty()); } } } } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +60 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading Loading @@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod return siblingPos; } const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints, const int codePointCount) const { const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, false /* forceLowerCaseSearch */); if (ptNodePos == NOT_A_DICT_POS) { AKLOGE("getWordProperty was called for invalid word."); return WordProperty(); } const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); std::vector<int> codePointVector(ptNodeParams.getCodePoints(), ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); // Fetch bigram information. std::vector<WordProperty::BigramProperty> bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); while (bigramsIt.hasNext()) { // Fetch the next bigram information and forward the iterator. bigramsIt.next(); // Skip the entry if the entry has been deleted. This never happens for ver2 dicts. if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { int word1Probability = NOT_A_PROBABILITY; int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints, &word1Probability); std::vector<int> word1(bigramWord1CodePoints, bigramWord1CodePoints + word1CodePointCount); bigrams.push_back(WordProperty::BigramProperty(&word1, bigramsIt.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */)); } } // Fetch shortcut information. std::vector<WordProperty::ShortcutProperty> shortcuts; int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); if (shortcutPos != NOT_A_DICT_POS) { int shortcutTargetCodePoints[MAX_WORD_LENGTH]; ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos); bool hasNext = true; while (hasNext) { const ShortcutListReadingUtils::ShortcutFlags shortcutFlags = ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos); hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags); const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget( mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos); std::vector<int> shortcutTarget(shortcutTargetCodePoints, shortcutTargetCodePoints + shortcutTargetLength); const int shortcutProbability = ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags); shortcuts.push_back( WordProperty::ShortcutProperty(&shortcutTarget, shortcutProbability)); } } return WordProperty(&codePointVector, ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.hasBigrams(), ptNodeParams.hasShortcutTargets(), ptNodeParams.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &bigrams, &shortcuts); } } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +1 −4 Original line number Diff line number Diff line Loading @@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { } const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const { // getWordProperty is not supported. return WordProperty(); } const int codePointCount) const; int getNextWordAndNextToken(const int token, int *const outCodePoints) { // getNextWordAndNextToken is not supported. Loading
tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +36 −0 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map.Entry; import java.util.Random; import java.util.Set; Loading Loading @@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { Log.d(TAG, result); } } public void testVer2DictGetWordProperty() { final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; final ArrayList<String> words = sWords; final HashMap<String, List<String>> shortcuts = sShortcuts; final String dictName = "testGetWordProperty"; final String dictVersion = Long.toString(System.currentTimeMillis()); final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); addUnigrams(words.size(), dict, words, shortcuts); addBigrams(dict, words, sEmptyBigrams); final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, getContext().getCacheDir()); file.delete(); timeWritingDictToFile(file, dict, formatOptions); final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 0 /* offset */, file.length(), true /* useFullEditDistance */, Locale.ENGLISH, dictName, false /* isUpdatable */); for (final String word : words) { final WordProperty wordProperty = binaryDictionary.getWordProperty(word); assertEquals(word, wordProperty.mWord); assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); if (shortcuts.containsKey(word)) { assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size()); final List<String> shortcutList = shortcuts.get(word); assertTrue(wordProperty.mHasShortcuts); for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutList.contains(shortcutTarget.mWord)); assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability()); shortcutList.remove(shortcutTarget.mWord); } assertTrue(shortcutList.isEmpty()); } } } }