Loading native/jni/NativeFileList.mk +2 −1 Original line number Diff line number Diff line Loading @@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp \ ver2_patricia_trie_node_reader.cpp) \ ver2_patricia_trie_node_reader.cpp \ ver2_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_buffers.cpp \ ver4_dict_constants.cpp \ Loading native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +4 −83 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading Loading @@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; while (true) { // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). if (wordPos >= length) return NOT_A_DICT_POS; int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. 
if (0 >= ptNodeCount) return NOT_A_DICT_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); if (character == wChar) { // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is // no match and we can return NOT_A_DICT_POS. So we will check all the // characters in this PtNode indeed does match. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); while (NOT_A_CODE_POINT != character) { ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). if (wordPos >= length) return NOT_A_DICT_POS; if (inWord[wordPos] != character) return NOT_A_DICT_POS; character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &pos); } } // If we come here we know that so far, we do match. Either we are on a terminal // and we match the length, in which case we found it, or we traverse children. // If we don't match the length AND don't have children, then a word in the // dictionary fully matches a prefix of the searched word but not the full word. ++wordPos; if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (wordPos == length) { return ptNodePos; } PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { return NOT_A_DICT_POS; } // We have children and we are still shorter than the word we are searching for, so // we need to traverse children. 
Put the pointer on the children position, and // break pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); break; } else { // This PtNode does not match, so skip the remaining part and go to the next. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); } if (PatriciaTrieReadingUtils::isTerminal(flags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { mBigramListPolicy.skipAllBigrams(&pos); } } --ptNodeCount; } } DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int PatriciaTriePolicy::getProbability(const int unigramProbability, Loading native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +4 −1 Original line number Diff line number Diff line Loading @@ -25,6 +25,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" Loading @@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictBufferSize(mMmappedBuffer.get()->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), 
mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {} mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize) {} AK_FORCE_INLINE int getRootPosition() const { return 0; Loading Loading @@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2PtNodeArrayReader mPtNodeArrayReader; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; Loading native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp 0 → 100644 +54 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" namespace latinime { bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, int *const outPtNodeCount, int *const outFirstPtNodePos) const { if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) { // Reading invalid position because of a bug or a broken dictionary. 
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", ptNodeArrayPos, mDictSize); ASSERT(false); return false; } int readingPos = ptNodeArrayPos; const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictBuffer, &readingPos); *outPtNodeCount = ptNodeCountInArray; *outFirstPtNodePos = readingPos; return true; } bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, int *const outNextPtNodeArrayPos) const { if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) { // Reading invalid position because of bug or broken dictionary. AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", forwordLinkPos, mDictSize); ASSERT(false); return false; } // Ver2 dicts don't have forward links. *outNextPtNodeArrayPos = NOT_A_DICT_POS; return true; } } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h 0 → 100644 +44 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H #define LATINIME_VER2_PT_NODE_ARRAY_READER_H #include <stdint.h> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" namespace latinime { class Ver2PtNodeArrayReader : public PtNodeArrayReader { public: Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize) : mDictBuffer(dictBuffer), mDictSize(dictSize) {}; virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, int *const outPtNodeCount, int *const outFirstPtNodePos) const; virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, int *const outNextPtNodeArrayPos) const; private: DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader); const uint8_t *const mDictBuffer; const int mDictSize; }; } // namespace latinime #endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */ Loading
native/jni/NativeFileList.mk +2 −1 Original line number Diff line number Diff line Loading @@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp \ ver2_patricia_trie_node_reader.cpp) \ ver2_patricia_trie_node_reader.cpp \ ver2_pt_node_array_reader.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_buffers.cpp \ ver4_dict_constants.cpp \ Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +4 −83 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading Loading @@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // dictionary. If no match is found, it returns NOT_A_DICT_POS. int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; while (true) { // If we already traversed the tree further than the word is long, it means // there was no match (or we would have found it). if (wordPos >= length) return NOT_A_DICT_POS; int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. if (0 >= ptNodeCount) return NOT_A_DICT_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); if (character == wChar) { // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is // no match and we can return NOT_A_DICT_POS. 
So we will check that all the // characters in this PtNode do indeed match. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); while (NOT_A_CODE_POINT != character) { ++wordPos; // If we overshoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). if (wordPos >= length) return NOT_A_DICT_POS; if (inWord[wordPos] != character) return NOT_A_DICT_POS; character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &pos); } } // If we come here we know that so far, we do match. Either we are on a terminal // and we match the length, in which case we found it, or we traverse children. // If we don't match the length AND don't have children, then a word in the // dictionary fully matches a prefix of the searched word but not the full word. ++wordPos; if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (wordPos == length) { return ptNodePos; } PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { return NOT_A_DICT_POS; } // We have children and we are still shorter than the word we are searching for, so // we need to traverse children. Put the pointer on the children position, and // break pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); break; } else { // This PtNode does not match, so skip the remaining part and go to the next. 
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); } if (PatriciaTrieReadingUtils::isTerminal(flags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos); } if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { mShortcutListPolicy.skipAllShortcuts(&pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { mBigramListPolicy.skipAllBigrams(&pos); } } --ptNodeCount; } } DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int PatriciaTriePolicy::getProbability(const int unigramProbability, Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +4 −1 Original line number Diff line number Diff line Loading @@ -25,6 +25,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" Loading @@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictBufferSize(mMmappedBuffer.get()->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {} mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize) {} AK_FORCE_INLINE int getRootPosition() const { return 0; Loading Loading @@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2PtNodeArrayReader mPtNodeArrayReader; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.cpp 0 → 100644 +54 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" namespace latinime { bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, int *const outPtNodeCount, int *const outFirstPtNodePos) const { if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) { // Reading invalid position because of a bug or a broken dictionary. AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", ptNodeArrayPos, mDictSize); ASSERT(false); return false; } int readingPos = ptNodeArrayPos; const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictBuffer, &readingPos); *outPtNodeCount = ptNodeCountInArray; *outFirstPtNodePos = readingPos; return true; } bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, int *const outNextPtNodeArrayPos) const { if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) { // Reading invalid position because of bug or broken dictionary. 
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", forwordLinkPos, mDictSize); ASSERT(false); return false; } // Ver2 dicts don't have forward links. *outNextPtNodeArrayPos = NOT_A_DICT_POS; return true; } } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h 0 → 100644 +44 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H #define LATINIME_VER2_PT_NODE_ARRAY_READER_H #include <stdint.h> #include "defines.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" namespace latinime { class Ver2PtNodeArrayReader : public PtNodeArrayReader { public: Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize) : mDictBuffer(dictBuffer), mDictSize(dictSize) {}; virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, int *const outPtNodeCount, int *const outFirstPtNodePos) const; virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, int *const outNextPtNodeArrayPos) const; private: DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader); const uint8_t *const mDictBuffer; const int mDictSize; }; } // namespace latinime #endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */