Loading native/jni/Android.mk +1 −0 Original line number Diff line number Diff line Loading @@ -73,6 +73,7 @@ LATIN_IME_CORE_SRC_FILES := \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ dictionary_structure_with_buffer_policy_factory.cpp \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp 0 → 100644 +48 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" namespace latinime { bool DynamicPatriciaTrieGcEventListeners ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !node->isTerminal(); if (mChildrenValue > 0) { isUselessPtNode = false; } else if (node->isTerminal()) { // Remove children as all children are useless. int writingPos = node->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { return false; } } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. if (!mWritingHelper->markNodeAsDeleted(node)) { return false; } } else { valueStack.back() += 1; } return true; } } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h 0 → 100644 +75 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H #include <vector> #include "defines.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" namespace latinime { class DynamicPatriciaTrieGcEventListeners { public: // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC. // TODO: Concatenate non-terminal PtNodes. class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer) : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), mChildrenValue(0) {} ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {}; bool onAscend() { if (valueStack.empty()) { return false; } mChildrenValue = valueStack.back(); valueStack.pop_back(); return true; } bool onDescend() { valueStack.push_back(0); return true; } bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node); private: DISALLOW_IMPLICIT_CONSTRUCTORS( ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; std::vector<int> valueStack; int mChildrenValue; }; private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */ native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +69 −20 Original line number Diff line number Diff line Loading @@ -23,36 +23,85 @@ namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( TraversingEventListener *const listener) { bool alreadyVisitedChildren = false; // Descend from the root to the root PtNode array. if (!listener->onDescend()) { return false; } while (!isEnd()) { if (!alreadyVisitedChildren) { if (mNodeReader.hasChildren()) { // Move to the first child. if (!listener->onDescend()) { return false; } pushReadingStateToStack(); readChildNode(); } else { alreadyVisitedChildren = true; } } else { if (!listener->onVisitingPtNode(&mNodeReader)) { return false; } readNextSiblingNode(); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. if (!listener->onAscend()) { return false; } popReadingStateFromStack(); alreadyVisitedChildren = true; } else { // Process sibling PtNode. alreadyVisitedChildren = false; } } } // Ascend from the root PtNode array to the root. if (!listener->onAscend()) { return false; } return !isError(); } // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPatriciaTrieReadingHelper::nextNodeArray() { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, &mPos); mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( dictBuf, &mReadingState.mPos); if (usesAdditionalBuffer) { mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. mTotalNodeCount += mNodeCount; mNodeArrayCount++; if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP || mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { mReadingState.mTotalNodeCount += mReadingState.mNodeCount; mReadingState.mNodeArrayCount++; if (mReadingState.mNodeCount < 0 || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); mReadingState.mNodeCount, mReadingState.mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; return; } if (mNodeCount == 0) { if (mReadingState.mNodeCount == 0) { // Empty node array. Try following forward link. followForwardLink(); } Loading @@ -60,24 +109,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() { // Follow the forward link and read the next node array if exists. void DynamicPatriciaTrieReadingHelper::followForwardLink() { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } const int forwardLinkPosition = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos); DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); if (usesAdditionalBuffer) { mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize(); } mPosOfLastForwardLinkField = mPos; mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { // Follow the forward link. mPos += forwardLinkPosition; mReadingState.mPos += forwardLinkPosition; nextNodeArray(); } else { // All node arrays have been read. mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +119 −47 Original line number Diff line number Diff line Loading @@ -17,6 +17,9 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #include <cstddef> #include <vector> #include "defines.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" Loading @@ -34,12 +37,31 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieReadingHelper { public: class TraversingEventListener { public: virtual ~TraversingEventListener() {}; // Returns whether the event handling was succeeded or not. virtual bool onAscend() = 0; // Returns whether the event handling was succeeded or not. virtual bool onDescend() = 0; // Returns whether the event handling was succeeded or not. virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0; protected: TraversingEventListener() {}; private: DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {} : mIsError(false), mReadingState(), mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} ~DynamicPatriciaTrieReadingHelper() {} Loading @@ -48,21 +70,21 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE bool isEnd() const { return mPos == NOT_A_DICT_POS; return mReadingState.mPos == NOT_A_DICT_POS; } // Initialize reading state with the head position of a node array. AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) { if (nodeArrayPos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } else { mIsError = false; mPos = nodeArrayPos; mNodeCount = 0; mPrevTotalCodePointCount = 0; mTotalNodeCount = 0; mNodeArrayCount = 0; mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPos = nodeArrayPos; mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalNodeCount = 0; mReadingState.mNodeArrayCount = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextNodeArray(); if (!isEnd()) { fetchNodeInfo(); Loading @@ -73,15 +95,17 @@ class DynamicPatriciaTrieReadingHelper { // Initialize reading state with the head position of a node. AK_FORCE_INLINE void initWithNodePos(const int nodePos) { if (nodePos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } else { mIsError = false; mPos = nodePos; mNodeCount = 1; mPrevTotalCodePointCount = 0; mTotalNodeCount = 1; mNodeArrayCount = 1; mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPos = nodePos; mReadingState.mNodeCount = 1; mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalNodeCount = 1; mReadingState.mNodeArrayCount = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); fetchNodeInfo(); } } Loading @@ -100,12 +124,12 @@ class DynamicPatriciaTrieReadingHelper { // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { return mPrevTotalCodePointCount; return mReadingState.mPrevTotalCodePointCount; } // Return code point count include the last read node's code points. AK_FORCE_INLINE int getTotalCodePointCount() const { return mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); } AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( Loading @@ -121,9 +145,9 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE void readNextSiblingNode() { mNodeCount -= 1; mPos = mNodeReader.getSiblingNodePos(); if (mNodeCount <= 0) { mReadingState.mNodeCount -= 1; mReadingState.mPos = mNodeReader.getSiblingNodePos(); if (mReadingState.mNodeCount <= 0) { // All nodes in the current node array have been read. followForwardLink(); if (!isEnd()) { Loading @@ -137,47 +161,64 @@ class DynamicPatriciaTrieReadingHelper { // Read the first child node of the current node. AK_FORCE_INLINE void readChildNode() { if (mNodeReader.hasChildren()) { mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mTotalNodeCount = 0; mNodeArrayCount = 0; mPos = mNodeReader.getChildrenPos(); mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mTotalNodeCount = 0; mReadingState.mNodeArrayCount = 0; mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextNodeArray(); if (!isEnd()) { fetchNodeInfo(); } } else { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } // Read the parent node of the current node. AK_FORCE_INLINE void readParentNode() { if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mTotalNodeCount = 1; mNodeArrayCount = 1; mNodeCount = 1; mPos = mNodeReader.getParentPos(); mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mTotalNodeCount = 1; mReadingState.mNodeArrayCount = 1; mReadingState.mNodeCount = 1; mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; fetchNodeInfo(); } else { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } AK_FORCE_INLINE int getPosOfLastForwardLinkField() const { return mPosOfLastForwardLinkField; return mReadingState.mPosOfLastForwardLinkField; } AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { return mReadingState.mPosOfLastPtNodeArrayHead; } AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { if (!isEnd()) { fetchNodeInfo(); } } bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; class ReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} bool mIsError; int mPos; // Node count of a node array. int mNodeCount; Loading @@ -185,21 +226,52 @@ class DynamicPatriciaTrieReadingHelper { int mTotalNodeCount; int mNodeArrayCount; int mPosOfLastForwardLinkField; int mPosOfLastPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; bool mIsError; ReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; DynamicPatriciaTrieNodeReader mNodeReader; int mMergedNodeCodePoints[MAX_WORD_LENGTH]; std::vector<ReadingState> mReadingStateStack; void nextNodeArray(); void followForwardLink(); AK_FORCE_INLINE void fetchNodeInfo() { mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH, mMergedNodeCodePoints); mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos, MAX_WORD_LENGTH, mMergedNodeCodePoints); if (mNodeReader.getCodePointCount() <= 0) { // Empty node is not allowed. mIsError = true; mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; } else { mReadingStateStack.push_back(mReadingState); } } AK_FORCE_INLINE void popReadingStateFromStack() { if (mReadingStateStack.empty()) { mReadingState.mPos = NOT_A_DICT_POS; } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); fetchNodeInfo(); } } }; Loading Loading
native/jni/Android.mk +1 −0 Original line number Diff line number Diff line Loading @@ -73,6 +73,7 @@ LATIN_IME_CORE_SRC_FILES := \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ dictionary_structure_with_buffer_policy_factory.cpp \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp 0 → 100644 +48 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" namespace latinime { bool DynamicPatriciaTrieGcEventListeners ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !node->isTerminal(); if (mChildrenValue > 0) { isUselessPtNode = false; } else if (node->isTerminal()) { // Remove children as all children are useless. int writingPos = node->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { return false; } } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. if (!mWritingHelper->markNodeAsDeleted(node)) { return false; } } else { valueStack.back() += 1; } return true; } } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h 0 → 100644 +75 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H #include <vector> #include "defines.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" namespace latinime { class DynamicPatriciaTrieGcEventListeners { public: // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC. // TODO: Concatenate non-terminal PtNodes. class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer) : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), mChildrenValue(0) {} ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {}; bool onAscend() { if (valueStack.empty()) { return false; } mChildrenValue = valueStack.back(); valueStack.pop_back(); return true; } bool onDescend() { valueStack.push_back(0); return true; } bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node); private: DISALLOW_IMPLICIT_CONSTRUCTORS( ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; std::vector<int> valueStack; int mChildrenValue; }; private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +69 −20 Original line number Diff line number Diff line Loading @@ -23,36 +23,85 @@ namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( TraversingEventListener *const listener) { bool alreadyVisitedChildren = false; // Descend from the root to the root PtNode array. if (!listener->onDescend()) { return false; } while (!isEnd()) { if (!alreadyVisitedChildren) { if (mNodeReader.hasChildren()) { // Move to the first child. if (!listener->onDescend()) { return false; } pushReadingStateToStack(); readChildNode(); } else { alreadyVisitedChildren = true; } } else { if (!listener->onVisitingPtNode(&mNodeReader)) { return false; } readNextSiblingNode(); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. if (!listener->onAscend()) { return false; } popReadingStateFromStack(); alreadyVisitedChildren = true; } else { // Process sibling PtNode. alreadyVisitedChildren = false; } } } // Ascend from the root PtNode array to the root. if (!listener->onAscend()) { return false; } return !isError(); } // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPatriciaTrieReadingHelper::nextNodeArray() { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, &mPos); mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( dictBuf, &mReadingState.mPos); if (usesAdditionalBuffer) { mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. mTotalNodeCount += mNodeCount; mNodeArrayCount++; if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP || mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { mReadingState.mTotalNodeCount += mReadingState.mNodeCount; mReadingState.mNodeArrayCount++; if (mReadingState.mNodeCount < 0 || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); mReadingState.mNodeCount, mReadingState.mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; return; } if (mNodeCount == 0) { if (mReadingState.mNodeCount == 0) { // Empty node array. Try following forward link. followForwardLink(); } Loading @@ -60,24 +109,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() { // Follow the forward link and read the next node array if exists. void DynamicPatriciaTrieReadingHelper::followForwardLink() { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } const int forwardLinkPosition = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos); DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos); if (usesAdditionalBuffer) { mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize(); } mPosOfLastForwardLinkField = mPos; mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { // Follow the forward link. mPos += forwardLinkPosition; mReadingState.mPos += forwardLinkPosition; nextNodeArray(); } else { // All node arrays have been read. mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +119 −47 Original line number Diff line number Diff line Loading @@ -17,6 +17,9 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #include <cstddef> #include <vector> #include "defines.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" Loading @@ -34,12 +37,31 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieReadingHelper { public: class TraversingEventListener { public: virtual ~TraversingEventListener() {}; // Returns whether the event handling was succeeded or not. virtual bool onAscend() = 0; // Returns whether the event handling was succeeded or not. virtual bool onDescend() = 0; // Returns whether the event handling was succeeded or not. virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0; protected: TraversingEventListener() {}; private: DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); }; DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {} : mIsError(false), mReadingState(), mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} ~DynamicPatriciaTrieReadingHelper() {} Loading @@ -48,21 +70,21 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE bool isEnd() const { return mPos == NOT_A_DICT_POS; return mReadingState.mPos == NOT_A_DICT_POS; } // Initialize reading state with the head position of a node array. AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) { if (nodeArrayPos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } else { mIsError = false; mPos = nodeArrayPos; mNodeCount = 0; mPrevTotalCodePointCount = 0; mTotalNodeCount = 0; mNodeArrayCount = 0; mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPos = nodeArrayPos; mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalNodeCount = 0; mReadingState.mNodeArrayCount = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextNodeArray(); if (!isEnd()) { fetchNodeInfo(); Loading @@ -73,15 +95,17 @@ class DynamicPatriciaTrieReadingHelper { // Initialize reading state with the head position of a node. AK_FORCE_INLINE void initWithNodePos(const int nodePos) { if (nodePos == NOT_A_DICT_POS) { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } else { mIsError = false; mPos = nodePos; mNodeCount = 1; mPrevTotalCodePointCount = 0; mTotalNodeCount = 1; mNodeArrayCount = 1; mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPos = nodePos; mReadingState.mNodeCount = 1; mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalNodeCount = 1; mReadingState.mNodeArrayCount = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); fetchNodeInfo(); } } Loading @@ -100,12 +124,12 @@ class DynamicPatriciaTrieReadingHelper { // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { return mPrevTotalCodePointCount; return mReadingState.mPrevTotalCodePointCount; } // Return code point count include the last read node's code points. AK_FORCE_INLINE int getTotalCodePointCount() const { return mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); } AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( Loading @@ -121,9 +145,9 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE void readNextSiblingNode() { mNodeCount -= 1; mPos = mNodeReader.getSiblingNodePos(); if (mNodeCount <= 0) { mReadingState.mNodeCount -= 1; mReadingState.mPos = mNodeReader.getSiblingNodePos(); if (mReadingState.mNodeCount <= 0) { // All nodes in the current node array have been read. followForwardLink(); if (!isEnd()) { Loading @@ -137,47 +161,64 @@ class DynamicPatriciaTrieReadingHelper { // Read the first child node of the current node. AK_FORCE_INLINE void readChildNode() { if (mNodeReader.hasChildren()) { mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mTotalNodeCount = 0; mNodeArrayCount = 0; mPos = mNodeReader.getChildrenPos(); mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mTotalNodeCount = 0; mReadingState.mNodeArrayCount = 0; mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextNodeArray(); if (!isEnd()) { fetchNodeInfo(); } } else { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } // Read the parent node of the current node. AK_FORCE_INLINE void readParentNode() { if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mTotalNodeCount = 1; mNodeArrayCount = 1; mNodeCount = 1; mPos = mNodeReader.getParentPos(); mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mTotalNodeCount = 1; mReadingState.mNodeArrayCount = 1; mReadingState.mNodeCount = 1; mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; fetchNodeInfo(); } else { mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } AK_FORCE_INLINE int getPosOfLastForwardLinkField() const { return mPosOfLastForwardLinkField; return mReadingState.mPosOfLastForwardLinkField; } AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { return mReadingState.mPosOfLastPtNodeArrayHead; } AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { if (!isEnd()) { fetchNodeInfo(); } } bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; class ReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} bool mIsError; int mPos; // Node count of a node array. int mNodeCount; Loading @@ -185,21 +226,52 @@ class DynamicPatriciaTrieReadingHelper { int mTotalNodeCount; int mNodeArrayCount; int mPosOfLastForwardLinkField; int mPosOfLastPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; bool mIsError; ReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; DynamicPatriciaTrieNodeReader mNodeReader; int mMergedNodeCodePoints[MAX_WORD_LENGTH]; std::vector<ReadingState> mReadingStateStack; void nextNodeArray(); void followForwardLink(); AK_FORCE_INLINE void fetchNodeInfo() { mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH, mMergedNodeCodePoints); mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos, MAX_WORD_LENGTH, mMergedNodeCodePoints); if (mNodeReader.getCodePointCount() <= 0) { // Empty node is not allowed. mIsError = true; mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS; } } AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; } else { mReadingStateStack.push_back(mReadingState); } } AK_FORCE_INLINE void popReadingStateFromStack() { if (mReadingStateStack.empty()) { mReadingState.mPos = NOT_A_DICT_POS; } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); fetchNodeInfo(); } } }; Loading