Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 702bd17d authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android Git Automerger
Browse files

am 225745d4: Merge "Step 1 to implement GC. Finding garbage PtNodes."

* commit '225745d4':
  Step 1 to implement GC. Finding garbage PtNodes.
parents e6c6df90 225745d4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@ LATIN_IME_CORE_SRC_FILES := \
        header/header_read_write_utils.cpp \
        shortcut/shortcut_list_reading_utils.cpp \
        dictionary_structure_with_buffer_policy_factory.cpp \
        dynamic_patricia_trie_gc_event_listeners.cpp \
        dynamic_patricia_trie_node_reader.cpp \
        dynamic_patricia_trie_policy.cpp \
        dynamic_patricia_trie_reading_helper.cpp \
+48 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"

namespace latinime {

bool DynamicPatriciaTrieGcEventListeners
        ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
                ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
    // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
    // children.
    bool isUselessPtNode = !node->isTerminal();
    if (mChildrenValue > 0) {
        isUselessPtNode = false;
    } else if (node->isTerminal()) {
        // Remove children as all children are useless.
        int writingPos = node->getChildrenPosFieldPos();
        if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
                mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
            return false;
        }
    }
    if (isUselessPtNode) {
        // Current PtNode is no longer needed. Mark it as deleted.
        if (!mWritingHelper->markNodeAsDeleted(node)) {
            return false;
        }
    } else {
        valueStack.back() += 1;
    }
    return true;
}

} // namespace latinime
+75 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H

#include <vector>

#include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"

namespace latinime {

class DynamicPatriciaTrieGcEventListeners {
 public:
    // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
    // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
    // TODO: Concatenate non-terminal PtNodes.
    class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
        : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
     public:
        ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
                DynamicPatriciaTrieWritingHelper *const writingHelper,
                BufferWithExtendableBuffer *const buffer)
                : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
                  mChildrenValue(0) {}

        ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};

        bool onAscend() {
            if (valueStack.empty()) {
                return false;
            }
            mChildrenValue = valueStack.back();
            valueStack.pop_back();
            return true;
        }

        bool onDescend() {
            valueStack.push_back(0);
            return true;
        }

        bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node);

     private:
        DISALLOW_IMPLICIT_CONSTRUCTORS(
                ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);

        DynamicPatriciaTrieWritingHelper *const mWritingHelper;
        BufferWithExtendableBuffer *const mBuffer;
        std::vector<int> valueStack;
        int mChildrenValue;
    };

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
+69 −20
Original line number Diff line number Diff line
@@ -23,36 +23,85 @@ namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;

bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
        TraversingEventListener *const listener) {
    bool alreadyVisitedChildren = false;
    // Descend from the root to the root PtNode array.
    if (!listener->onDescend()) {
        return false;
    }
    while (!isEnd()) {
        if (!alreadyVisitedChildren) {
            if (mNodeReader.hasChildren()) {
                // Move to the first child.
                if (!listener->onDescend()) {
                    return false;
                }
                pushReadingStateToStack();
                readChildNode();
            } else {
                alreadyVisitedChildren = true;
            }
        } else {
            if (!listener->onVisitingPtNode(&mNodeReader)) {
                return false;
            }
            readNextSiblingNode();
            if (isEnd()) {
                // All PtNodes in current linked PtNode arrays have been visited.
                // Return to the parent.
                if (!listener->onAscend()) {
                    return false;
                }
                popReadingStateFromStack();
                alreadyVisitedChildren = true;
            } else {
                // Process sibling PtNode.
                alreadyVisitedChildren = false;
            }
        }
    }
    // Ascend from the root PtNode array to the root.
    if (!listener->onAscend()) {
        return false;
    }
    return !isError();
}

// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
    mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
    const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
    if (usesAdditionalBuffer) {
        mPos -= mBuffer->getOriginalBufferSize();
        mReadingState.mPos -= mBuffer->getOriginalBufferSize();
    }
    mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
            &mPos);
    mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
            dictBuf, &mReadingState.mPos);
    if (usesAdditionalBuffer) {
        mPos += mBuffer->getOriginalBufferSize();
        mReadingState.mPos += mBuffer->getOriginalBufferSize();
    }
    // Count up nodes and node arrays to avoid infinite loop.
    mTotalNodeCount += mNodeCount;
    mNodeArrayCount++;
    if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
            || mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
    mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
    mReadingState.mNodeArrayCount++;
    if (mReadingState.mNodeCount < 0
            || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
            || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
        // Invalid dictionary.
        AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
                "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
                mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
                mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
                mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
                MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
                MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
        ASSERT(false);
        mIsError = true;
        mPos = NOT_A_DICT_POS;
        mReadingState.mPos = NOT_A_DICT_POS;
        return;
    }
    if (mNodeCount == 0) {
    if (mReadingState.mNodeCount == 0) {
        // Empty node array. Try following forward link.
        followForwardLink();
    }
@@ -60,24 +109,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {

// Follow the forward link and read the next node array if exists.
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
    const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
    const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
    if (usesAdditionalBuffer) {
        mPos -= mBuffer->getOriginalBufferSize();
        mReadingState.mPos -= mBuffer->getOriginalBufferSize();
    }
    const int forwardLinkPosition =
            DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
            DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
    if (usesAdditionalBuffer) {
        mPos += mBuffer->getOriginalBufferSize();
        mReadingState.mPos += mBuffer->getOriginalBufferSize();
    }
    mPosOfLastForwardLinkField = mPos;
    mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
    if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
        // Follow the forward link.
        mPos += forwardLinkPosition;
        mReadingState.mPos += forwardLinkPosition;
        nextNodeArray();
    } else {
        // All node arrays have been read.
        mPos = NOT_A_DICT_POS;
        mReadingState.mPos = NOT_A_DICT_POS;
    }
}

+119 −47
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H

#include <cstddef>
#include <vector>

#include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
@@ -34,12 +37,31 @@ class DictionaryShortcutsStructurePolicy;
 */
class DynamicPatriciaTrieReadingHelper {
 public:
    class TraversingEventListener {
     public:
        virtual ~TraversingEventListener() {};

        // Returns whether the event handling was succeeded or not.
        virtual bool onAscend() = 0;

        // Returns whether the event handling was succeeded or not.
        virtual bool onDescend() = 0;

        // Returns whether the event handling was succeeded or not.
        virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0;

     protected:
        TraversingEventListener() {};

     private:
        DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
    };

    DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
            const DictionaryBigramsStructurePolicy *const bigramsPolicy,
            const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
            : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
              mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
              mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
            : mIsError(false), mReadingState(), mBuffer(buffer),
              mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}

    ~DynamicPatriciaTrieReadingHelper() {}

@@ -48,21 +70,21 @@ class DynamicPatriciaTrieReadingHelper {
    }

    AK_FORCE_INLINE bool isEnd() const {
        return mPos == NOT_A_DICT_POS;
        return mReadingState.mPos == NOT_A_DICT_POS;
    }

    // Initialize reading state with the head position of a node array.
    AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
        if (nodeArrayPos == NOT_A_DICT_POS) {
            mPos = NOT_A_DICT_POS;
            mReadingState.mPos = NOT_A_DICT_POS;
        } else {
            mIsError = false;
            mPos = nodeArrayPos;
            mNodeCount = 0;
            mPrevTotalCodePointCount = 0;
            mTotalNodeCount = 0;
            mNodeArrayCount = 0;
            mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPos = nodeArrayPos;
            mReadingState.mPrevTotalCodePointCount = 0;
            mReadingState.mTotalNodeCount = 0;
            mReadingState.mNodeArrayCount = 0;
            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingStateStack.clear();
            nextNodeArray();
            if (!isEnd()) {
                fetchNodeInfo();
@@ -73,15 +95,17 @@ class DynamicPatriciaTrieReadingHelper {
    // Initialize reading state with the head position of a node.
    AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
        if (nodePos == NOT_A_DICT_POS) {
            mPos = NOT_A_DICT_POS;
            mReadingState.mPos = NOT_A_DICT_POS;
        } else {
            mIsError = false;
            mPos = nodePos;
            mNodeCount = 1;
            mPrevTotalCodePointCount = 0;
            mTotalNodeCount = 1;
            mNodeArrayCount = 1;
            mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPos = nodePos;
            mReadingState.mNodeCount = 1;
            mReadingState.mPrevTotalCodePointCount = 0;
            mReadingState.mTotalNodeCount = 1;
            mReadingState.mNodeArrayCount = 1;
            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
            mReadingStateStack.clear();
            fetchNodeInfo();
        }
    }
@@ -100,12 +124,12 @@ class DynamicPatriciaTrieReadingHelper {

    // Return code point count exclude the last read node's code points.
    AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
        return mPrevTotalCodePointCount;
        return mReadingState.mPrevTotalCodePointCount;
    }

    // Return code point count include the last read node's code points.
    AK_FORCE_INLINE int getTotalCodePointCount() const {
        return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
        return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
    }

    AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
@@ -121,9 +145,9 @@ class DynamicPatriciaTrieReadingHelper {
    }

    AK_FORCE_INLINE void readNextSiblingNode() {
        mNodeCount -= 1;
        mPos = mNodeReader.getSiblingNodePos();
        if (mNodeCount <= 0) {
        mReadingState.mNodeCount -= 1;
        mReadingState.mPos = mNodeReader.getSiblingNodePos();
        if (mReadingState.mNodeCount <= 0) {
            // All nodes in the current node array have been read.
            followForwardLink();
            if (!isEnd()) {
@@ -137,47 +161,64 @@ class DynamicPatriciaTrieReadingHelper {
    // Read the first child node of the current node.
    AK_FORCE_INLINE void readChildNode() {
        if (mNodeReader.hasChildren()) {
            mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
            mTotalNodeCount = 0;
            mNodeArrayCount = 0;
            mPos = mNodeReader.getChildrenPos();
            mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
            mReadingState.mTotalNodeCount = 0;
            mReadingState.mNodeArrayCount = 0;
            mReadingState.mPos = mNodeReader.getChildrenPos();
            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            // Read children node array.
            nextNodeArray();
            if (!isEnd()) {
                fetchNodeInfo();
            }
        } else {
            mPos = NOT_A_DICT_POS;
            mReadingState.mPos = NOT_A_DICT_POS;
        }
    }

    // Read the parent node of the current node.
    AK_FORCE_INLINE void readParentNode() {
        if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
            mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
            mTotalNodeCount = 1;
            mNodeArrayCount = 1;
            mNodeCount = 1;
            mPos = mNodeReader.getParentPos();
            mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
            mReadingState.mTotalNodeCount = 1;
            mReadingState.mNodeArrayCount = 1;
            mReadingState.mNodeCount = 1;
            mReadingState.mPos = mNodeReader.getParentPos();
            mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
            mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
            fetchNodeInfo();
        } else {
            mPos = NOT_A_DICT_POS;
            mReadingState.mPos = NOT_A_DICT_POS;
        }
    }

    AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
        return mPosOfLastForwardLinkField;
        return mReadingState.mPosOfLastForwardLinkField;
    }

    AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
        return mReadingState.mPosOfLastPtNodeArrayHead;
    }

    AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
        if (!isEnd()) {
            fetchNodeInfo();
        }
    }

    bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);

 private:
    DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);

    static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
    static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
    class ReadingState {
     public:
        // Note that copy constructor and assignment operator are used for this class to use
        // std::vector.
        ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
                mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
                mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}

    bool mIsError;
        int mPos;
        // Node count of a node array.
        int mNodeCount;
@@ -185,21 +226,52 @@ class DynamicPatriciaTrieReadingHelper {
        int mTotalNodeCount;
        int mNodeArrayCount;
        int mPosOfLastForwardLinkField;
        int mPosOfLastPtNodeArrayHead;
    };

    static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
    static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
    static const size_t MAX_READING_STATE_STACK_SIZE;

    bool mIsError;
    ReadingState mReadingState;
    const BufferWithExtendableBuffer *const mBuffer;
    DynamicPatriciaTrieNodeReader mNodeReader;
    int mMergedNodeCodePoints[MAX_WORD_LENGTH];
    std::vector<ReadingState> mReadingStateStack;

    void nextNodeArray();

    void followForwardLink();

    AK_FORCE_INLINE void fetchNodeInfo() {
        mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
                mMergedNodeCodePoints);
        mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
                MAX_WORD_LENGTH, mMergedNodeCodePoints);
        if (mNodeReader.getCodePointCount() <= 0) {
            // Empty node is not allowed.
            mIsError = true;
            mPos = NOT_A_DICT_POS;
            mReadingState.mPos = NOT_A_DICT_POS;
        }
    }

    AK_FORCE_INLINE void pushReadingStateToStack() {
        if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
            AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE);
            ASSERT(false);
            mIsError = true;
            mReadingState.mPos = NOT_A_DICT_POS;
        } else {
            mReadingStateStack.push_back(mReadingState);
        }
    }

    AK_FORCE_INLINE void popReadingStateFromStack() {
        if (mReadingStateStack.empty()) {
            mReadingState.mPos = NOT_A_DICT_POS;
        } else {
            mReadingState = mReadingStateStack.back();
            mReadingStateStack.pop_back();
            fetchNodeInfo();
        }
    }
};
Loading