Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit be611705 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Create Ver2PtNodeArrayReader.

Bug: 12810574
Change-Id: I7708d24d735680b2fe9e6700316076018e88c98d
parent cf2b0751
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
    $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
        patricia_trie_policy.cpp \
        patricia_trie_reading_utils.cpp \
        ver2_patricia_trie_node_reader.cpp) \
        ver2_patricia_trie_node_reader.cpp \
        ver2_pt_node_array_reader.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
        ver4_dict_buffers.cpp \
        ver4_dict_constants.cpp \
+4 −83
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"

@@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
        const int length, const bool forceLowerCaseSearch) const {
    int pos = getRootPosition();
    int wordPos = 0;

    while (true) {
        // If we already traversed the tree further than the word is long, there means
        // there was no match (or we would have found it).
        if (wordPos >= length) return NOT_A_DICT_POS;
        int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
                &pos);
        const int wChar = forceLowerCaseSearch
                ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
        while (true) {
            // If there are no more PtNodes in this array, it means we could not
            // find a matching character for this depth, therefore there is no match.
            if (0 >= ptNodeCount) return NOT_A_DICT_POS;
            const int ptNodePos = pos;
            const PatriciaTrieReadingUtils::NodeFlags flags =
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
            int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                    &pos);
            if (character == wChar) {
                // This is the correct PtNode. Only one PtNode may start with the same char within
                // a PtNode array, so either we found our match in this array, or there is
                // no match and we can return NOT_A_DICT_POS. So we will check all the
                // characters in this PtNode indeed does match.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                            &pos);
                    while (NOT_A_CODE_POINT != character) {
                        ++wordPos;
                        // If we shoot the length of the word we search for, or if we find a single
                        // character that does not match, as explained above, it means the word is
                        // not in the dictionary (by virtue of this PtNode being the only one to
                        // match the word on the first character, but not matching the whole word).
                        if (wordPos >= length) return NOT_A_DICT_POS;
                        if (inWord[wordPos] != character) return NOT_A_DICT_POS;
                        character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                mDictRoot, &pos);
                    }
                }
                // If we come here we know that so far, we do match. Either we are on a terminal
                // and we match the length, in which case we found it, or we traverse children.
                // If we don't match the length AND don't have children, then a word in the
                // dictionary fully matches a prefix of the searched word but not the full word.
                ++wordPos;
                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                    if (wordPos == length) {
                        return ptNodePos;
                    }
                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                }
                if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    return NOT_A_DICT_POS;
                }
                // We have children and we are still shorter than the word we are searching for, so
                // we need to traverse children. Put the pointer on the children position, and
                // break
                pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
                        flags, &pos);
                break;
            } else {
                // This PtNode does not match, so skip the remaining part and go to the next.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
                            &pos);
                }
                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                }
                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
                            flags, &pos);
                }
                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                    mShortcutListPolicy.skipAllShortcuts(&pos);
                }
                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                    mBigramListPolicy.skipAllBigrams(&pos);
                }
            }
            --ptNodeCount;
        }
    }
    DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
    readingHelper.initWithPtNodeArrayPos(getRootPosition());
    return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}

int PatriciaTriePolicy::getProbability(const int unigramProbability,
+4 −1
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"

@@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
              mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
                      - mHeaderPolicy.getSize()),
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
              mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}

    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
@@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;
    const Ver2PtNodeArrayReader mPtNodeArrayReader;

    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
            DicNodeVector *const childDicNodes) const;
+54 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"

#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"

namespace latinime {

bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
    if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) {
        // Reading invalid position because of a bug or a broken dictionary.
        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
                ptNodeArrayPos, mDictSize);
        ASSERT(false);
        return false;
    }
    int readingPos = ptNodeArrayPos;
    const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
            mDictBuffer, &readingPos);
    *outPtNodeCount = ptNodeCountInArray;
    *outFirstPtNodePos = readingPos;
    return true;
}

bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
        int *const outNextPtNodeArrayPos) const {
    if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
        // Reading invalid position because of bug or broken dictionary.
        AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
                forwordLinkPos, mDictSize);
        ASSERT(false);
        return false;
    }
    // Ver2 dicts don't have forward links.
    *outNextPtNodeArrayPos = NOT_A_DICT_POS;
    return true;
}

} // namespace latinime
+44 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
#define LATINIME_VER2_PT_NODE_ARRAY_READER_H

#include <stdint.h>

#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"

namespace latinime {

class Ver2PtNodeArrayReader : public PtNodeArrayReader {
 public:
    Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
            : mDictBuffer(dictBuffer), mDictSize(dictSize) {};

    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
            int *const outNextPtNodeArrayPos) const;

 private:
    DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);

    const uint8_t *const mDictBuffer;
    const int mDictSize;
};
} // namespace latinime
#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */