Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fd9599e9 authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Create Ver2PtNodeArrayReader."

parents f9e5b1ee be611705
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
    $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
        patricia_trie_policy.cpp \
        patricia_trie_reading_utils.cpp \
        ver2_patricia_trie_node_reader.cpp) \
        ver2_patricia_trie_node_reader.cpp \
        ver2_pt_node_array_reader.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
        ver4_dict_buffers.cpp \
        ver4_dict_constants.cpp \
+4 −83
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"

@@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
        const int length, const bool forceLowerCaseSearch) const {
    int pos = getRootPosition();
    int wordPos = 0;

    while (true) {
        // If we already traversed the tree further than the word is long, this means
        // there was no match (or we would have found it).
        if (wordPos >= length) return NOT_A_DICT_POS;
        int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
                &pos);
        // Code point to look for at this depth, lower-cased when requested by the caller.
        const int wChar = forceLowerCaseSearch
                ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
        while (true) {
            // If there are no more PtNodes in this array, it means we could not
            // find a matching character for this depth, therefore there is no match.
            if (0 >= ptNodeCount) return NOT_A_DICT_POS;
            // Remember where this PtNode starts; this is the value returned on a full match.
            const int ptNodePos = pos;
            const PatriciaTrieReadingUtils::NodeFlags flags =
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
            int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                    &pos);
            if (character == wChar) {
                // This is the correct PtNode. Only one PtNode may start with the same char within
                // a PtNode array, so either we found our match in this array, or there is
                // no match and we can return NOT_A_DICT_POS. So we will check that all the
                // characters in this PtNode do indeed match.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                            &pos);
                    while (NOT_A_CODE_POINT != character) {
                        ++wordPos;
                        // If we overshoot the length of the word we search for, or if we find a
                        // single character that does not match, as explained above, it means the
                        // word is not in the dictionary (by virtue of this PtNode being the only
                        // one to match the word on the first character, but not matching the
                        // whole word).
                        if (wordPos >= length) return NOT_A_DICT_POS;
                        // NOTE(review): this comparison uses inWord[wordPos] as-is, without the
                        // lower-casing that wChar gets when forceLowerCaseSearch is set --
                        // confirm this is intended.
                        if (inWord[wordPos] != character) return NOT_A_DICT_POS;
                        character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                mDictRoot, &pos);
                    }
                }
                // If we come here we know that so far, we do match. Either we are on a terminal
                // and we match the length, in which case we found it, or we traverse children.
                // If we don't match the length AND don't have children, then a word in the
                // dictionary fully matches a prefix of the searched word but not the full word.
                ++wordPos;
                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                    if (wordPos == length) {
                        return ptNodePos;
                    }
                    // Not at the end of the word: read past the probability (its value is
                    // discarded) before the children position is read below.
                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                }
                if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    return NOT_A_DICT_POS;
                }
                // We have children and we are still shorter than the word we are searching for, so
                // we need to traverse children. Put the pointer on the children position, and
                // break out of the PtNode loop to continue with the children's PtNode array.
                pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
                        flags, &pos);
                break;
            } else {
                // This PtNode does not match, so skip the remaining part and go to the next.
                // Each optional field is skipped only when the flags say it is present.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
                            &pos);
                }
                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                }
                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
                            flags, &pos);
                }
                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                    mShortcutListPolicy.skipAllShortcuts(&pos);
                }
                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                    mBigramListPolicy.skipAllBigrams(&pos);
                }
            }
            // One PtNode of the current array has been consumed.
            --ptNodeCount;
        }
    }
    // Look the word up through the reading helper, built on this policy's PtNode reader and
    // PtNode array reader and started at the root PtNode array position.
    // NOTE(review): the loop above is only ever left through a return statement, so as written
    // the statements below are not reached -- confirm whether the loop is meant to be removed.
    DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
    readingHelper.initWithPtNodeArrayPos(getRootPosition());
    return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}

int PatriciaTriePolicy::getProbability(const int unigramProbability,
+4 −1
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"

@@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
              mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
                      - mHeaderPolicy.getSize()),
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
              mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}

    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
@@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;
    const Ver2PtNodeArrayReader mPtNodeArrayReader;

    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
            DicNodeVector *const childDicNodes) const;
+54 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"

#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"

namespace latinime {

// Reads the size of the PtNode array that starts at ptNodeArrayPos.
// On success, writes the PtNode count to outPtNodeCount, writes the reading position left by
// the size-reading helper (where the first PtNode is expected) to outFirstPtNodePos, and
// returns true. When ptNodeArrayPos is negative or not smaller than the dictionary size, an
// error is logged, the output parameters are left untouched and false is returned.
bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
        int *const outPtNodeCount, int *const outFirstPtNodePos) const {
    const bool isPosInsideDict = (0 <= ptNodeArrayPos) && (ptNodeArrayPos < mDictSize);
    if (!isPosInsideDict) {
        // Only a bug or a broken dictionary can lead to such a position.
        AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
                ptNodeArrayPos, mDictSize);
        ASSERT(false);
        return false;
    }
    // Work on a copy of the position so that the helper can advance it.
    int cursor = ptNodeArrayPos;
    *outPtNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
            mDictBuffer, &cursor);
    *outFirstPtNodePos = cursor;
    return true;
}

// Reports the PtNode array that follows the forward link at forwordLinkPos.
// For a position inside the dictionary, NOT_A_DICT_POS is always written to
// outNextPtNodeArrayPos and true is returned. When forwordLinkPos is negative or not smaller
// than the dictionary size, an error is logged, the output parameter is left untouched and
// false is returned.
bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
        int *const outNextPtNodeArrayPos) const {
    const bool isPosInsideDict = (0 <= forwordLinkPos) && (forwordLinkPos < mDictSize);
    if (isPosInsideDict) {
        // Forward links do not exist in Ver2 dictionaries, so there is never a next array.
        *outNextPtNodeArrayPos = NOT_A_DICT_POS;
        return true;
    }
    // Only a bug or a broken dictionary can lead to such a position.
    AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
            forwordLinkPos, mDictSize);
    ASSERT(false);
    return false;
}

} // namespace latinime
+44 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
#define LATINIME_VER2_PT_NODE_ARRAY_READER_H

#include <stdint.h>

#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"

namespace latinime {

// PtNodeArrayReader implementation for version 2 dictionaries.
// The reader keeps the buffer pointer and the size it was constructed with; both are const
// members and this class declares no destructor, so it does not release the buffer itself.
class Ver2PtNodeArrayReader : public PtNodeArrayReader {
 public:
    // dictBuffer is the buffer that PtNode array sizes are read from. dictSize is the
    // exclusive upper bound used to validate positions passed to the read methods.
    Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
            : mDictBuffer(dictBuffer), mDictSize(dictSize) {}

    // Reads the PtNode count of the array at ptNodeArrayPos and the position that follows the
    // size field. Returns false, without touching the outputs, when ptNodeArrayPos is outside
    // [0, dictSize).
    virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
            int *const outPtNodeCount, int *const outFirstPtNodePos) const;
    // Ver2 dictionaries have no forward links: for a position inside [0, dictSize) this always
    // reports NOT_A_DICT_POS and returns true. Returns false for positions outside that range.
    virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
            int *const outNextPtNodeArrayPos) const;

 private:
    DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);

    // Buffer the PtNode array sizes are read from.
    const uint8_t *const mDictBuffer;
    // Size used for range-checking positions before reading.
    const int mDictSize;
};
} // namespace latinime
#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */