Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 649d040c authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Rename CharGroup to PtNode in native code."

parents 800225e0 27b12933
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
    int nextPos = dicNode->getChildrenPos();
    int totalChildCount = 0;
    do {
        const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
        const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
                mDictRoot, &nextPos);
        totalChildCount += childCount;
        if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
@@ -131,7 +131,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
        bool foundMatchedNode = false;
        int totalChildCount = 0;
        do {
            const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
            const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
                    mDictRoot, &pos);
            totalChildCount += childCount;
            if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
@@ -179,7 +179,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
            if (foundMatchedNode) {
                break;
            }
            // If the matched node is not found in the current node group, try to follow the
            // If the matched node is not found in the current PtNode array, try to follow the
            // forward link.
            pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
                    mDictRoot, pos);
+56 −56
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
        return;
    }
    int nextPos = dicNode->getChildrenPos();
    const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
            mDictRoot, &nextPos);
    for (int i = 0; i < childCount; i++) {
        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
@@ -40,15 +40,15 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
// it is possible to check for this with advantageous complexity. For each node, we search
// for groups with children and compare the children position with the position we look for.
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous group. The only tricky part is the fact that if we arrive at the end of a
// node with the last group's children position still less than what we are searching for, we
// must descend the last group's children (for example, if the word we are searching for starts
// with a z, it's the last group of the root node, so all children addresses will be smaller
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node).
/* Parameters :
 * nodePos: the byte position of the terminal chargroup of the word we are searching for (this is
 * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
 *   what is stored as the "bigram position" in each bigram)
 * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
 * outUnigramProbability: a pointer to an int to write the probability into.
@@ -60,18 +60,18 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
        int *const outUnigramProbability) const {
    int pos = getRootPosition();
    int wordPos = 0;
    // One iteration of the outer loop iterates through nodes. As stated above, we will only
    // traverse nodes that are actually a part of the terminal we are searching, so each time
    // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
    // only traverse nodes that are actually a part of the terminal we are searching, so each time
    // we enter this loop we are one depth level further than last time.
    // The only reason we count nodes is because we want to reduce the probability of infinite
    // looping in case there is a bug. Since we know there is an upper bound to the depth we are
    // supposed to traverse, it does not hurt to count iterations.
    for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
        int lastCandidateGroupPos = 0;
        // Let's loop through char groups in this node searching for either the terminal
        int lastCandidatePtNodePos = 0;
        // Let's loop through PtNodes in this PtNode array searching for either the terminal
        // or one of its ascendants.
        for (int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
                mDictRoot, &pos); charGroupCount > 0; --charGroupCount) {
        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
                mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
            const int startPos = pos;
            const PatriciaTrieReadingUtils::NodeFlags flags =
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
@@ -98,7 +98,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                                &pos);
                return ++wordPos;
            }
            // We need to skip past this char group, so skip any remaining code points after the
            // We need to skip past this PtNode, so skip any remaining code points after the
            // first and possibly the probability.
            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
@@ -106,8 +106,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
            }
            // The fact that this group has children is very important. Since we already know
            // that this group does not match, if it has no children we know it is irrelevant
            // The fact that this PtNode has children is very important. Since we already know
            // that this PtNode does not match, if it has no children we know it is irrelevant
            // to what we are searching for.
            const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
            // We will write in `found' whether we have passed the children position we are
@@ -122,45 +122,45 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                        ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
                if (childrenPos > nodePos) {
                    // If the children pos is greater than the position, it means the previous
                    // chargroup, which position is stored in lastCandidateGroupPos, was the right
                    // PtNode, whose position is stored in lastCandidatePtNodePos, was the right
                    // one.
                    found = true;
                } else if (1 >= charGroupCount) {
                    // However if we are on the LAST group of this node, and we have NOT shot the
                    // position we should descend THIS node. So we trick the lastCandidateGroupPos
                    // so that we will descend this node, not the previous one.
                    lastCandidateGroupPos = startPos;
                } else if (1 >= ptNodeCount) {
                    // However if we are on the LAST PtNode of this array, and we have NOT shot the
                    // position we should descend THIS node. So we trick the lastCandidatePtNodePos
                    // so that we will descend this PtNode, not the previous one.
                    lastCandidatePtNodePos = startPos;
                    found = true;
                } else {
                    // Else, we should continue looking.
                    found = false;
                }
            } else {
                // Even if we don't have children here, we could still be on the last group of this
                // node. If this is the case, we should descend the last group that had children,
                // and their position is already in lastCandidateGroup.
                found = (1 >= charGroupCount);
                // Even if we don't have children here, we could still be on the last PtNode of
                // this array. If this is the case, we should descend the last PtNode that had
                // children, and their position is already in lastCandidatePtNodePos.
                found = (1 >= ptNodeCount);
            }

            if (found) {
                // Okay, we found the group we should descend. Its position is in
                // the lastCandidateGroupPos variable, so we just re-read it.
                if (0 != lastCandidateGroupPos) {
                // Okay, we found the PtNode we should descend. Its position is in
                // the lastCandidatePtNodePos variable, so we just re-read it.
                if (0 != lastCandidatePtNodePos) {
                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
                                    mDictRoot, &lastCandidateGroupPos);
                                    mDictRoot, &lastCandidatePtNodePos);
                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                            mDictRoot, &lastCandidateGroupPos);
                    // We copy all the characters in this group to the buffer
                            mDictRoot, &lastCandidatePtNodePos);
                    // We copy all the characters in this PtNode to the buffer
                    outCodePoints[wordPos] = lastChar;
                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                mDictRoot, &lastCandidateGroupPos);
                                mDictRoot, &lastCandidatePtNodePos);
                        int charCount = maxCodePointCount;
                        while (-1 != nextChar && --charCount > 0) {
                            outCodePoints[++wordPos] = nextChar;
                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                    mDictRoot, &lastCandidateGroupPos);
                                    mDictRoot, &lastCandidatePtNodePos);
                        }
                    }
                    ++wordPos;
@@ -168,19 +168,19 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                    // it's there, read pos, and break to resume the search at pos.
                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
                                &lastCandidateGroupPos);
                                &lastCandidatePtNodePos);
                    }
                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                            mDictRoot, lastFlags, &lastCandidateGroupPos);
                            mDictRoot, lastFlags, &lastCandidatePtNodePos);
                    break;
                } else {
                    // Here is a little tricky part: we come here if we found out that all children
                    // addresses in this group are bigger than the address we are searching for.
                    // addresses in this PtNode are bigger than the address we are searching for.
                    // Should we conclude the word is not in the dictionary? No! It could still be
                    // one of the remaining chargroups in this node, so we have to keep looking in
                    // this node until we find it (or we realize it's not there either, in which
                    // case it's actually not in the dictionary). Pass the end of this group, ready
                    // to start the next one.
                    // one of the remaining PtNodes in this array, so we have to keep looking in
                    // this array until we find it (or we realize it's not there either, in which
                    // case it's actually not in the dictionary). Pass the end of this PtNode,
                    // ready to start the next one.
                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                                mDictRoot, flags, &pos);
@@ -195,9 +195,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
            } else {
                // If we did not find it, we should record the last children address for the next
                // iteration.
                if (hasChildren) lastCandidateGroupPos = startPos;
                // Now skip the end of this group (children pos and the attributes if any) so that
                // our pos is after the end of this char group, at the start of the next one.
                if (hasChildren) lastCandidatePtNodePos = startPos;
                // Now skip the end of this PtNode (children pos and the attributes if any) so that
                // our pos is after the end of this PtNode, at the start of the next one.
                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                            mDictRoot, flags, &pos);
@@ -212,7 +212,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(

        }
    }
    // If we have looked through all the chargroups and found no match, the nodePos is
    // If we have looked through all the PtNodes and found no match, the nodePos is
    // not the position of a terminal in this dictionary.
    return 0;
}
@@ -228,24 +228,24 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
        // If we already traversed the tree further than the word is long, it means
        // there was no match (or we would have found it).
        if (wordPos >= length) return NOT_A_VALID_WORD_POS;
        int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(mDictRoot,
        int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
                &pos);
        const int wChar = forceLowerCaseSearch
                ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
        while (true) {
            // If there are no more character groups in this node, it means we could not
            // If there are no more PtNodes in this array, it means we could not
            // find a matching character for this depth, therefore there is no match.
            if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS;
            const int charGroupPos = pos;
            if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS;
            const int ptNodePos = pos;
            const PatriciaTrieReadingUtils::NodeFlags flags =
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
            int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                    &pos);
            if (character == wChar) {
                // This is the correct node. Only one character group may start with the same
                // char within a node, so either we found our match in this node, or there is
                // This is the correct PtNode. Only one PtNode may start with the same char within
                // a PtNode array, so either we found our match in this array, or there is
                // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the
                // characters in this character group indeed does match.
                // characters in this PtNode to confirm they do indeed match.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
                            &pos);
@@ -253,7 +253,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
                        ++wordPos;
                        // If we shoot the length of the word we search for, or if we find a single
                        // character that does not match, as explained above, it means the word is
                        // not in the dictionary (by virtue of this chargroup being the only one to
                        // not in the dictionary (by virtue of this PtNode being the only one to
                        // match the word on the first character, but not matching the whole word).
                        if (wordPos >= length) return NOT_A_VALID_WORD_POS;
                        if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS;
@@ -268,7 +268,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
                ++wordPos;
                if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                    if (wordPos == length) {
                        return charGroupPos;
                        return ptNodePos;
                    }
                    PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                }
@@ -282,7 +282,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
                        flags, &pos);
                break;
            } else {
                // This chargroup does not match, so skip the remaining part and go to the next.
                // This PtNode does not match, so skip the remaining part and go to the next.
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
                            &pos);
@@ -301,7 +301,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
                    mBigramListPolicy.skipAllBigrams(&pos);
                }
            }
            --charGroupCount;
            --ptNodeCount;
        }
    }
}
+10 −10
Original line number Diff line number Diff line
@@ -23,15 +23,15 @@ namespace latinime {

typedef PatriciaTrieReadingUtils PtReadingUtils;

const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;

// Flag for single/multiple char PtNode
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
// Flag for terminal groups
// Flag for terminal PtNodes
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
// Flag for shortcut targets presence
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
@@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
        const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
    const int base = *pos;
    int offset = 0;
    switch (MASK_GROUP_ADDRESS_TYPE & flags) {
        case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
    switch (MASK_CHILDREN_POSITION_TYPE & flags) {
        case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE:
            offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
            break;
        case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
        case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES:
            offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
            break;
        case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
        case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES:
            offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
            break;
        default:
+7 −7
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils {
 public:
    typedef uint8_t NodeFlags;

    static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
    static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition(
            const uint8_t *const buffer, int *const pos) {
        const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
        if (firstByte < 0x80) {
@@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils {
    }

    static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
        return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags);
        return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);

    static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
    static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
    static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;

    static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
    static const NodeFlags FLAG_IS_TERMINAL;