Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { Loading Loading @@ -131,7 +131,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in bool foundMatchedNode = false; int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &pos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { Loading Loading @@ -179,7 +179,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in if (foundMatchedNode) { break; } // If the matched node is not found in the current node group, try to follow the // If the matched node is not found in the current PtNode array, try to follow the // forward link. 
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( mDictRoot, pos); Loading native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +56 −56 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); Loading @@ -40,15 +40,15 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, // it is possible to check for this with advantageous complexity. For each node, we search // for groups with children and compare the children position with the position we look for. // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous group. The only tricky part is the fact that if we arrive at the end of a // node with the last group's children position still less than what we are searching for, we // must descend the last group's children (for example, if the word we are searching for starts // with a z, it's the last group of the root node, so all children addresses will be smaller // of the previous PtNode. 
The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller // than the position we look for, and we have to descend the z node). /* Parameters : * nodePos: the byte position of the terminal chargroup of the word we are searching for (this is * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. * outUnigramProbability: a pointer to an int to write the probability into. Loading @@ -60,18 +60,18 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int *const outUnigramProbability) const { int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through nodes. As stated above, we will only // traverse nodes that are actually a part of the terminal we are searching, so each time // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will // only traverse nodes that are actually a part of the terminal we are searching, so each time // we enter this loop we are one depth level further than last time. // The only reason we count nodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. 
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { int lastCandidateGroupPos = 0; // Let's loop through char groups in this node searching for either the terminal int lastCandidatePtNodePos = 0; // Let's loop through PtNodes in this PtNode array searching for either the terminal // or one of its ascendants. for (int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( mDictRoot, &pos); charGroupCount > 0; --charGroupCount) { for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) { const int startPos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); Loading @@ -98,7 +98,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( &pos); return ++wordPos; } // We need to skip past this char group, so skip any remaining code points after the // We need to skip past this PtNode, so skip any remaining code points after the // first and possibly the probability. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); Loading @@ -106,8 +106,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( if (PatriciaTrieReadingUtils::isTerminal(flags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } // The fact that this group has children is very important. Since we already know // that this group does not match, if it has no children we know it is irrelevant // The fact that this PtNode has children is very important. Since we already know // that this PtNode does not match, if it has no children we know it is irrelevant // to what we are searching for. 
const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); // We will write in `found' whether we have passed the children position we are Loading @@ -122,45 +122,45 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos); if (childrenPos > nodePos) { // If the children pos is greater than the position, it means the previous // chargroup, which position is stored in lastCandidateGroupPos, was the right // PtNode, which position is stored in lastCandidatePtNodePos, was the right // one. found = true; } else if (1 >= charGroupCount) { // However if we are on the LAST group of this node, and we have NOT shot the // position we should descend THIS node. So we trick the lastCandidateGroupPos // so that we will descend this node, not the previous one. lastCandidateGroupPos = startPos; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the // position we should descend THIS node. So we trick the lastCandidatePtNodePos // so that we will descend this PtNode, not the previous one. lastCandidatePtNodePos = startPos; found = true; } else { // Else, we should continue looking. found = false; } } else { // Even if we don't have children here, we could still be on the last group of this // node. If this is the case, we should descend the last group that had children, // and their position is already in lastCandidateGroup. found = (1 >= charGroupCount); // Even if we don't have children here, we could still be on the last PtNode of // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); } if (found) { // Okay, we found the group we should descend. Its position is in // the lastCandidateGroupPos variable, so we just re-read it. 
if (0 != lastCandidateGroupPos) { // Okay, we found the PtNode we should descend. Its position is in // the lastCandidatePtNodePos variable, so we just re-read it. if (0 != lastCandidatePtNodePos) { const PatriciaTrieReadingUtils::NodeFlags lastFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer mDictRoot, &lastCandidatePtNodePos); // We copy all the characters in this PtNode to the buffer outCodePoints[wordPos] = lastChar; if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); int charCount = maxCodePointCount; while (-1 != nextChar && --charCount > 0) { outCodePoints[++wordPos] = nextChar; nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); } } ++wordPos; Loading @@ -168,19 +168,19 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // it's there, read pos, and break to resume the search at pos. if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &lastCandidateGroupPos); &lastCandidatePtNodePos); } pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, lastFlags, &lastCandidateGroupPos); mDictRoot, lastFlags, &lastCandidatePtNodePos); break; } else { // Here is a little tricky part: we come here if we found out that all children // addresses in this group are bigger than the address we are searching for. // addresses in this PtNode are bigger than the address we are searching for. // Should we conclude the word is not in the dictionary? No! 
It could still be // one of the remaining chargroups in this node, so we have to keep looking in // this node until we find it (or we realize it's not there either, in which // case it's actually not in the dictionary). Pass the end of this group, ready // to start the next one. // one of the remaining PtNodes in this array, so we have to keep looking in // this array until we find it (or we realize it's not there either, in which // case it's actually not in the dictionary). Pass the end of this PtNode, // ready to start the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); Loading @@ -195,9 +195,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } else { // If we did not find it, we should record the last children address for the next // iteration. if (hasChildren) lastCandidateGroupPos = startPos; // Now skip the end of this group (children pos and the attributes if any) so that // our pos is after the end of this char group, at the start of the next one. if (hasChildren) lastCandidatePtNodePos = startPos; // Now skip the end of this PtNode (children pos and the attributes if any) so that // our pos is after the end of this PtNode, at the start of the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); Loading @@ -212,7 +212,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } } // If we have looked through all the chargroups and found no match, the nodePos is // If we have looked through all the PtNodes and found no match, the nodePos is // not the position of a terminal in this dictionary. 
return 0; } Loading @@ -228,24 +228,24 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). if (wordPos >= length) return NOT_A_VALID_WORD_POS; int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(mDictRoot, int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS; const int charGroupPos = pos; if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the // characters in this character group indeed does match. // characters in this PtNode indeed does match. 
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); Loading @@ -253,7 +253,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this chargroup being the only one to // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). if (wordPos >= length) return NOT_A_VALID_WORD_POS; if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS; Loading @@ -268,7 +268,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (wordPos == length) { return charGroupPos; return ptNodePos; } PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } Loading @@ -282,7 +282,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, flags, &pos); break; } else { // This chargroup does not match, so skip the remaining part and go to the next. // This PtNode does not match, so skip the remaining part and go to the next. 
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); Loading @@ -301,7 +301,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, mBigramListPolicy.skipAllBigrams(&pos); } } --charGroupCount; --ptNodeCount; } } } Loading native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +10 −10 Original line number Diff line number Diff line Loading @@ -23,15 +23,15 @@ namespace latinime { typedef PatriciaTrieReadingUtils PtReadingUtils; const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; // Flag for single/multiple char group const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; // Flag for terminal groups // Flag for terminal PtNodes const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; // Flag for shortcut targets presence const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; Loading @@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; const uint8_t *const buffer, const NodeFlags flags, int 
*const pos) { const int base = *pos; int offset = 0; switch (MASK_GROUP_ADDRESS_TYPE & flags) { case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: switch (MASK_CHILDREN_POSITION_TYPE & flags) { case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); break; case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); break; case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); break; default: Loading native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +7 −7 Original line number Diff line number Diff line Loading @@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; static AK_FORCE_INLINE int getGroupCountAndAdvancePosition( static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); if (firstByte < 0x80) { Loading Loading @@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils { } static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags); return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); static const NodeFlags MASK_GROUP_ADDRESS_TYPE; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES; static const NodeFlags MASK_CHILDREN_POSITION_TYPE; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; static const NodeFlags 
FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; static const NodeFlags FLAG_IS_TERMINAL; Loading Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { Loading Loading @@ -131,7 +131,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in bool foundMatchedNode = false; int totalChildCount = 0; do { const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &pos); totalChildCount += childCount; if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { Loading Loading @@ -179,7 +179,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in if (foundMatchedNode) { break; } // If the matched node is not found in the current node group, try to follow the // If the matched node is not found in the current PtNode array, try to follow the // forward link. pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( mDictRoot, pos); Loading
native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +56 −56 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &nextPos); for (int i = 0; i < childCount; i++) { nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); Loading @@ -40,15 +40,15 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, // it is possible to check for this with advantageous complexity. For each node, we search // for groups with children and compare the children position with the position we look for. // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous group. The only tricky part is the fact that if we arrive at the end of a // node with the last group's children position still less than what we are searching for, we // must descend the last group's children (for example, if the word we are searching for starts // with a z, it's the last group of the root node, so all children addresses will be smaller // of the previous PtNode. 
The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller // than the position we look for, and we have to descend the z node). /* Parameters : * nodePos: the byte position of the terminal chargroup of the word we are searching for (this is * nodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. * outUnigramProbability: a pointer to an int to write the probability into. Loading @@ -60,18 +60,18 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int *const outUnigramProbability) const { int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through nodes. As stated above, we will only // traverse nodes that are actually a part of the terminal we are searching, so each time // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will // only traverse nodes that are actually a part of the terminal we are searching, so each time // we enter this loop we are one depth level further than last time. // The only reason we count nodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. 
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { int lastCandidateGroupPos = 0; // Let's loop through char groups in this node searching for either the terminal int lastCandidatePtNodePos = 0; // Let's loop through PtNodes in this PtNode array searching for either the terminal // or one of its ascendants. for (int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( mDictRoot, &pos); charGroupCount > 0; --charGroupCount) { for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) { const int startPos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); Loading @@ -98,7 +98,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( &pos); return ++wordPos; } // We need to skip past this char group, so skip any remaining code points after the // We need to skip past this PtNode, so skip any remaining code points after the // first and possibly the probability. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); Loading @@ -106,8 +106,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( if (PatriciaTrieReadingUtils::isTerminal(flags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } // The fact that this group has children is very important. Since we already know // that this group does not match, if it has no children we know it is irrelevant // The fact that this PtNode has children is very important. Since we already know // that this PtNode does not match, if it has no children we know it is irrelevant // to what we are searching for. 
const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); // We will write in `found' whether we have passed the children position we are Loading @@ -122,45 +122,45 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos); if (childrenPos > nodePos) { // If the children pos is greater than the position, it means the previous // chargroup, which position is stored in lastCandidateGroupPos, was the right // PtNode, which position is stored in lastCandidatePtNodePos, was the right // one. found = true; } else if (1 >= charGroupCount) { // However if we are on the LAST group of this node, and we have NOT shot the // position we should descend THIS node. So we trick the lastCandidateGroupPos // so that we will descend this node, not the previous one. lastCandidateGroupPos = startPos; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the // position we should descend THIS node. So we trick the lastCandidatePtNodePos // so that we will descend this PtNode, not the previous one. lastCandidatePtNodePos = startPos; found = true; } else { // Else, we should continue looking. found = false; } } else { // Even if we don't have children here, we could still be on the last group of this // node. If this is the case, we should descend the last group that had children, // and their position is already in lastCandidateGroup. found = (1 >= charGroupCount); // Even if we don't have children here, we could still be on the last PtNode of / // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); } if (found) { // Okay, we found the group we should descend. Its position is in // the lastCandidateGroupPos variable, so we just re-read it.
if (0 != lastCandidateGroupPos) { // Okay, we found the PtNode we should descend. Its position is in // the lastCandidatePtNodePos variable, so we just re-read it. if (0 != lastCandidatePtNodePos) { const PatriciaTrieReadingUtils::NodeFlags lastFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer mDictRoot, &lastCandidatePtNodePos); // We copy all the characters in this PtNode to the buffer outCodePoints[wordPos] = lastChar; if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); int charCount = maxCodePointCount; while (-1 != nextChar && --charCount > 0) { outCodePoints[++wordPos] = nextChar; nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( mDictRoot, &lastCandidateGroupPos); mDictRoot, &lastCandidatePtNodePos); } } ++wordPos; Loading @@ -168,19 +168,19 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( // it's there, read pos, and break to resume the search at pos. if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &lastCandidateGroupPos); &lastCandidatePtNodePos); } pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, lastFlags, &lastCandidateGroupPos); mDictRoot, lastFlags, &lastCandidatePtNodePos); break; } else { // Here is a little tricky part: we come here if we found out that all children // addresses in this group are bigger than the address we are searching for. // addresses in this PtNode are bigger than the address we are searching for. // Should we conclude the word is not in the dictionary? No! 
It could still be // one of the remaining chargroups in this node, so we have to keep looking in // this node until we find it (or we realize it's not there either, in which // case it's actually not in the dictionary). Pass the end of this group, ready // to start the next one. // one of the remaining PtNodes in this array, so we have to keep looking in // this array until we find it (or we realize it's not there either, in which // case it's actually not in the dictionary). Pass the end of this PtNode, // ready to start the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); Loading @@ -195,9 +195,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } else { // If we did not find it, we should record the last children address for the next // iteration. if (hasChildren) lastCandidateGroupPos = startPos; // Now skip the end of this group (children pos and the attributes if any) so that // our pos is after the end of this char group, at the start of the next one. if (hasChildren) lastCandidatePtNodePos = startPos; // Now skip the end of this PtNode (children pos and the attributes if any) so that // our pos is after the end of this PtNode, at the start of the next one. if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, flags, &pos); Loading @@ -212,7 +212,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( } } // If we have looked through all the chargroups and found no match, the nodePos is // If we have looked through all the PtNodes and found no match, the nodePos is // not the position of a terminal in this dictionary. 
return 0; } Loading @@ -228,24 +228,24 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). if (wordPos >= length) return NOT_A_VALID_WORD_POS; int charGroupCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(mDictRoot, int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot, &pos); const int wChar = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // If there are no more PtNodes in this array, it means we could not // find a matching character for this depth, therefore there is no match. if (0 >= charGroupCount) return NOT_A_VALID_WORD_POS; const int charGroupPos = pos; if (0 >= ptNodeCount) return NOT_A_VALID_WORD_POS; const int ptNodePos = pos; const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is // This is the correct PtNode. Only one PtNode may start with the same char within // a PtNode array, so either we found our match in this array, or there is // no match and we can return NOT_A_VALID_WORD_POS. So we will check all the // characters in this character group indeed does match. // characters in this PtNode indeed does match. 
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot, &pos); Loading @@ -253,7 +253,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is // not in the dictionary (by virtue of this chargroup being the only one to // not in the dictionary (by virtue of this PtNode being the only one to // match the word on the first character, but not matching the whole word). if (wordPos >= length) return NOT_A_VALID_WORD_POS; if (inWord[wordPos] != character) return NOT_A_VALID_WORD_POS; Loading @@ -268,7 +268,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, ++wordPos; if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (wordPos == length) { return charGroupPos; return ptNodePos; } PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); } Loading @@ -282,7 +282,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, flags, &pos); break; } else { // This chargroup does not match, so skip the remaining part and go to the next. // This PtNode does not match, so skip the remaining part and go to the next. if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); Loading @@ -301,7 +301,7 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, mBigramListPolicy.skipAllBigrams(&pos); } } --charGroupCount; --ptNodeCount; } } } Loading
native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp +10 −10 Original line number Diff line number Diff line Loading @@ -23,15 +23,15 @@ namespace latinime { typedef PatriciaTrieReadingUtils PtReadingUtils; const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; // Flag for single/multiple char group const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; // Flag for terminal groups // Flag for terminal PtNodes const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; // Flag for shortcut targets presence const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; Loading @@ -46,14 +46,14 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; const uint8_t *const buffer, const NodeFlags flags, int *const pos) { const int base = *pos; int offset = 0; switch (MASK_GROUP_ADDRESS_TYPE & flags) { case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: switch (MASK_CHILDREN_POSITION_TYPE & flags) { case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); break; case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: case 
FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); break; case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); break; default: Loading
native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h +7 −7 Original line number Diff line number Diff line Loading @@ -28,7 +28,7 @@ class PatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; static AK_FORCE_INLINE int getGroupCountAndAdvancePosition( static AK_FORCE_INLINE int getPtNodeArraySizeAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); if (firstByte < 0x80) { Loading Loading @@ -116,17 +116,17 @@ class PatriciaTrieReadingUtils { } static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags); return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); } private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); static const NodeFlags MASK_GROUP_ADDRESS_TYPE; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES; static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES; static const NodeFlags MASK_CHILDREN_POSITION_TYPE; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; static const NodeFlags FLAG_IS_TERMINAL; Loading