Loading native/src/correction.cpp +29 −0 Original line number Diff line number Diff line Loading @@ -38,8 +38,28 @@ inline static void initEditDistance(int *editDistanceTable) { } } inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength, const int outputLength) { if (DEBUG_DICT) { LOGI("EditDistanceTable"); for (int i = 0; i <= 10; ++i) { int c[11]; for (int j = 0; j <= 10; ++j) { if (j < inputLength + 1 && i < outputLength + 1) { c[j] = (editDistanceTable + i * (inputLength + 1))[j]; } else { c[j] = -1; } } LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); } } } inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, const int inputLength, const unsigned short *output, const int outputLength) { // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength]. Loading @@ -62,6 +82,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne inline static int getCurrentEditDistance( int *editDistanceTable, const int inputLength, const int outputLength) { if (DEBUG_DICT) { LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; } Loading Loading @@ -90,6 +113,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, mInputLength = inputLength; mMaxDepth = maxDepth; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; // TODO: This is not supposed to be required. Check what's going wrong with // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] initEditDistance(mEditDistanceTable); } void Correction::initCorrectionState( Loading Loading @@ -620,6 +646,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // TODO: Calculate edit distance for transposed and excessive int ed = 0; if (DEBUG_DICT_FULL) { dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1); } int adjustedProximityMatchedCount = proximityMatchedCount; int finalFreq = freq; Loading native/src/defines.h +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ static void prof_out(void) { #define DEBUG_SHOW_FOUND_WORD false #define DEBUG_NODE DEBUG_DICT_FULL #define DEBUG_TRACE DEBUG_DICT_FULL #define DEBUG_PROXIMITY_INFO true #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true Loading native/src/proximity_info.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { if (x < 0 || y < 0) { if (DEBUG_DICT) { LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); assert(true); assert(false); } return false; } Loading native/src/unigram_dictionary.cpp +31 −17 Original line number Diff line number Diff line Loading @@ -243,14 +243,18 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue) { const int *yCoordinates, const int *codes, const int inputLength, WordsPriorityQueue *queue, Correction *correction) { if (DEBUG_DICT) { LOGI("initSuggest"); } proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); if (queue) { queue->clear(); } const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); } static const char QUOTE = '\''; static const char SPACE = ' '; Loading @@ -260,19 +264,19 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); if (DEBUG_DICT) assert(codesSize == inputLength); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue); initSuggestions( proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue) { const int inputLength, Correction *correction, WordsPriorityQueue *queue, const bool doAutoCompletion, const int maxErrors) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); doAutoCompletion, maxErrors); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); Loading Loading @@ -306,9 +310,6 @@ void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, correction, queuePool); Loading @@ -318,9 +319,6 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, correction, queuePool); Loading Loading @@ -362,6 +360,15 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo return; const int newWordLength = firstWordLength + secondWordLength + 1; // Space proximity preparation //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, //correction); //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, //MAX_ERRORS_FOR_TWO_WORDS); // Allocating variable length array on stack unsigned short word[newWordLength]; const int firstFreq = getMostFrequentWordLike( Loading @@ -387,6 +394,13 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo word[i] = mWord[i - firstWordLength - 1]; } // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, 0 /* do not clear queue */, correction); correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); Loading native/src/unigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -90,13 +90,13 @@ private: WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue); WordsPriorityQueue *queue, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue); WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Loading Loading
native/src/correction.cpp +29 −0 Original line number Diff line number Diff line Loading @@ -38,8 +38,28 @@ inline static void initEditDistance(int *editDistanceTable) { } } inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength, const int outputLength) { if (DEBUG_DICT) { LOGI("EditDistanceTable"); for (int i = 0; i <= 10; ++i) { int c[11]; for (int j = 0; j <= 10; ++j) { if (j < inputLength + 1 && i < outputLength + 1) { c[j] = (editDistanceTable + i * (inputLength + 1))[j]; } else { c[j] = -1; } } LOGI("[ %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d ]", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], c[9], c[10]); } } } inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, const int inputLength, const unsigned short *output, const int outputLength) { // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. // Let dp[i][j] be editDistanceTable[i * (inputLength + 1) + j]. // Assuming that dp[0][0] ... dp[outputLength - 1][inputLength] are already calculated, // and calculate dp[ouputLength][0] ... dp[outputLength][inputLength]. Loading @@ -62,6 +82,9 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne inline static int getCurrentEditDistance( int *editDistanceTable, const int inputLength, const int outputLength) { if (DEBUG_DICT) { LOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; } Loading Loading @@ -90,6 +113,9 @@ void Correction::initCorrection(const ProximityInfo *pi, const int inputLength, mInputLength = inputLength; mMaxDepth = maxDepth; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; // TODO: This is not supposed to be required. Check what's going wrong with // editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] initEditDistance(mEditDistanceTable); } void Correction::initCorrectionState( Loading Loading @@ -620,6 +646,9 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // TODO: Calculate edit distance for transposed and excessive int ed = 0; if (DEBUG_DICT_FULL) { dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1); } int adjustedProximityMatchedCount = proximityMatchedCount; int finalFreq = freq; Loading
native/src/defines.h +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ static void prof_out(void) { #define DEBUG_SHOW_FOUND_WORD false #define DEBUG_NODE DEBUG_DICT_FULL #define DEBUG_TRACE DEBUG_DICT_FULL #define DEBUG_PROXIMITY_INFO true #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true Loading
native/src/proximity_info.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { if (x < 0 || y < 0) { if (DEBUG_DICT) { LOGI("HasSpaceProximity: Illegal coordinates (%d, %d)", x, y); assert(true); assert(false); } return false; } Loading
native/src/unigram_dictionary.cpp +31 −17 Original line number Diff line number Diff line Loading @@ -243,14 +243,18 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue) { const int *yCoordinates, const int *codes, const int inputLength, WordsPriorityQueue *queue, Correction *correction) { if (DEBUG_DICT) { LOGI("initSuggest"); } proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); if (queue) { queue->clear(); } const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); } static const char QUOTE = '\''; static const char SPACE = ' '; Loading @@ -260,19 +264,19 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue); if (DEBUG_DICT) assert(codesSize == inputLength); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue); initSuggestions( proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue) { const int inputLength, Correction *correction, WordsPriorityQueue *queue, const bool doAutoCompletion, const int maxErrors) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, -1 /* spaceProximityPos */, -1 /* missingSpacePos */, useFullEditDistance, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); doAutoCompletion, maxErrors); int rootPosition = ROOT_POS; // Get the number of children of root, then increment the position int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition); Loading Loading @@ -306,9 +310,6 @@ void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, -1 /* spaceProximityPos */, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */, correction, queuePool); Loading @@ -318,9 +319,6 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, -1 /* missingSpacePos */, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos, correction, queuePool); Loading Loading @@ -362,6 +360,15 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo return; const int newWordLength = firstWordLength + secondWordLength + 1; // Space proximity preparation //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, //correction); //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, //MAX_ERRORS_FOR_TWO_WORDS); // Allocating variable length array on stack unsigned short word[newWordLength]; const int firstFreq = getMostFrequentWordLike( Loading @@ -387,6 +394,13 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo word[i] = mWord[i - firstWordLength - 1]; } // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, 0 /* do not clear queue */, correction); correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); Loading
native/src/unigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -90,13 +90,13 @@ private: WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, WordsPriorityQueue *queue); WordsPriorityQueue *queue, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue); WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Loading