Loading native/Android.mk +6 −2 Original line number Diff line number Diff line Loading @@ -46,15 +46,19 @@ LOCAL_MODULE := libjni_latinime LOCAL_MODULE_TAGS := user # For STL LOCAL_C_INCLUDES += external/stlport/stlport bionic LOCAL_SHARED_LIBRARIES += libstlport ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) LOCAL_CFLAGS += -DFLAG_DO_PROFILE LOCAL_SHARED_LIBRARIES := libcutils libutils LOCAL_SHARED_LIBRARIES += libcutils libutils else # FLAG_DO_PROFILE ifeq ($(FLAG_DBG), true) $(warning Making debug version of native library) LOCAL_CFLAGS += -DFLAG_DBG LOCAL_SHARED_LIBRARIES := libcutils libutils LOCAL_SHARED_LIBRARIES += libcutils libutils endif # FLAG_DBG endif # FLAG_DO_PROFILE Loading native/src/defines.h +2 −0 Original line number Diff line number Diff line Loading @@ -101,6 +101,7 @@ static void prof_out(void) { #define DEBUG_PROXIMITY_INFO true #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) Loading @@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) Loading native/src/unigram_dictionary.cpp +18 −75 Original line number Diff line number Diff line Loading @@ -49,10 +49,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed LOGI("UnigramDictionary - constructor"); } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); } UnigramDictionary::~UnigramDictionary() { delete mCorrection; delete mWordsPriorityQueue; } static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, Loading Loading @@ -88,7 +90,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) { const int currentDepth, int* codesDest) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { Loading @@ -105,8 +107,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, frequencies); currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. Loading @@ -115,8 +116,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, frequencies); codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); return; } } Loading @@ -132,8 +132,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest, codesSrc, remainingBytes); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies, flags); (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags); } int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, Loading @@ -144,28 +143,24 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies); codesSize, flags, codes, codesSize, 0, codesBuffer); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies, flags); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags); } PROF_START(20); // Get the word count int suggestedWordsCount = 0; while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) { suggestedWordsCount++; } const int suggestedWordsCount = mWordsPriorityQueue->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { #ifdef FLAG_DBG short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH; short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; LOGI("%s %i", s, mFrequencies[j]); LOGI("%s %i", s, frequencies[j]); #endif } } Loading @@ -176,12 +171,12 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies, const int flags) { const int flags) { PROF_OPEN; PROF_START(0); initSuggestions( proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); proximityInfo, xcoordinates, ycoordinates, codes, codesSize); if (DEBUG_DICT) assert(codesSize == mInputLength); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); Loading Loading @@ -241,71 +236,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies) { const int *yCoordinates, const int *codes, const int codesSize) { if (DEBUG_DICT) { LOGI("initSuggest"); } mFrequencies = frequencies; mOutputChars = outWords; mInputLength = codesSize; proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); mProximityInfo = proximityInfo; mWordsPriorityQueue->clear(); } // TODO: We need to optimize addWord by using STL or something // TODO: This needs to take an const unsigned short* and not tinker with its contents bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { word[length] = 0; if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { #ifdef FLAG_DBG char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; LOGI("Found word = %s, freq = %d", s, frequency); #endif } if (length > MAX_WORD_LENGTH) { if (DEBUG_DICT) { LOGI("Exceeded max word length."); } return false; } // Find the right insertion point int insertAt = 0; while (insertAt < MAX_WORDS) { // TODO: How should we sort words with the same frequency? if (frequency > mFrequencies[insertAt]) { break; } insertAt++; } if (insertAt < MAX_WORDS) { if (DEBUG_DICT) { #ifdef FLAG_DBG char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX); #endif } memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); mFrequencies[insertAt] = frequency; memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short), (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } *dest = 0; // NULL terminate if (DEBUG_DICT) { LOGI("Added word at %d", insertAt); } return true; } return false; void UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { mWordsPriorityQueue->push(frequency, word, length); } static const char QUOTE = '\''; Loading native/src/unigram_dictionary.h +6 −8 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "correction_state.h" #include "defines.h" #include "proximity_info.h" #include "words_priority_queue.h" namespace latinime { Loading Loading @@ -73,18 +74,16 @@ public: private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies, const int flags); const int *ycoordinates, const int *codes, const int codesSize, const int flags); bool isDigraph(const int* codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies); const int currentDepth, int* codesDest); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies); const int *ycoordinates, const int *codes, const int codesSize); void getSuggestionCandidates(const bool useFullEditDistance); bool addWord(unsigned short *word, int length, int frequency); void addWord(unsigned short *word, int length, int frequency); void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, Correction *correction, const bool useFullEditDistance); Loading Loading @@ -123,8 +122,7 @@ private: }; static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; int *mFrequencies; unsigned short *mOutputChars; WordsPriorityQueue *mWordsPriorityQueue; ProximityInfo *mProximityInfo; Correction *mCorrection; int mInputLength; Loading native/src/words_priority_queue.h 0 → 100644 +146 −0 Original line number Diff line number Diff line /* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_WORDS_PRIORITY_QUEUE_H #define LATINIME_WORDS_PRIORITY_QUEUE_H #include <iostream> #include <queue> #include "defines.h" namespace latinime { class WordsPriorityQueue { private: class SuggestedWord { public: int mScore; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; int mWordLength; bool mUsed; void setParams(int score, unsigned short* word, int wordLength) { mScore = score; mWordLength = wordLength; memcpy(mWord, word, sizeof(unsigned short) * wordLength); mUsed = true; } }; struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { return left->mScore > right->mScore; } }; SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, int wordLength) { for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { mSuggestedWords[i].setParams(score, word, wordLength); return &mSuggestedWords[i]; } } return 0; } typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, wordComparator> Suggestions; Suggestions mSuggestions; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; SuggestedWord* mSuggestedWords; public: WordsPriorityQueue(int maxWords, int maxWordLength) : MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( (unsigned int) maxWordLength) { mSuggestedWords = new SuggestedWord[maxWordLength]; for (int i = 0; i < maxWordLength; ++i) { mSuggestedWords[i].mUsed = false; } } ~WordsPriorityQueue() { delete[] mSuggestedWords; } void push(int score, unsigned short* word, int wordLength) { SuggestedWord* sw = 0; if (mSuggestions.size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; if (minScore >= score) { return; } else { sw->mUsed = false; mSuggestions.pop(); } } if (sw == 0) { sw = getFreeSuggestedWord(score, word, wordLength); } else { sw->setParams(score, word, wordLength); } if (sw == 0) { LOGE("SuggestedWord is accidentally null."); return; } if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("Push word. %d, %d", score, wordLength); DUMP_WORD(word, wordLength); } mSuggestions.push(sw); } int outputSuggestions(int *frequencies, unsigned short *outputChars) { const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; while (!mSuggestions.empty() && index >= 0) { SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("dump word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); } const unsigned int wordLength = sw->mWordLength; char* targetAdr = (char*) outputChars + (index) * MAX_WORD_LENGTH * sizeof(short); frequencies[index] = sw->mScore; memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short)); if (wordLength < MAX_WORD_LENGTH) { ((unsigned short*) targetAdr)[wordLength] = 0; } sw->mUsed = false; mSuggestions.pop(); --index; } return size; } void clear() { while (!mSuggestions.empty()) { SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("Clear word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); } sw->mUsed = false; mSuggestions.pop(); } } }; } #endif // LATINIME_WORDS_PRIORITY_QUEUE_H Loading
native/Android.mk +6 −2 Original line number Diff line number Diff line Loading @@ -46,15 +46,19 @@ LOCAL_MODULE := libjni_latinime LOCAL_MODULE_TAGS := user # For STL LOCAL_C_INCLUDES += external/stlport/stlport bionic LOCAL_SHARED_LIBRARIES += libstlport ifeq ($(FLAG_DO_PROFILE), true) $(warning Making profiling version of native library) LOCAL_CFLAGS += -DFLAG_DO_PROFILE LOCAL_SHARED_LIBRARIES := libcutils libutils LOCAL_SHARED_LIBRARIES += libcutils libutils else # FLAG_DO_PROFILE ifeq ($(FLAG_DBG), true) $(warning Making debug version of native library) LOCAL_CFLAGS += -DFLAG_DBG LOCAL_SHARED_LIBRARIES := libcutils libutils LOCAL_SHARED_LIBRARIES += libcutils libutils endif # FLAG_DBG endif # FLAG_DO_PROFILE Loading
native/src/defines.h +2 −0 Original line number Diff line number Diff line Loading @@ -101,6 +101,7 @@ static void prof_out(void) { #define DEBUG_PROXIMITY_INFO true #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) Loading @@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_PROXIMITY_INFO false #define DEBUG_CORRECTION false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) Loading
native/src/unigram_dictionary.cpp +18 −75 Original line number Diff line number Diff line Loading @@ -49,10 +49,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed LOGI("UnigramDictionary - constructor"); } mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier); mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength); } UnigramDictionary::~UnigramDictionary() { delete mCorrection; delete mWordsPriorityQueue; } static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize, Loading Loading @@ -88,7 +90,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) { const int currentDepth, int* codesDest) { if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) { for (int i = 0; i < codesRemain; ++i) { Loading @@ -105,8 +107,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, frequencies); currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); // Copy the second char of the digraph in place, then continue processing on // the remaining part of the word. Loading @@ -115,8 +116,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords, frequencies); codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS); return; } } Loading @@ -132,8 +132,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest, codesSrc, remainingBytes); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer, (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies, flags); (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags); } int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, Loading @@ -144,28 +143,24 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies); codesSize, flags, codes, codesSize, 0, codesBuffer); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies, flags); getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags); } PROF_START(20); // Get the word count int suggestedWordsCount = 0; while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) { suggestedWordsCount++; } const int suggestedWordsCount = mWordsPriorityQueue->outputSuggestions(frequencies, outWords); if (DEBUG_DICT) { LOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { #ifdef FLAG_DBG short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH; short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; LOGI("%s %i", s, mFrequencies[j]); LOGI("%s %i", s, frequencies[j]); #endif } } Loading @@ -176,12 +171,12 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies, const int flags) { const int flags) { PROF_OPEN; PROF_START(0); initSuggestions( proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); proximityInfo, xcoordinates, ycoordinates, codes, codesSize); if (DEBUG_DICT) assert(codesSize == mInputLength); const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); Loading Loading @@ -241,71 +236,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies) { const int *yCoordinates, const int *codes, const int codesSize) { if (DEBUG_DICT) { LOGI("initSuggest"); } mFrequencies = frequencies; mOutputChars = outWords; mInputLength = codesSize; proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates); mProximityInfo = proximityInfo; mWordsPriorityQueue->clear(); } // TODO: We need to optimize addWord by using STL or something // TODO: This needs to take an const unsigned short* and not tinker with its contents bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { word[length] = 0; if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) { #ifdef FLAG_DBG char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; LOGI("Found word = %s, freq = %d", s, frequency); #endif } if (length > MAX_WORD_LENGTH) { if (DEBUG_DICT) { LOGI("Exceeded max word length."); } return false; } // Find the right insertion point int insertAt = 0; while (insertAt < MAX_WORDS) { // TODO: How should we sort words with the same frequency? if (frequency > mFrequencies[insertAt]) { break; } insertAt++; } if (insertAt < MAX_WORDS) { if (DEBUG_DICT) { #ifdef FLAG_DBG char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX); #endif } memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); mFrequencies[insertAt] = frequency; memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short), (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short), (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH); unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } *dest = 0; // NULL terminate if (DEBUG_DICT) { LOGI("Added word at %d", insertAt); } return true; } return false; void UnigramDictionary::addWord(unsigned short *word, int length, int frequency) { mWordsPriorityQueue->push(frequency, word, length); } static const char QUOTE = '\''; Loading
native/src/unigram_dictionary.h +6 −8 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "correction_state.h" #include "defines.h" #include "proximity_info.h" #include "words_priority_queue.h" namespace latinime { Loading Loading @@ -73,18 +74,16 @@ public: private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies, const int flags); const int *ycoordinates, const int *codes, const int codesSize, const int flags); bool isDigraph(const int* codes, const int i, const int codesSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies); const int currentDepth, int* codesDest); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies); const int *ycoordinates, const int *codes, const int codesSize); void getSuggestionCandidates(const bool useFullEditDistance); bool addWord(unsigned short *word, int length, int frequency); void addWord(unsigned short *word, int length, int frequency); void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction); void getMissingSpaceWords(const int inputLength, const int missingSpacePos, Correction *correction, const bool useFullEditDistance); Loading Loading @@ -123,8 +122,7 @@ private: }; static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[]; int *mFrequencies; unsigned short *mOutputChars; WordsPriorityQueue *mWordsPriorityQueue; ProximityInfo *mProximityInfo; Correction *mCorrection; int mInputLength; Loading
native/src/words_priority_queue.h 0 → 100644 +146 −0 Original line number Diff line number Diff line /* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_WORDS_PRIORITY_QUEUE_H #define LATINIME_WORDS_PRIORITY_QUEUE_H #include <iostream> #include <queue> #include "defines.h" namespace latinime { class WordsPriorityQueue { private: class SuggestedWord { public: int mScore; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; int mWordLength; bool mUsed; void setParams(int score, unsigned short* word, int wordLength) { mScore = score; mWordLength = wordLength; memcpy(mWord, word, sizeof(unsigned short) * wordLength); mUsed = true; } }; struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { return left->mScore > right->mScore; } }; SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word, int wordLength) { for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { mSuggestedWords[i].setParams(score, word, wordLength); return &mSuggestedWords[i]; } } return 0; } typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>, wordComparator> Suggestions; Suggestions mSuggestions; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; SuggestedWord* mSuggestedWords; public: WordsPriorityQueue(int maxWords, int maxWordLength) : MAX_WORDS((unsigned int) maxWords), MAX_WORD_LENGTH( (unsigned int) maxWordLength) { mSuggestedWords = new SuggestedWord[maxWordLength]; for (int i = 0; i < maxWordLength; ++i) { mSuggestedWords[i].mUsed = false; } } ~WordsPriorityQueue() { delete[] mSuggestedWords; } void push(int score, unsigned short* word, int wordLength) { SuggestedWord* sw = 0; if (mSuggestions.size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; if (minScore >= score) { return; } else { sw->mUsed = false; mSuggestions.pop(); } } if (sw == 0) { sw = getFreeSuggestedWord(score, word, wordLength); } else { sw->setParams(score, word, wordLength); } if (sw == 0) { LOGE("SuggestedWord is accidentally null."); return; } if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("Push word. %d, %d", score, wordLength); DUMP_WORD(word, wordLength); } mSuggestions.push(sw); } int outputSuggestions(int *frequencies, unsigned short *outputChars) { const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; while (!mSuggestions.empty() && index >= 0) { SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("dump word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); } const unsigned int wordLength = sw->mWordLength; char* targetAdr = (char*) outputChars + (index) * MAX_WORD_LENGTH * sizeof(short); frequencies[index] = sw->mScore; memcpy(targetAdr, sw->mWord, (wordLength) * sizeof(short)); if (wordLength < MAX_WORD_LENGTH) { ((unsigned short*) targetAdr)[wordLength] = 0; } sw->mUsed = false; mSuggestions.pop(); --index; } return size; } void clear() { while (!mSuggestions.empty()) { SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { LOGI("Clear word. %d", sw->mScore); DUMP_WORD(sw->mWord, sw->mWordLength); } sw->mUsed = false; mSuggestions.pop(); } } }; } #endif // LATINIME_WORDS_PRIORITY_QUEUE_H