Loading native/jni/Android.mk +1 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ LATIN_IME_CORE_SRC_FILES := \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ bigram_dictionary.cpp \ binary_dictionary_bigrams_reading_utils.cpp \ binary_dictionary_format_utils.cpp \ binary_dictionary_header.cpp \ binary_dictionary_header_reading_utils.cpp \ Loading native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +1 −2 Original line number Diff line number Diff line Loading @@ -233,8 +233,7 @@ namespace latinime { return multiBigramMap->getBigramProbability( binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); } return BinaryFormat::getBigramProbability( binaryDictionaryInfo->getDictRoot(), prevWordPos, wordPos, unigramProbability); return ProbabilityUtils::backoff(unigramProbability); } /////////////////////////////////////// Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +21 −24 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ #include "bigram_dictionary.h" #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/dictionary.h" Loading Loading @@ -100,12 +101,11 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. 
*/ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints, int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints, int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams Loading @@ -116,21 +116,20 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i } // If still no bigrams, we really don't have them! if (0 == pos) return 0; uint8_t bigramFlags; int bigramCount = 0; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); int bigramBuffer[MAX_WORD_LENGTH]; int unigramProbability = 0; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); int bigramBuffer[MAX_WORD_LENGTH]; for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); bigramsIt.hasNext(); /* no-op */) { bigramsIt.next(); const int length = BinaryFormat::getWordAtAddress( mBinaryDictionaryInfo->getDictRoot(), bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // inputSize == 0 means we are trying to find bigram predictions. 
if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { const int bigramProbabilityTemp = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags; const int bigramProbabilityTemp = bigramsIt.getProbability(); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 Loading @@ -142,7 +141,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i outputTypes); ++bigramCount; } } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); } return min(bigramCount, MAX_RESULTS); } Loading Loading @@ -187,22 +186,20 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2, false /* forceLowerCaseSearch */); int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(), word2, length2, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; uint8_t bigramFlags; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); if (bigramPos == nextWordPos) { for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); bigramsIt.hasNext(); /* no-op */) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { return true; } } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & 
bigramFlags); } return false; } Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -27,8 +27,8 @@ class BigramDictionary { public: BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); Loading native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h 0 → 100644 +67 −0 Original line number Diff line number Diff line /* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/
#ifndef LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"

namespace latinime {

/**
 * Forward-only cursor over the bigram entries stored at a given position of a binary
 * dictionary.
 *
 * Intended usage, as seen in BigramDictionary:
 *
 *     for (BinaryDictionaryBigramsIterator it(info, pos); it.hasNext(); ) {
 *         it.next();
 *         // use it.getBigramPos() / it.getProbability() / it.getFlags()
 *     }
 *
 * hasNext() is true on a freshly constructed iterator regardless of pos, so the caller is
 * responsible for only constructing it with the position of an actual bigram list
 * (BigramDictionary checks `0 == pos` before iterating).
 */
class BinaryDictionaryBigramsIterator {
 public:
    /**
     * @param binaryDictionaryInfo dictionary the entries are read from; stored, not copied.
     * @param pos position of the first bigram entry to read.
     */
    BinaryDictionaryBigramsIterator(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
            : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
              mBigramPos(0), mHasNext(true) {}

    /** @return whether another call to next() is expected to yield an entry. */
    AK_FORCE_INLINE bool hasNext() const {
        return mHasNext;
    }

    /**
     * Reads the entry at the current position and makes it the current entry.
     *
     * The order of the two reads matters: the flags are read first and then passed to the
     * address read. Both helpers receive &mPos; going by their "AndForwardPointer" names
     * they presumably advance it past what they consumed (confirm in
     * BinaryDictionaryBigramsReadingUtils).
     */
    AK_FORCE_INLINE void next() {
        mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
                mBinaryDictionaryInfo, &mPos);
        mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
                mBinaryDictionaryInfo, mBigramFlags, &mPos);
        // Whether another entry follows is derived from the flags of the entry just read.
        mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
    }

    /** @return the probability derived from the current entry's flags. */
    AK_FORCE_INLINE int getProbability() const {
        return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
    }

    /** @return the position stored by the last next() call; 0 if next() was never called. */
    AK_FORCE_INLINE int getBigramPos() const {
        return mBigramPos;
    }

    /** @return the raw flags of the current entry; 0 if next() was never called. */
    AK_FORCE_INLINE int getFlags() const {
        return mBigramFlags;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);

    // Dictionary being read; not owned by the iterator.
    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    // Read cursor handed by pointer to the reading utils in next().
    int mPos;
    // Flags of the current entry.
    BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
    // Position produced by the address read for the current entry.
    int mBigramPos;
    // Result of hasNext() on the current entry's flags; starts out true.
    bool mHasNext;
};
} // namespace latinime
#endif // LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
Loading
native/jni/Android.mk +1 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ LATIN_IME_CORE_SRC_FILES := \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ bigram_dictionary.cpp \ binary_dictionary_bigrams_reading_utils.cpp \ binary_dictionary_format_utils.cpp \ binary_dictionary_header.cpp \ binary_dictionary_header_reading_utils.cpp \ Loading
native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +1 −2 Original line number Diff line number Diff line Loading @@ -233,8 +233,7 @@ namespace latinime { return multiBigramMap->getBigramProbability( binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); } return BinaryFormat::getBigramProbability( binaryDictionaryInfo->getDictRoot(), prevWordPos, wordPos, unigramProbability); return ProbabilityUtils::backoff(unigramProbability); } /////////////////////////////////////// Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +21 −24 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ #include "bigram_dictionary.h" #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/dictionary.h" Loading Loading @@ -100,12 +101,11 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints, int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints, int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(prevWord, prevWordLength, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams Loading @@ -116,21 +116,20 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i } // If still no bigrams, we really don't have them! 
if (0 == pos) return 0; uint8_t bigramFlags; int bigramCount = 0; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); int bigramBuffer[MAX_WORD_LENGTH]; int unigramProbability = 0; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); int bigramBuffer[MAX_WORD_LENGTH]; for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); bigramsIt.hasNext(); /* no-op */) { bigramsIt.next(); const int length = BinaryFormat::getWordAtAddress( mBinaryDictionaryInfo->getDictRoot(), bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // inputSize == 0 means we are trying to find bigram predictions. if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { const int bigramProbabilityTemp = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags; const int bigramProbabilityTemp = bigramsIt.getProbability(); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. 
The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 Loading @@ -142,7 +141,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i outputTypes); ++bigramCount; } } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); } return min(bigramCount, MAX_RESULTS); } Loading Loading @@ -187,22 +186,20 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2, int length2) const { const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2, false /* forceLowerCaseSearch */); int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(), word2, length2, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; uint8_t bigramFlags; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); if (bigramPos == nextWordPos) { for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); bigramsIt.hasNext(); /* no-op */) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { return true; } } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); } return false; } Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -27,8 +27,8 @@ class BigramDictionary { public: BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h 0 → 100644 +67 −0 Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"

namespace latinime {

/**
 * Forward-only cursor over the bigram entries stored at a given position of a binary
 * dictionary.
 *
 * Intended usage, as seen in BigramDictionary:
 *
 *     for (BinaryDictionaryBigramsIterator it(info, pos); it.hasNext(); ) {
 *         it.next();
 *         // use it.getBigramPos() / it.getProbability() / it.getFlags()
 *     }
 *
 * hasNext() is true on a freshly constructed iterator regardless of pos, so the caller is
 * responsible for only constructing it with the position of an actual bigram list
 * (BigramDictionary checks `0 == pos` before iterating).
 */
class BinaryDictionaryBigramsIterator {
 public:
    /**
     * @param binaryDictionaryInfo dictionary the entries are read from; stored, not copied.
     * @param pos position of the first bigram entry to read.
     */
    BinaryDictionaryBigramsIterator(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
            : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
              mBigramPos(0), mHasNext(true) {}

    /** @return whether another call to next() is expected to yield an entry. */
    AK_FORCE_INLINE bool hasNext() const {
        return mHasNext;
    }

    /**
     * Reads the entry at the current position and makes it the current entry.
     *
     * The order of the two reads matters: the flags are read first and then passed to the
     * address read. Both helpers receive &mPos; going by their "AndForwardPointer" names
     * they presumably advance it past what they consumed (confirm in
     * BinaryDictionaryBigramsReadingUtils).
     */
    AK_FORCE_INLINE void next() {
        mBigramFlags = BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
                mBinaryDictionaryInfo, &mPos);
        mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
                mBinaryDictionaryInfo, mBigramFlags, &mPos);
        // Whether another entry follows is derived from the flags of the entry just read.
        mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(mBigramFlags);
    }

    /** @return the probability derived from the current entry's flags. */
    AK_FORCE_INLINE int getProbability() const {
        return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
    }

    /** @return the position stored by the last next() call; 0 if next() was never called. */
    AK_FORCE_INLINE int getBigramPos() const {
        return mBigramPos;
    }

    /** @return the raw flags of the current entry; 0 if next() was never called. */
    AK_FORCE_INLINE int getFlags() const {
        return mBigramFlags;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);

    // Dictionary being read; not owned by the iterator.
    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    // Read cursor handed by pointer to the reading utils in next().
    int mPos;
    // Flags of the current entry.
    BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
    // Position produced by the address read for the current entry.
    int mBigramPos;
    // Result of hasNext() on the current entry's flags; starts out true.
    bool mHasNext;
};
} // namespace latinime
#endif // LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H