Loading java/src/com/android/inputmethod/latin/makedict/WordProperty.java +13 −6 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ public final class WordProperty implements Comparable<WordProperty> { final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts, final boolean isBeginningOfSentence, final int[] probabilityInfo, final ArrayList<int[][]> ngramPrevWordsArray, final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray, final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo, final ArrayList<int[]> shortcutTargets, final ArrayList<Integer> shortcutProbabilities) { Loading @@ -102,16 +102,22 @@ public final class WordProperty implements Comparable<WordProperty> { mHasNgrams = hasBigram; final int relatedNgramCount = ngramTargets.size(); final WordInfo currentWordInfo = mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO : new WordInfo(mWord); final NgramContext ngramContext = new NgramContext(currentWordInfo); for (int i = 0; i < relatedNgramCount; i++) { final String ngramTargetString = StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i)); final WeightedString ngramTarget = new WeightedString(ngramTargetString, createProbabilityInfoFromArray(ngramProbabilityInfo.get(i))); // TODO: Support n-gram. final int[][] prevWords = ngramPrevWordsArray.get(i); final boolean[] isBeginningOfSentenceArray = ngramPrevWordIsBeginningOfSentenceArray.get(i); final WordInfo[] wordInfoArray = new WordInfo[prevWords.length]; for (int j = 0; j < prevWords.length; j++) { wordInfoArray[j] = isBeginningOfSentenceArray[j] ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray( prevWords[j])); } final NgramContext ngramContext = new NgramContext(wordInfoArray); ngrams.add(new NgramProperty(ngramTarget, ngramContext)); } mNgrams = ngrams.isEmpty() ? null : ngrams; Loading @@ -126,6 +132,7 @@ public final class WordProperty implements Comparable<WordProperty> { } // TODO: Remove @UsedForTesting public ArrayList<WeightedString> getBigrams() { if (null == mNgrams) { return null; Loading java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +14 −4 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ package com.android.inputmethod.latin.utils; import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.NgramProperty; import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; Loading @@ -26,6 +27,8 @@ import java.util.HashMap; public class CombinedFormatUtils { public static final String DICTIONARY_TAG = "dictionary"; public static final String BIGRAM_TAG = "bigram"; public static final String NGRAM_TAG = "ngram"; public static final String NGRAM_PREV_WORD_TAG = "prev_word"; public static final String SHORTCUT_TAG = "shortcut"; public static final String PROBABILITY_TAG = "f"; public static final String HISTORICAL_INFO_TAG = "historicalInfo"; Loading Loading @@ -76,12 +79,19 @@ public class CombinedFormatUtils { } } if (wordProperty.mHasNgrams) { // TODO: Support ngram. for (final WeightedString bigram : wordProperty.getBigrams()) { builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); for (final NgramProperty ngramProperty : wordProperty.mNgrams) { builder.append(" " + NGRAM_TAG + "=" + ngramProperty.mTargetWord.mWord); builder.append(","); builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); builder.append(formatProbabilityInfo(ngramProperty.mTargetWord.mProbabilityInfo)); builder.append("\n"); for (int i = 0; i < ngramProperty.mNgramContext.getPrevWordCount(); i++) { builder.append(" " + NGRAM_PREV_WORD_TAG + "[" + i + "]=" + ngramProperty.mNgramContext.getNthPrevWord(i + 1)); if (ngramProperty.mNgramContext.isNthPrevWordBeginningOfSontence(i + 1)) { builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true"); } builder.append("\n"); } } } return builder.toString(); Loading native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +3 −2 Original line number Diff line number Diff line Loading @@ -327,8 +327,8 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */, jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilityInfo, jobject outShortcutTargets, jobject outShortcutProbabilities) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Loading @@ -352,6 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, const WordProperty wordProperty = dictionary->getWordProperty( CodePointArrayView(wordCodePoints, codePointCount)); wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); } Loading native/jni/src/suggest/core/dictionary/property/word_property.cpp +36 −12 Original line number Diff line number Diff line Loading @@ -22,8 +22,9 @@ namespace latinime { void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const { JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), Loading @@ -43,16 +44,39 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jclass arrayListClass = env->FindClass("java/util/ArrayList"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); // Output bigrams. // TODO: Support n-gram // Output ngrams. jclass intArrayClass = env->FindClass("[I"); for (const auto &ngramProperty : mNgrams) { const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), const NgramContext *const ngramContext = ngramProperty.getNgramContext(); jobjectArray prevWordWordCodePointsArray = env->NewObjectArray( ngramContext->getPrevWordCount(), intArrayClass, nullptr); jbooleanArray prevWordIsBeginningOfSentenceArray = env->NewBooleanArray(ngramContext->getPrevWordCount()); for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) { const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1); jintArray prevWordCodePoints = env->NewIntArray(codePoints.size()); JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */, codePoints.size(), codePoints.data(), codePoints.size(), false /* needsNullTermination */); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints); env->DeleteLocalRef(prevWordCodePoints); JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i, ngramContext->isNthPrevWordBeginningOfSentence(i + 1)); } env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray); env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId, prevWordIsBeginningOfSentenceArray); env->DeleteLocalRef(prevWordWordCodePointsArray); env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray); const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints(); jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size()); JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */, targetWordCodePoints->size(), targetWordCodePoints->data(), targetWordCodePoints->size(), false /* needsNullTermination */); env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray); env->DeleteLocalRef(targetWordCodePointArray); const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); int bigramProbabilityInfo[] = {ngramProperty.getProbability(), ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), Loading @@ -60,7 +84,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray); env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray); env->DeleteLocalRef(bigramProbabilityInfoArray); } Loading native/jni/src/suggest/core/dictionary/property/word_property.h +4 −2 Original line number Diff line number Diff line Loading @@ -39,8 +39,10 @@ class WordProperty { mNgrams(*ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; const UnigramProperty *getUnigramProperty() const { return &mUnigramProperty; Loading Loading
java/src/com/android/inputmethod/latin/makedict/WordProperty.java +13 −6 Original line number Diff line number Diff line Loading @@ -87,7 +87,7 @@ public final class WordProperty implements Comparable<WordProperty> { final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts, final boolean isBeginningOfSentence, final int[] probabilityInfo, final ArrayList<int[][]> ngramPrevWordsArray, final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray, final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo, final ArrayList<int[]> shortcutTargets, final ArrayList<Integer> shortcutProbabilities) { Loading @@ -102,16 +102,22 @@ public final class WordProperty implements Comparable<WordProperty> { mHasNgrams = hasBigram; final int relatedNgramCount = ngramTargets.size(); final WordInfo currentWordInfo = mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO : new WordInfo(mWord); final NgramContext ngramContext = new NgramContext(currentWordInfo); for (int i = 0; i < relatedNgramCount; i++) { final String ngramTargetString = StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i)); final WeightedString ngramTarget = new WeightedString(ngramTargetString, createProbabilityInfoFromArray(ngramProbabilityInfo.get(i))); // TODO: Support n-gram. final int[][] prevWords = ngramPrevWordsArray.get(i); final boolean[] isBeginningOfSentenceArray = ngramPrevWordIsBeginningOfSentenceArray.get(i); final WordInfo[] wordInfoArray = new WordInfo[prevWords.length]; for (int j = 0; j < prevWords.length; j++) { wordInfoArray[j] = isBeginningOfSentenceArray[j] ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray( prevWords[j])); } final NgramContext ngramContext = new NgramContext(wordInfoArray); ngrams.add(new NgramProperty(ngramTarget, ngramContext)); } mNgrams = ngrams.isEmpty() ? null : ngrams; Loading @@ -126,6 +132,7 @@ public final class WordProperty implements Comparable<WordProperty> { } // TODO: Remove @UsedForTesting public ArrayList<WeightedString> getBigrams() { if (null == mNgrams) { return null; Loading
java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +14 −4 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ package com.android.inputmethod.latin.utils; import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.NgramProperty; import com.android.inputmethod.latin.makedict.ProbabilityInfo; import com.android.inputmethod.latin.makedict.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; Loading @@ -26,6 +27,8 @@ import java.util.HashMap; public class CombinedFormatUtils { public static final String DICTIONARY_TAG = "dictionary"; public static final String BIGRAM_TAG = "bigram"; public static final String NGRAM_TAG = "ngram"; public static final String NGRAM_PREV_WORD_TAG = "prev_word"; public static final String SHORTCUT_TAG = "shortcut"; public static final String PROBABILITY_TAG = "f"; public static final String HISTORICAL_INFO_TAG = "historicalInfo"; Loading Loading @@ -76,12 +79,19 @@ public class CombinedFormatUtils { } } if (wordProperty.mHasNgrams) { // TODO: Support ngram. for (final WeightedString bigram : wordProperty.getBigrams()) { builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); for (final NgramProperty ngramProperty : wordProperty.mNgrams) { builder.append(" " + NGRAM_TAG + "=" + ngramProperty.mTargetWord.mWord); builder.append(","); builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); builder.append(formatProbabilityInfo(ngramProperty.mTargetWord.mProbabilityInfo)); builder.append("\n"); for (int i = 0; i < ngramProperty.mNgramContext.getPrevWordCount(); i++) { builder.append(" " + NGRAM_PREV_WORD_TAG + "[" + i + "]=" + ngramProperty.mNgramContext.getNthPrevWord(i + 1)); if (ngramProperty.mNgramContext.isNthPrevWordBeginningOfSontence(i + 1)) { builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true"); } builder.append("\n"); } } } return builder.toString(); Loading
native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +3 −2 Original line number Diff line number Diff line Loading @@ -327,8 +327,8 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */, jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilityInfo, jobject outShortcutTargets, jobject outShortcutProbabilities) { Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Loading @@ -352,6 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, const WordProperty wordProperty = dictionary->getWordProperty( CodePointArrayView(wordCodePoints, codePointCount)); wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); } Loading
native/jni/src/suggest/core/dictionary/property/word_property.cpp +36 −12 Original line number Diff line number Diff line Loading @@ -22,8 +22,9 @@ namespace latinime { void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const { JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), Loading @@ -43,16 +44,39 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jclass arrayListClass = env->FindClass("java/util/ArrayList"); jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); // Output bigrams. // TODO: Support n-gram // Output ngrams. jclass intArrayClass = env->FindClass("[I"); for (const auto &ngramProperty : mNgrams) { const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints(); jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size()); JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */, word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(), const NgramContext *const ngramContext = ngramProperty.getNgramContext(); jobjectArray prevWordWordCodePointsArray = env->NewObjectArray( ngramContext->getPrevWordCount(), intArrayClass, nullptr); jbooleanArray prevWordIsBeginningOfSentenceArray = env->NewBooleanArray(ngramContext->getPrevWordCount()); for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) { const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1); jintArray prevWordCodePoints = env->NewIntArray(codePoints.size()); JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */, codePoints.size(), codePoints.data(), codePoints.size(), false /* needsNullTermination */); env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray); env->DeleteLocalRef(bigramWord1CodePointArray); env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints); env->DeleteLocalRef(prevWordCodePoints); JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i, ngramContext->isNthPrevWordBeginningOfSentence(i + 1)); } env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray); env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId, prevWordIsBeginningOfSentenceArray); env->DeleteLocalRef(prevWordWordCodePointsArray); env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray); const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints(); jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size()); JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */, targetWordCodePoints->size(), targetWordCodePoints->data(), targetWordCodePoints->size(), false /* needsNullTermination */); env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray); env->DeleteLocalRef(targetWordCodePointArray); const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); int bigramProbabilityInfo[] = {ngramProperty.getProbability(), ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), Loading @@ -60,7 +84,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray); env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray); env->DeleteLocalRef(bigramProbabilityInfoArray); } Loading
native/jni/src/suggest/core/dictionary/property/word_property.h +4 −2 Original line number Diff line number Diff line Loading @@ -39,8 +39,10 @@ class WordProperty { mNgrams(*ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, jobject outShortcutProbabilities) const; const UnigramProperty *getUnigramProperty() const { return &mUnigramProperty; Loading