Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 010909d4 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Support dumping ngram entries."

parents 90aa229f b5ef884f
Loading
Loading
Loading
Loading
+13 −6
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ public final class WordProperty implements Comparable<WordProperty> {
            final boolean isPossiblyOffensive, final boolean hasBigram, final boolean hasShortcuts,
            final boolean isBeginningOfSentence, final int[] probabilityInfo,
            final ArrayList<int[][]> ngramPrevWordsArray,
            final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
            final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
            final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo,
            final ArrayList<int[]> shortcutTargets,
            final ArrayList<Integer> shortcutProbabilities) {
@@ -102,16 +102,22 @@ public final class WordProperty implements Comparable<WordProperty> {
        mHasNgrams = hasBigram;

        final int relatedNgramCount = ngramTargets.size();
        final WordInfo currentWordInfo =
                mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
                        : new WordInfo(mWord);
        final NgramContext ngramContext = new NgramContext(currentWordInfo);
        for (int i = 0; i < relatedNgramCount; i++) {
            final String ngramTargetString =
                    StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
                    createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
            // TODO: Support n-gram.
            final int[][] prevWords = ngramPrevWordsArray.get(i);
            final boolean[] isBeginningOfSentenceArray =
                    ngramPrevWordIsBeginningOfSentenceArray.get(i);
            final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
            for (int j = 0; j < prevWords.length; j++) {
                wordInfoArray[j] = isBeginningOfSentenceArray[j]
                        ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
                        : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
                                prevWords[j]));
            }
            final NgramContext ngramContext = new NgramContext(wordInfoArray);
            ngrams.add(new NgramProperty(ngramTarget, ngramContext));
        }
        mNgrams = ngrams.isEmpty() ? null : ngrams;
@@ -126,6 +132,7 @@ public final class WordProperty implements Comparable<WordProperty> {
    }

    // TODO: Remove
    @UsedForTesting
    public ArrayList<WeightedString> getBigrams() {
        if (null == mNgrams) {
            return null;
+14 −4
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
package com.android.inputmethod.latin.utils;

import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.NgramProperty;
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;
@@ -26,6 +27,8 @@ import java.util.HashMap;
public class CombinedFormatUtils {
    public static final String DICTIONARY_TAG = "dictionary";
    public static final String BIGRAM_TAG = "bigram";
    public static final String NGRAM_TAG = "ngram";
    public static final String NGRAM_PREV_WORD_TAG = "prev_word";
    public static final String SHORTCUT_TAG = "shortcut";
    public static final String PROBABILITY_TAG = "f";
    public static final String HISTORICAL_INFO_TAG = "historicalInfo";
@@ -76,12 +79,19 @@ public class CombinedFormatUtils {
            }
        }
        if (wordProperty.mHasNgrams) {
            // TODO: Support ngram.
            for (final WeightedString bigram : wordProperty.getBigrams()) {
                builder.append("  " + BIGRAM_TAG + "=" + bigram.mWord);
            for (final NgramProperty ngramProperty : wordProperty.mNgrams) {
                builder.append(" " + NGRAM_TAG + "=" + ngramProperty.mTargetWord.mWord);
                builder.append(",");
                builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
                builder.append(formatProbabilityInfo(ngramProperty.mTargetWord.mProbabilityInfo));
                builder.append("\n");
                // Emit one "prev_word[i]=..." line per previous word of this n-gram's context.
                // The printed index is 0-based, while the NgramContext accessors are called
                // with i + 1.
                for (int i = 0; i < ngramProperty.mNgramContext.getPrevWordCount(); i++) {
                    builder.append("  " + NGRAM_PREV_WORD_TAG + "[" + i + "]="
                            + ngramProperty.mNgramContext.getNthPrevWord(i + 1));
                    // Flag previous words that are beginning-of-sentence markers.
                    if (ngramProperty.mNgramContext.isNthPrevWordBeginningOfSentence(i + 1)) {
                        builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
                    }
                    builder.append("\n");
                }
            }
        }
        return builder.toString();
+3 −2
Original line number Diff line number Diff line
@@ -327,8 +327,8 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,

static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
        jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
        jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
        jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
        jobject outNgramProbabilityInfo, jobject outShortcutTargets,
        jobject outShortcutProbabilities) {
    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@@ -352,6 +352,7 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
    const WordProperty wordProperty = dictionary->getWordProperty(
            CodePointArrayView(wordCodePoints, codePointCount));
    wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
            outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
            outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
            outShortcutProbabilities);
}
+36 −12
Original line number Diff line number Diff line
@@ -22,8 +22,9 @@
namespace latinime {

void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
        jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
        jobject outBigramProbabilities, jobject outShortcutTargets,
        jbooleanArray outFlags, jintArray outProbabilityInfo,
        jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
        jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
        jobject outShortcutProbabilities) const {
    JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
            MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(),
@@ -43,16 +44,39 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
    jclass arrayListClass = env->FindClass("java/util/ArrayList");
    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");

    // Output bigrams.
    // TODO: Support n-gram
    // Output ngrams.
    jclass intArrayClass = env->FindClass("[I");
    for (const auto &ngramProperty : mNgrams) {
        const std::vector<int> *const word1CodePoints = ngramProperty.getTargetCodePoints();
        jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePoints->size());
        JniDataUtils::outputCodePoints(env, bigramWord1CodePointArray, 0 /* start */,
                word1CodePoints->size(), word1CodePoints->data(), word1CodePoints->size(),
        const NgramContext *const ngramContext = ngramProperty.getNgramContext();
        jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
                ngramContext->getPrevWordCount(), intArrayClass, nullptr);
        jbooleanArray prevWordIsBeginningOfSentenceArray =
                env->NewBooleanArray(ngramContext->getPrevWordCount());
        for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
            const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
            jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
            JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */,
                    codePoints.size(), codePoints.data(), codePoints.size(),
                    false /* needsNullTermination */);
        env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
        env->DeleteLocalRef(bigramWord1CodePointArray);
            env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
            env->DeleteLocalRef(prevWordCodePoints);
            JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
                    ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
        }
        env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
        env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
                prevWordIsBeginningOfSentenceArray);
        env->DeleteLocalRef(prevWordWordCodePointsArray);
        env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);

        const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
        jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
        JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
                targetWordCodePoints->size(), targetWordCodePoints->data(),
                targetWordCodePoints->size(), false /* needsNullTermination */);
        env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
        env->DeleteLocalRef(targetWordCodePointArray);

        const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
        int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
                ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
@@ -60,7 +84,7 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
        jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
        env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
                NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
        env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
        env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
        env->DeleteLocalRef(bigramProbabilityInfoArray);
    }

+4 −2
Original line number Diff line number Diff line
@@ -39,8 +39,10 @@ class WordProperty {
              mNgrams(*ngrams) {}

    void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
            jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
            jobject outShortcutTargets, jobject outShortcutProbabilities) const;
            jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
            jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
            jobject outNgramProbabilities, jobject outShortcutTargets,
            jobject outShortcutProbabilities) const;

    const UnigramProperty *getUnigramProperty() const {
        return &mUnigramProperty;
Loading