Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9221772a authored by Keisuke Kuroyanagi
Browse files

Get bigram information via getWordProperty().

Bug: 12810574
Change-Id: I2750a5659ccbc3e31307c28e35dd9a1dbdffbeee
parent 08d72c52
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -34,7 +34,27 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
    jclass arrayListClass = env->FindClass("java/util/ArrayList");
    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");

    // TODO: Output bigrams.
    // Output bigrams.
    // For every bigram, append two parallel entries to the Java lists: the target word's code
    // points go to outBigramTargets, and {probability, timestamp, level, count} goes to
    // outBigramProbabilities.
    const int bigramCount = mBigrams.size();
    for (int i = 0; i < bigramCount; ++i) {
        const BigramProperty *const bigramProperty = &mBigrams[i];
        const std::vector<int> *const word1CodePoints = bigramProperty->getTargetCodePoints();
        const jsize word1CodePointCount = static_cast<jsize>(word1CodePoints->size());
        jintArray bigramWord1CodePointArray = env->NewIntArray(word1CodePointCount);
        // An empty vector has no element 0 to take the address of, so only copy when there is
        // something to copy; the freshly created array is simply left empty otherwise.
        if (!word1CodePoints->empty()) {
            env->SetIntArrayRegion(bigramWord1CodePointArray, 0 /* start */,
                    word1CodePointCount, &word1CodePoints->front());
        }
        // addMethodId was looked up with the signature "(Ljava/lang/Object;)Z", i.e. the method
        // returns a boolean, so it has to be invoked through CallBooleanMethod rather than
        // CallVoidMethod. The returned value is not needed here.
        env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
        // Release the local reference now that the list holds the array.
        env->DeleteLocalRef(bigramWord1CodePointArray);

        int bigramProbabilityInfo[] = {bigramProperty->getProbability(),
                bigramProperty->getTimestamp(), bigramProperty->getLevel(),
                bigramProperty->getCount()};
        jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
        env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
                NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
        env->CallBooleanMethod(outBigramProbabilities, addMethodId, bigramProbabilityInfoArray);
        env->DeleteLocalRef(bigramProbabilityInfoArray);
    }

    // Output shortcuts.
    const int shortcutTargetCount = mShortcuts.size();
    for (int i = 0; i < shortcutTargetCount; ++i) {
+20 −0
Original line number Diff line number Diff line
@@ -35,6 +35,26 @@ class WordProperty {
                : mTargetCodePoints(*targetCodePoints), mProbability(probability),
                  mTimestamp(timestamp), mLevel(level), mCount(count) {}

        const std::vector<int> *getTargetCodePoints() const {
            return &mTargetCodePoints;
        }

        int getProbability() const {
            return mProbability;
        }

        int getTimestamp() const {
            return mTimestamp;
        }

        int getLevel() const {
            return mLevel;
        }

        int getCount() const {
            return mCount;
        }

     private:
        std::vector<int> mTargetCodePoints;
        int mProbability;
+66 −11
Original line number Diff line number Diff line
@@ -878,7 +878,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
    private void testGetWordProperties(final int formatVersion) {
        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);
        final int ITERATION_COUNT = 1000;
        final int UNIGRAM_COUNT = 1000;
        final int BIGRAM_COUNT = 1000;
        final int codePointSetSize = 20;
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);

@@ -895,7 +896,13 @@ public class BinaryDictionaryTests extends AndroidTestCase {
        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
        assertFalse(invalidWordProperty.isValid());

        for (int i = 0; i < ITERATION_COUNT; i++) {
        final ArrayList<String> words = new ArrayList<String>();
        final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>();
        final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
                new HashMap<Pair<String, String>, Integer>();

        for (int i = 0; i < UNIGRAM_COUNT; i++) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            final int unigramProbability = random.nextInt(0xFF);
            final boolean isNotAWord = random.nextBoolean();
@@ -904,15 +911,63 @@ public class BinaryDictionaryTests extends AndroidTestCase {
            binaryDictionary.addUnigramWord(word, unigramProbability,
                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
                    isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
            assertEquals(word, wordProperty.mCodePoints);
            assertTrue(wordProperty.isValid());
            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
            assertEquals(isBlacklisted, wordProperty.mIsBlacklisted);
            assertEquals(false, wordProperty.mHasBigrams);
            assertEquals(false, wordProperty.mHasShortcuts);
            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
            assertTrue(wordProperty.mShortcutTargets.isEmpty());
            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                binaryDictionary.flushWithGC();
            }
            words.add(word);
            wordProbabilities.put(word, unigramProbability);
            final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
            assertEquals(word, unigramProperty.mCodePoints);
            assertTrue(unigramProperty.isValid());
            assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
            assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
            assertEquals(false, unigramProperty.mHasBigrams);
            assertEquals(false, unigramProperty.mHasShortcuts);
            assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability);
            assertTrue(unigramProperty.mShortcutTargets.isEmpty());
        }

        for (int i = 0; i < BIGRAM_COUNT; i++) {
            final int word0Index = random.nextInt(wordProbabilities.size());
            final int word1Index = random.nextInt(wordProbabilities.size());
            if (word0Index == word1Index) {
                continue;
            }
            final String word0 = words.get(word0Index);
            final String word1 = words.get(word1Index);
            final int bigramProbability = random.nextInt(0xF);
            binaryDictionary.addBigramWords(word0, word1, bigramProbability,
                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
                binaryDictionary.flushWithGC();
            }
            if (!bigrams.containsKey(word0)) {
                final HashSet<String> bigramWord1s = new HashSet<String>();
                bigrams.put(word0, bigramWord1s);
            }
            bigrams.get(word0).add(word1);
            bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability);
        }

        for (int i = 0; i < words.size(); i++) {
            final String word0 = words.get(i);
            if (!bigrams.containsKey(word0)) {
                continue;
            }
            final HashSet<String> bigramWord1s = bigrams.get(word0);
            final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0);
            assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size());
            assertEquals(unigramProperty.mBigramTargets.size(),
                    unigramProperty.mBigramProbabilityInfo.size());
            for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
                final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
                assertTrue(bigramWord1s.contains(word1));
                final int probability = unigramProperty.mBigramTargets.get(j).mFrequency;
                assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
                        probability);
                assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
                        probability);
            }
        }
    }