Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8890b015 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Remove unigram for v402 with historical info.

Bug: 15531638
Change-Id: If1b73ac693e45a80df987ea16b2dece5597697e8
parent 82be1873
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
    return true;
}

bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        // Require historical info to suppress unigram entry.
        return false;
    }
    const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
    const ProbabilityEntry probabilityEntryToWrite =
            ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
            ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
}

} // namespace v402
} // namespace backward
} // namespace latinime
+5 −0
Original line number Diff line number Diff line
@@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {

    bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);

    // Suppresses the unigram of the given PtNode so that the word is no longer used for
    // generating suggestions. Suppression relies on historical info, so this method can be used
    // only for dictionaries with historical info. Suppressed entries are still included in the
    // unigram count; they will be removed from the dictionary during GC.
    bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);

 private:
    DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);

+14 −0
Original line number Diff line number Diff line
@@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
    }
}

bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
        return false;
    }
    const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        return false;
    }
    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}

bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
        const BigramProperty *const bigramProperty) {
    if (!mBuffers->isUpdatable()) {
+1 −4
Original line number Diff line number Diff line
@@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    bool addUnigramEntry(const int *const word, const int length,
            const UnigramProperty *const unigramProperty);

    // Unigram removal is not supported here, so every request is rejected regardless of |word|
    // and |length|.
    bool removeUnigramEntry(const int *const word, const int length) {
        const bool removalSupported = false;
        return removalSupported;
    }
    // Suppresses the unigram entry of |word|. Returns false when the dictionary is not updatable
    // or when the word has no terminal PtNode; otherwise returns the result of the suppression.
    bool removeUnigramEntry(const int *const word, const int length);

    bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const BigramProperty *const bigramProperty);
+32 −0
Original line number Diff line number Diff line
@@ -689,4 +689,36 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
        binaryDictionary.close();
        dictFile.delete();
    }

    /** Runs the unigram removal scenario once for each entry of DICT_FORMAT_VERSIONS. */
    public void testRemoveUnigrams() {
        for (final int version : DICT_FORMAT_VERSIONS) {
            testRemoveUnigrams(version);
        }
    }

    /**
     * Checks that a word which became valid through repeated input is no longer valid after
     * its unigram entry has been removed.
     *
     * @param formatVersion the dictionary format version used to create the empty dictionary.
     */
    private void testRemoveUnigrams(final int formatVersion) {
        final String word = "aaa";
        final int repeatCount = 20;
        setCurrentTimeForTestMode(mCurrentTime);
        File testFile = null;
        try {
            testFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        }
        final String dictPath = testFile.getAbsolutePath();
        final long dictLength = testFile.length();
        final BinaryDictionary dict = new BinaryDictionary(dictPath, 0 /* offset */, dictLength,
                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
                true /* isUpdatable */);

        // A single input is not enough for the word to be regarded as valid.
        addUnigramWord(dict, word, Dictionary.NOT_A_PROBABILITY);
        assertFalse(dict.isValidWord(word));

        // After repeated inputs the word is expected to be valid.
        int added = 0;
        while (added < repeatCount) {
            addUnigramWord(dict, word, Dictionary.NOT_A_PROBABILITY);
            added++;
        }
        assertTrue(dict.isValidWord(word));

        // Removing the unigram must succeed and make the word invalid again.
        assertTrue(dict.removeUnigramEntry(word));
        assertFalse(dict.isValidWord(word));

        dict.close();
        testFile.delete();
    }
}