Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c63d1834 authored by Keisuke Kuroyanagi
Browse files

Implement PatriciaTriePolicy::getWordProperty().

Bug: 12810574
Change-Id: I7bcccfd3641ebbcf2b8d857d33bb4734c42af5eb
parent fd9599e9
Loading
Loading
Loading
Loading
+60 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -303,4 +304,63 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
    return siblingPos;
}

// Builds a WordProperty for the word spelled by codePoints: the terminal PtNode's code points
// and flags, plus the bigram and shortcut entries attached to that node.
// Logs an error and returns a default-constructed WordProperty when no terminal PtNode is found
// for the word. Timestamp, level and count are always reported as NOT_A_TIMESTAMP, 0 and 0 here.
const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
        const int codePointCount) const {
    const int terminalPos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
            false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == terminalPos) {
        AKLOGE("getWordProperty was called for invalid word.");
        return WordProperty();
    }
    const PtNodeParams nodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(terminalPos);
    // The reported word is taken from the PtNode params rather than from the caller's buffer.
    std::vector<int> wordCodePoints(nodeParams.getCodePoints(),
            nodeParams.getCodePoints() + nodeParams.getCodePointCount());

    // Collect the bigram entries attached to the terminal node.
    std::vector<WordProperty::BigramProperty> bigramProperties;
    const int bigramListPos = getBigramsPositionOfPtNode(terminalPos);
    // Scratch buffer reused for the target word of every bigram entry.
    int targetCodePoints[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos);
    while (bigramsIt.hasNext()) {
        // Advance first; the getters below describe the entry that was just read.
        bigramsIt.next();
        const int targetPos = bigramsIt.getBigramPos();
        if (targetPos == NOT_A_DICT_POS) {
            // Deleted entry; nothing to report. This never happens for ver2 dicts.
            continue;
        }
        // The target's unigram probability is fetched as an out-parameter but is not used in
        // the reported bigram probability.
        int targetProbability = NOT_A_PROBABILITY;
        const int targetLength = getCodePointsAndProbabilityAndReturnCodePointCount(
                targetPos, MAX_WORD_LENGTH, targetCodePoints, &targetProbability);
        std::vector<int> target(targetCodePoints, targetCodePoints + targetLength);
        bigramProperties.push_back(WordProperty::BigramProperty(&target,
                bigramsIt.getProbability(), NOT_A_TIMESTAMP /* timestamp */, 0 /* level */,
                0 /* count */));
    }

    // Collect the shortcut targets attached to the terminal node, if it has a shortcut list.
    std::vector<WordProperty::ShortcutProperty> shortcutProperties;
    int shortcutReadPos = getShortcutPositionOfPtNode(terminalPos);
    if (shortcutReadPos != NOT_A_DICT_POS) {
        int shortcutCodePoints[MAX_WORD_LENGTH];
        // The returned size is ignored; presumably the call is only needed to move
        // shortcutReadPos past the size field.
        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot,
                &shortcutReadPos);
        bool moreEntries;
        // At least one entry is always read; each entry's flags say whether another follows.
        do {
            const ShortcutListReadingUtils::ShortcutFlags entryFlags =
                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot,
                            &shortcutReadPos);
            moreEntries = ShortcutListReadingUtils::hasNext(entryFlags);
            const int targetLength = ShortcutListReadingUtils::readShortcutTarget(
                    mDictRoot, MAX_WORD_LENGTH, shortcutCodePoints, &shortcutReadPos);
            std::vector<int> target(shortcutCodePoints, shortcutCodePoints + targetLength);
            const int targetProbability =
                    ShortcutListReadingUtils::getProbabilityFromFlags(entryFlags);
            shortcutProperties.push_back(
                    WordProperty::ShortcutProperty(&target, targetProbability));
        } while (moreEntries);
    }

    return WordProperty(&wordCodePoints, nodeParams.isNotAWord(),
            nodeParams.isBlacklisted(), nodeParams.hasBigrams(),
            nodeParams.hasShortcutTargets(), nodeParams.getProbability(),
            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */,
            &bigramProperties, &shortcutProperties);
}

} // namespace latinime
+1 −4
Original line number Diff line number Diff line
@@ -128,10 +128,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    }

    const WordProperty getWordProperty(const int *const codePoints,
            const int codePointCount) const {
        // getWordProperty is not supported.
        return WordProperty();
    }
            const int codePointCount) const;

    int getNextWordAndNextToken(const int token, int *const outCodePoints) {
        // getNextWordAndNextToken is not supported.
+36 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
@@ -596,4 +597,39 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
            Log.d(TAG, result);
        }
    }

    /**
     * Writes a version 2 dictionary built from the test words and shortcuts to a file in the
     * cache directory, opens it as a non-updatable {@link BinaryDictionary}, and checks that
     * {@code getWordProperty()} reports the expected word, unigram probability and shortcut
     * targets for every word.
     */
    public void testVer2DictGetWordProperty() {
        final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
        final ArrayList<String> words = sWords;
        final HashMap<String, List<String>> shortcuts = sShortcuts;
        final String dictName = "testGetWordProperty";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words, shortcuts);
        addBigrams(dict, words, sEmptyBigrams);
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());
        // Remove any stale file so the dictionary is written from scratch.
        file.delete();
        timeWritingDictToFile(file, dict, formatOptions);
        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
                0 /* offset */, file.length(), true /* useFullEditDistance */,
                Locale.ENGLISH, dictName, false /* isUpdatable */);
        for (final String word : words) {
            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
            assertEquals(word, wordProperty.mWord);
            assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
            if (shortcuts.containsKey(word)) {
                // Work on a copy: matched targets are removed below, and removing them from the
                // list stored in the shared sShortcuts map would empty the fixture for any
                // code that reads it afterwards.
                final List<String> shortcutList = new ArrayList<String>(shortcuts.get(word));
                assertEquals(shortcutList.size(), wordProperty.mShortcutTargets.size());
                assertTrue(wordProperty.mHasShortcuts);
                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                    assertTrue(shortcutList.contains(shortcutTarget.mWord));
                    assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
                    shortcutList.remove(shortcutTarget.mWord);
                }
                // Every expected target must have been matched exactly once.
                assertTrue(shortcutList.isEmpty());
            }
        }
    }
}