Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 459cd6f8 authored by Jean Chalard's avatar Jean Chalard
Browse files

Implement the heuristic for auto-commit.

Bug: 9059617
Change-Id: I066abf018df5aaeabf415425dd822ebe233e6008
parent cc81a93b
Loading
Loading
Loading
Loading
+4 −15
Original line number Diff line number Diff line
@@ -44,9 +44,9 @@ public final class BinaryDictionary extends Dictionary {
    // Maximum word length, taken from Constants.DICTIONARY_MAX_WORD_LENGTH.
    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
    private static final int MAX_RESULTS = 18;
    // Required space count for auto commit.
    // TODO: Remove this heuristic.
    private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;
    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_FOR_AUTO_COMMIT in native/jni/src/defines.h
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
@@ -343,18 +343,7 @@ public final class BinaryDictionary extends Dictionary {

    /**
     * Tells whether the first word of the given candidate should be committed automatically.
     *
     * The decision relies only on the first-word confidence carried by the candidate: the
     * auto-commit happens when that confidence is strictly above CONFIDENCE_TO_AUTO_COMMIT.
     *
     * @param candidate the suggestion to evaluate.
     * @return true if the candidate's confidence is strictly greater than the cutoff.
     */
    @Override
    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
        return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
    }

    @Override
+11 −1
Original line number Diff line number Diff line
@@ -298,9 +298,19 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)

// A special value to mean the first word confidence makes no sense in this case,
// e.g. this is not a multi-word suggestion.
// This has to be the lowest possible value: callers auto-commit when the confidence is above
// the auto-commit cutoff, so the "no confidence" marker must never compare above it.
#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN)
// How high the confidence needs to be for us to auto-commit. Arbitrary.
// This needs to be the same as CONFIDENCE_TO_AUTO_COMMIT in BinaryDictionary.java
#define CONFIDENCE_FOR_AUTO_COMMIT (1000000)
// Weight of the distance factor: 80% of the full confidence
#define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)
// Weight of the length factor: 100% of the full confidence
#define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT)
// Weight of the space count factor: 80% of the full confidence
#define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100)

#define KEYCODE_SPACE ' '
#define KEYCODE_SINGLE_QUOTE '\''
+10 −0
Original line number Diff line number Diff line
@@ -321,6 +321,16 @@ class DicNode {
        DUMP_WORD_AND_SCORE("OUTPUT");
    }

    // Returns the number of spaces found in the previous words of this suggestion.
    // In this class "total" refers to the whole suggestion. For a multi-word suggestion, the
    // PtNode referenced in mDicNodeState only corresponds to the last word; all the words
    // before it are kept concatenated in mPrevWord, which ends with a space. A node that is
    // still on its first word reports 0.
    int getTotalNodeSpaceCount() const {
        if (!isFirstWord()) {
            return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
                    mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
        }
        return 0;
    }

    int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
        const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
        if (inputIndex == NOT_AN_INDEX) {
+54 −4
Original line number Diff line number Diff line
@@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
    // TODO: have partial commit work even with multiple pointers.
    const bool outputSecondWordFirstLetterInputIndex =
            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
    outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence();
    if (terminalSize > 0) {
        // If we have no suggestions, don't write this
        outputAutoCommitFirstWordConfidence[0] =
                computeFirstWordConfidence(&terminals[0]);
    }

    // Output suggestion results here
    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
@@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
    return outputWordIndex;
}

int Suggest::computeFirstWordConfidence() const {
    // TODO: implement this.
    return NOT_A_FIRST_WORD_CONFIDENCE;
int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
    // Get the number of spaces in the first suggestion
    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    // Get the number of characters in the first suggestion
    const int length = terminalDicNode->getTotalNodeCodePointCount();
    // Get the distance for the first word of the suggestion
    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();

    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    // we are very confident.
    // Expected space count is 1 ~ 5
    static const int MIN_EXPECTED_SPACE_COUNT = 1;
    static const int MAX_EXPECTED_SPACE_COUNT = 5;
    // Expected length is about 4 ~ 30
    static const int MIN_EXPECTED_LENGTH = 4;
    static const int MAX_EXPECTED_LENGTH = 30;
    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    static const float MIN_EXPECTED_DISTANCE = 0.0;
    static const float MAX_EXPECTED_DISTANCE = 2.0;
    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    // outside these values. We want to output a value that reflects all of these. Each factor
    // contributes a bit.

    // We need at least a space.
    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;

    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
    // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
    // weight of the distance. Clamp to avoid overflows.
    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
            * (MAX_EXPECTED_DISTANCE - clampedDistance)
            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    // weight of the space count.
    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);

    return distanceContribution + lengthContribution + spaceContribution;
}

/**
+1 −1
Original line number Diff line number Diff line
@@ -58,7 +58,7 @@ class Suggest : public SuggestInterface {
    int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
            int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
            int *outputAutoCommitFirstWordConfidence) const;
    int computeFirstWordConfidence() const;
    int computeFirstWordConfidence(const DicNode *const terminalDicNode) const;
    void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
    void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
    void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
Loading