Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a4c1f1c1 authored by Yusuke Nojima's avatar Yusuke Nojima
Browse files

Calibrate the scores of the proximity chars according to the distances.

+1      75
-1      27
+2       0
-2       0
+3       0
-3       0
+4      30
-4      48
+5      37
-5      27
+6       4
-6      35
+7       9
-7      18

Change-Id: I3c6ab06a0084c18ab595147c36c2ff4b1e961a7b
parent ce9e4f92
Loading
Loading
Loading
Loading
+57 −56
Original line number Diff line number Diff line
@@ -118,7 +118,6 @@ bool Correction::initProcessState(const int outputIndex) {
    mInputIndex = mCorrectionStates[outputIndex].mInputIndex;
    mNeedsToTraverseAllNodes = mCorrectionStates[outputIndex].mNeedsToTraverseAllNodes;

    mSumOfDistance = mCorrectionStates[outputIndex].mSumOfDistance;
    mEquivalentCharCount = mCorrectionStates[outputIndex].mEquivalentCharCount;
    mProximityCount = mCorrectionStates[outputIndex].mProximityCount;
    mTransposedCount = mCorrectionStates[outputIndex].mTransposedCount;
@@ -174,7 +173,6 @@ void Correction::incrementOutputIndex() {
    mCorrectionStates[mOutputIndex].mInputIndex = mInputIndex;
    mCorrectionStates[mOutputIndex].mNeedsToTraverseAllNodes = mNeedsToTraverseAllNodes;

    mCorrectionStates[mOutputIndex].mSumOfDistance = mSumOfDistance;
    mCorrectionStates[mOutputIndex].mEquivalentCharCount = mEquivalentCharCount;
    mCorrectionStates[mOutputIndex].mProximityCount = mProximityCount;
    mCorrectionStates[mOutputIndex].mTransposedCount = mTransposedCount;
@@ -227,20 +225,26 @@ Correction::CorrectionType Correction::processCharAndCalcState(
    // TODO: Change the limit if we'll allow two or more corrections
    const bool noCorrectionsHappenedSoFar = correctionCount == 0;
    const bool canTryCorrection = noCorrectionsHappenedSoFar;
    int proximityIndex = 0;
    mDistances[mOutputIndex] = NOT_A_DISTANCE;

    if (mNeedsToTraverseAllNodes || isQuote(c)) {
        bool incremented = false;
        if (mLastCharExceeded && mInputIndex == mInputLength - 1) {
            // TODO: Do not check the proximity if EditDistance exceeds the threshold
            const ProximityInfo::ProximityType matchId =
                    mProximityInfo->getMatchedProximityId(mInputIndex, c, true);
                    mProximityInfo->getMatchedProximityId(mInputIndex, c, true, &proximityIndex);
            if (isEquivalentChar(matchId)) {
                mLastCharExceeded = false;
                --mExcessiveCount;
                mDistances[mOutputIndex] =
                        mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
            } else if (matchId == ProximityInfo::NEAR_PROXIMITY_CHAR) {
                mLastCharExceeded = false;
                --mExcessiveCount;
                ++mProximityCount;
                mDistances[mOutputIndex] =
                        mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
            }
            incrementInputIndex();
            incremented = true;
@@ -301,7 +305,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
    const bool checkProximityChars = noCorrectionsHappenedSoFar ||  mProximityCount == 0;
    ProximityInfo::ProximityType matchedProximityCharId = secondTransposing
            ? ProximityInfo::EQUIVALENT_CHAR
            : mProximityInfo->getMatchedProximityId(mInputIndex, c, checkProximityChars);
            : mProximityInfo->getMatchedProximityId(
                    mInputIndex, c, checkProximityChars, &proximityIndex);

    if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
        if (canTryCorrection && mOutputIndex > 0
@@ -323,8 +328,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
            // Here, we recompute matchedProximityCharId in the same way as above,
            // but since we already know that an "excessive char correction" just happened,
            // we only need to check "mProximityCount == 0".
            matchedProximityCharId =
                    mProximityInfo->getMatchedProximityId(mInputIndex, c, mProximityCount == 0);
            matchedProximityCharId = mProximityInfo->getMatchedProximityId(
                    mInputIndex, c, mProximityCount == 0, &proximityIndex);
        }
    }

@@ -399,17 +404,12 @@ Correction::CorrectionType Correction::processCharAndCalcState(
    } else if (isEquivalentChar(matchedProximityCharId)) {
        mMatching = true;
        ++mEquivalentCharCount;
        if (mSumOfDistance != NOT_A_DISTANCE) {
            const int distance = mProximityInfo->getNormalizedSquaredDistance(mInputIndex);
            if (distance != NOT_A_DISTANCE) {
                mSumOfDistance += distance;
            } else {
                mSumOfDistance = NOT_A_DISTANCE;
            }
        }
        mDistances[mOutputIndex] = mProximityInfo->getNormalizedSquaredDistance(mInputIndex, 0);
    } else if (ProximityInfo::NEAR_PROXIMITY_CHAR == matchedProximityCharId) {
        mProximityMatching = true;
        ++mProximityCount;
        mDistances[mOutputIndex] =
                mProximityInfo->getNormalizedSquaredDistance(mInputIndex, proximityIndex);
    }

    mWord[mOutputIndex] = c;
@@ -583,8 +583,6 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
    const int transposedCount = correction->mTransposedCount / 2;
    const int excessiveCount = correction->mExcessiveCount + correction->mTransposedCount % 2;
    const int proximityMatchedCount = correction->mProximityCount;
    const int mSumOfDistance = correction->mSumOfDistance;
    const int mEquivalentCharCount = correction->mEquivalentCharCount;
    const bool lastCharExceeded = correction->mLastCharExceeded;
    const bool useFullEditDistance = correction->mUseFullEditDistance;
    const int outputLength = outputIndex + 1;
@@ -684,6 +682,41 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
        }
    }

    // Score calibration by touch coordinates is currently done only for pure fat-finger typing
    // errors, i.e. when there are no skipped, excessive, or transposed characters.
    // TODO: Remove this constraint.
    if (CALIBRATE_SCORE_BY_TOUCH_COORDINATES && proximityInfo->touchPositionCorrectionEnabled()
            && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0) {
        for (int i = 0; i < outputLength; ++i) {
            const int squaredDistance = correction->mDistances[i];
            if (i < adjustedProximityMatchedCount) {
                multiplyIntCapped(typedLetterMultiplier, &finalFreq);
            }
            if (squaredDistance >= 0) {
                // Promote or demote the score according to the distance from the sweet spot
                static const float A = ZERO_DISTANCE_PROMOTION_RATE / 100.0f;
                static const float B = 1.0f;
                static const float C = 0.5f;
                static const float R1 = NEUTRAL_SCORE_SQUARED_RADIUS;
                static const float R2 = HALF_SCORE_SQUARED_RADIUS;
                const float x = (float)squaredDistance
                        / ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR;
                const float factor = (x < R1)
                    ? (A * (R1 - x) + B * x) / R1
                    : (B * (R2 - x) + C * (x - R1)) / (R2 - R1);
                // factor is piecewise linear function like:
                // A -_                  .
                //     ^-_               .
                // B      \              .
                //         \             .
                // C        \            .
                //   0   R1 R2
                multiplyRate((int)(factor * 100), &finalFreq);
            } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
                multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
            }
        }
    } else {
        // Promotion for a word with proximity characters
        for (int i = 0; i < adjustedProximityMatchedCount; ++i) {
            // A word with proximity corrections
@@ -693,38 +726,6 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
            multiplyIntCapped(typedLetterMultiplier, &finalFreq);
            multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
        }

    if (CALIBRATE_SCORE_BY_TOUCH_COORDINATES
            && mEquivalentCharCount > 0 && mSumOfDistance != NOT_A_DISTANCE) {
        // Let (x, y) be the coordinate of a user's touch, and let c be a key.
        // Assuming users' touches follow a Gaussian distribution, the conditional probability of
        // the user touching (x, y) given that he or she intends to hit c is:
        //   p(x, y | c) = exp(-(x - m_x)^2 / (2 * s^2)) / (sqrt(2 * pi) * s)
        //               * exp(-(y - m_y)^2 / (2 * s^2)) / (sqrt(2 * pi) * s)
        // where (m_x, m_y) is the mean of touches of c, and s is the standard deviation of
        // touches of c.
        // If the user touches c1, c2, .., cn, the joint probability is
        //   p(x1, y1 | c1) * p(x2, y2 | c2) * ... * p(xn, yn | cn)
        // We consider the logarithm of this value, that is
        //     sum_i log p(x_i, y_i | c_i)
        //   = -sum_i ((x_i - m_x)^2 + (y_i - m_y)^2) / (2 * s^2) + const
        // Thus, we use the sum of squared distances as a score of the word.
        static const int UPPER = WORDS_WITH_EQUIVALENT_CHAR_STRONGEST_PROMOTION_RATE;
        static const int LOWER = WORDS_WITH_EQUIVALENT_CHAR_WEAKEST_DEMOTION_RATE;
        static const int MIDDLE = 100;
        static const int SHIFT = ProximityInfo::NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR_LOG_2;
        const int expected = mEquivalentCharCount << SHIFT;
        // factor is a function as described below:
        // U\            .
        //   \           .
        // M  \          .
        //     \         .
        // L    \------- .
        //  0 e
        // (x-axis is mSumOfDistance, y-axis is rate,
        //  and e, U, M, L are expected, UPPER, MIDDLE, LOWER respectively.)
        const int factor =
                max((UPPER * expected - (UPPER - MIDDLE) * mSumOfDistance) / expected, LOWER);
        multiplyRate(factor, &finalFreq);
    }

    const int errorCount = adjustedProximityMatchedCount > 0
+4 −1
Original line number Diff line number Diff line
@@ -115,7 +115,11 @@ private:
    int mMissingSpacePos;
    int mTerminalInputIndex;
    int mTerminalOutputIndex;

    // The following arrays are state buffers.
    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
    int mDistances[MAX_WORD_LENGTH_INTERNAL];

    // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
    // Caveat: Do not create multiple tables per thread, as this table uses a lot of RAM.
    int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)];
@@ -128,7 +132,6 @@ private:
    int mInputIndex;

    int mEquivalentCharCount;
    int mSumOfDistance;
    int mProximityCount;
    int mExcessiveCount;
    int mTransposedCount;
+0 −2
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@ struct CorrectionState {
    uint16_t mChildCount;
    uint8_t mInputIndex;

    int32_t mSumOfDistance;
    uint8_t mEquivalentCharCount;
    uint8_t mProximityCount;
    uint8_t mTransposedCount;
@@ -65,7 +64,6 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
    state->mExcessivePos = -1;
    state->mSkipPos = -1;

    state->mSumOfDistance = 0;
    state->mEquivalentCharCount = 0;
    state->mProximityCount = 0;
    state->mTransposedCount = 0;
+6 −2
Original line number Diff line number Diff line
@@ -163,6 +163,9 @@ static void dumpWord(const unsigned short* word, const int length) {
#define NOT_VALID_WORD -99
#define NOT_A_CHARACTER -1
#define NOT_A_DISTANCE -1
#define EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO -2
#define PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO -3
#define NOT_A_INDEX -1

#define KEYCODE_SPACE ' '

@@ -181,8 +184,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
#define WORDS_WITH_EQUIVALENT_CHAR_STRONGEST_PROMOTION_RATE 110
#define WORDS_WITH_EQUIVALENT_CHAR_WEAKEST_DEMOTION_RATE 90
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
@@ -191,6 +192,9 @@ static void dumpWord(const unsigned short* word, const int length) {
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
#define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50
#define ZERO_DISTANCE_PROMOTION_RATE 110
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f

// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
+54 −29
Original line number Diff line number Diff line
@@ -44,13 +44,21 @@ ProximityInfo::ProximityInfo(const int maxProximityCharsSize, const int keyboard
          CELL_WIDTH((keyboardWidth + gridWidth - 1) / gridWidth),
          CELL_HEIGHT((keyboardHeight + gridHeight - 1) / gridHeight),
          KEY_COUNT(min(keyCount, MAX_KEY_COUNT_IN_A_KEYBOARD)),
          mInputXCoordinates(NULL), mInputYCoordinates(NULL) {
          HAS_TOUCH_POSITION_CORRECTION_DATA(keyCount > 0 && keyXCoordinates && keyYCoordinates
                  && keyWidths && keyHeights && keyCharCodes && sweetSpotCenterXs
                  && sweetSpotCenterYs && sweetSpotRadii),
          mInputXCoordinates(NULL), mInputYCoordinates(NULL),
          mTouchPositionCorrectionEnabled(false) {
    const int len = GRID_WIDTH * GRID_HEIGHT * MAX_PROXIMITY_CHARS_SIZE;
    mProximityCharsArray = new uint32_t[len];
    mNormalizedSquaredDistances = new int[len];
    if (DEBUG_PROXIMITY_INFO) {
        LOGI("Create proximity info array %d", len);
    }
    memcpy(mProximityCharsArray, proximityCharsArray, len * sizeof(mProximityCharsArray[0]));
    for (int i = 0; i < len; ++i) {
        mNormalizedSquaredDistances[i] = NOT_A_DISTANCE;
    }

    copyOrFillZero(mKeyXCoordinates, keyXCoordinates, KEY_COUNT * sizeof(mKeyXCoordinates[0]));
    copyOrFillZero(mKeyYCoordinates, keyYCoordinates, KEY_COUNT * sizeof(mKeyYCoordinates[0]));
@@ -79,6 +87,7 @@ void ProximityInfo::initializeCodeToKeyIndex() {
}

// Frees the two heap arrays that the constructor allocates with new[].
ProximityInfo::~ProximityInfo() {
    delete[] mProximityCharsArray;
    delete[] mNormalizedSquaredDistances;
}

@@ -109,52 +118,61 @@ void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength,
    mInputCodes = inputCodes;
    mInputXCoordinates = xCoordinates;
    mInputYCoordinates = yCoordinates;
    mTouchPositionCorrectionEnabled =
            HAS_TOUCH_POSITION_CORRECTION_DATA && xCoordinates && yCoordinates;
    mInputLength = inputLength;
    for (int i = 0; i < inputLength; ++i) {
        mPrimaryInputWord[i] = getPrimaryCharAt(i);
    }
    mPrimaryInputWord[inputLength] = 0;
    for (int i = 0; i < mInputLength; ++i) {
        float normalizedSquaredDistance = calculateNormalizedSquaredDistance(i);
        if (normalizedSquaredDistance >= 0.0f) {
            mNormalizedSquaredDistance[i] =
                (int)(normalizedSquaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
        const int *proximityChars = getProximityCharsAt(i);
        for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE && proximityChars[j] > 0; ++j) {
            const int currentChar = proximityChars[j];
            const int keyIndex = getKeyIndex(currentChar);
            const float squaredDistance = calculateNormalizedSquaredDistance(keyIndex, i);
            if (squaredDistance >= 0.0f) {
                mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] =
                        (int)(squaredDistance * NORMALIZED_SQUARED_DISTANCE_SCALING_FACTOR);
            } else {
            mNormalizedSquaredDistance[i] = NOT_A_DISTANCE;
                mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE + j] = (j == 0)
                        ? EQUIVALENT_CHAR_WITHOUT_DISTANCE_INFO
                        : PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO;
            }
        }
    }
}

// Returns the given value multiplied by itself.
inline float square(const float x) {
    const float squared = x * x;
    return squared;
}

float ProximityInfo::calculateNormalizedSquaredDistance(int index) const {
float ProximityInfo::calculateNormalizedSquaredDistance(
        const int keyIndex, const int inputIndex) const {
    static const float NOT_A_DISTANCE_FLOAT = -1.0f;
    if (KEY_COUNT == 0 || !mInputXCoordinates || !mInputYCoordinates) {
        // We do not have the coordinate data
    if (keyIndex == NOT_A_INDEX) {
        return NOT_A_DISTANCE_FLOAT;
    }
    const int currentChar = getPrimaryCharAt(index);
    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(currentChar);
    if (baseLowerC > MAX_CHAR_CODE) {
    if (!hasSweetSpotData(keyIndex)) {
        return NOT_A_DISTANCE_FLOAT;
    }
    const int keyIndex = mCodeToKeyIndex[baseLowerC];
    if (keyIndex < 0) {
        return NOT_A_DISTANCE_FLOAT;
    const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(keyIndex, inputIndex);
    const float squaredRadius = square(mSweetSpotRadii[keyIndex]);
    return squaredDistance / squaredRadius;
}

int ProximityInfo::getKeyIndex(const int c) const {
    if (KEY_COUNT == 0 || !mInputXCoordinates || !mInputYCoordinates) {
        // We do not have the coordinate data
        return NOT_A_INDEX;
    }
    const float radius = mSweetSpotRadii[keyIndex];
    if (radius <= 0.0) {
        // When there are no calibration data for a key,
        // the radius of the key is assigned to zero.
        return NOT_A_DISTANCE;
    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
    if (baseLowerC > MAX_CHAR_CODE) {
        return NOT_A_INDEX;
    }
    const float squaredRadius = square(radius);
    const float squaredDistance = calculateSquaredDistanceFromSweetSpotCenter(keyIndex, index);
    return squaredDistance / squaredRadius;
    return mCodeToKeyIndex[baseLowerC];
}

float ProximityInfo::calculateSquaredDistanceFromSweetSpotCenter(
        int keyIndex, int inputIndex) const {
        const int keyIndex, const int inputIndex) const {
    const float sweetSpotCenterX = mSweetSpotCenterXs[keyIndex];
    const float sweetSpotCenterY = mSweetSpotCenterYs[keyIndex];
    const float inputX = (float)mInputXCoordinates[inputIndex];
@@ -202,11 +220,13 @@ bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
// then the word contains at that position a character close to what the user
// typed.
// What the user typed is actually the first character of the array.
// proximityIndex is an optional pointer (it may be NULL) to the variable where
// getMatchedProximityId stores the index of c in the proximity chars of the input index.
// Notice : accented characters do not have a proximity list, so they are alone
// in their list. The non-accented version of the character should be considered
// "close", but not the other keys close to the non-accented version.
ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
        const int index, const unsigned short c, const bool checkProximityChars) const {
ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(const int index,
        const unsigned short c, const bool checkProximityChars, int *proximityIndex) const {
    const int *currentChars = getProximityCharsAt(index);
    const int firstChar = currentChars[0];
    const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
@@ -227,9 +247,14 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(

    // Not an exact nor an accent-alike match: search the list of close keys
    int j = 1;
    while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS_SIZE) {
    while (j < MAX_PROXIMITY_CHARS_SIZE && currentChars[j] > 0) {
        const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c);
        if (matched) return NEAR_PROXIMITY_CHAR;
        if (matched) {
            if (proximityIndex) {
                *proximityIndex = j;
            }
            return NEAR_PROXIMITY_CHAR;
        }
        ++j;
    }

Loading