Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5aeb0921 authored by Tom Ouyang's avatar Tom Ouyang
Browse files

Start-of-sentence should include newlines and non-period terminators.

Bug: 19795382
Change-Id: Id6cc4a494a06de03d351aa6257632bd3b82e2ec4
parent 320cef5f
Loading
Loading
Loading
Loading
+10 −4
Original line number Original line Diff line number Diff line
@@ -31,6 +31,7 @@ public final class NgramContextUtils {
        // Intentional empty constructor for utility class.
        // Intentional empty constructor for utility class.
    }
    }


    private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
    private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
    private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
    // Get context information from nth word before the cursor. n = 1 retrieves the words
    // Get context information from nth word before the cursor. n = 1 retrieves the words
    // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
    // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
@@ -58,7 +59,11 @@ public final class NgramContextUtils {
    public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
    public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
            final SpacingAndPunctuations spacingAndPunctuations, final int n) {
            final SpacingAndPunctuations spacingAndPunctuations, final int n) {
        if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
        if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
        final String[] w = SPACE_REGEX.split(prev);
        final String[] lines = NEWLINE_REGEX.split(prev);
        if (lines.length == 0) {
            return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
        }
        final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
        final WordInfo[] prevWordsInfo =
        final WordInfo[] prevWordsInfo =
                new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
                new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
        Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
        Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
@@ -81,16 +86,17 @@ public final class NgramContextUtils {
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                break;
                break;
            }
            }

            final String focusedWord = w[focusedWordIndex];
            final String focusedWord = w[focusedWordIndex];
            // If the word is, the context is beginning-of-sentence.
            // If the word is empty, the context is beginning-of-sentence.
            final int length = focusedWord.length();
            final int length = focusedWord.length();
            if (length <= 0) {
            if (length <= 0) {
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                break;
                break;
            }
            }
            // If ends in a sentence separator, the context is beginning-of-sentence.
            // If the word ends in a sentence terminator, the context is beginning-of-sentence.
            final char lastChar = focusedWord.charAt(length - 1);
            final char lastChar = focusedWord.charAt(length - 1);
            if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
            if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
                break;
                break;
            }
            }
+22 −0
Original line number Original line Diff line number Diff line
@@ -17,6 +17,8 @@
package com.android.inputmethod.latin;
package com.android.inputmethod.latin;


import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.NgramContextUtils;


import android.test.AndroidTestCase;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.SmallTest;
import android.test.suitebuilder.annotation.SmallTest;
@@ -120,4 +122,24 @@ public class NgramContextTests extends AndroidTestCase {
        assertEquals(1, ngramContext_a_empty.extractPrevWordsContextArray().length);
        assertEquals(1, ngramContext_a_empty.extractPrevWordsContextArray().length);
        assertEquals("a", ngramContext_a_empty.extractPrevWordsContextArray()[0]);
        assertEquals("a", ngramContext_a_empty.extractPrevWordsContextArray()[0]);
    }
    }

    /**
     * Exercises {@link NgramContextUtils#getNgramContextFromNthPreviousWord} with n = 1 and
     * checks the string returned by {@code extractPrevWordsContext()} for empty text, text
     * broken by '.', '?', '!' or a newline, and text with no such break.
     */
    public void testGetNgramContextFromNthPreviousWord() {
        SpacingAndPunctuations spacingAndPunctuations = new SpacingAndPunctuations(
                mContext.getResources());
        // Empty text: the context is expected to be the "<S>" marker alone.
        assertEquals("<S>", NgramContextUtils.getNgramContextFromNthPreviousWord("",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        // "a" followed by '.', '?' or '!': only "b" is expected after the "<S>" marker.
        // NOTE(review): relies on the test resources treating these three characters as
        // sentence terminators -- confirm against the SpacingAndPunctuations configuration.
        assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a. b ",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a? b ",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a! b ",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        // A newline between "a" and "b" is expected to give the same result as a terminator.
        assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a\nb ",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        // No terminator or newline: both words are expected after the "<S>" marker.
        assertEquals("<S> a b", NgramContextUtils.getNgramContextFromNthPreviousWord("a b ",
                spacingAndPunctuations, 1).extractPrevWordsContext());
        // Five words: the returned context must not start with "<S>".
        // NOTE(review): presumably because the context array holds fewer previous words than
        // the input contains -- confirm against MAX_PREV_WORD_COUNT_FOR_N_GRAM.
        assertFalse(NgramContextUtils
                .getNgramContextFromNthPreviousWord("a b c d e", spacingAndPunctuations, 1)
                .extractPrevWordsContext().startsWith("<S>"));
    }
}
}