Merge "Start-of-sentence should include newlines and non-period terminators." (edcf5853) · Commits · e / os / android_packages_inputmethods_LatinIME

java/src/com/android/inputmethod/latin/utils/NgramContextUtils.java

+10 −4

Original line number	Diff line number	Diff line
		@@ -31,6 +31,7 @@ public final class NgramContextUtils {
		// Intentional empty constructor for utility class.
		}

		private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
		private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
		// Get context information from nth word before the cursor. n = 1 retrieves the words
		// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
		@@ -58,7 +59,11 @@ public final class NgramContextUtils {
		public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
		final SpacingAndPunctuations spacingAndPunctuations, final int n) {
		if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
		final String[] w = SPACE_REGEX.split(prev);
		final String[] lines = NEWLINE_REGEX.split(prev);
		if (lines.length == 0) {
		return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
		}
		final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
		final WordInfo[] prevWordsInfo =
		new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
		Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
		@@ -81,16 +86,17 @@ public final class NgramContextUtils {
		prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
		break;
		}

		final String focusedWord = w[focusedWordIndex];
		// If the word is, the context is beginning-of-sentence.
		// If the word is empty, the context is beginning-of-sentence.
		final int length = focusedWord.length();
		if (length <= 0) {
		prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
		break;
		}
		// If ends in a sentence separator, the context is beginning-of-sentence.
		// If the word ends in a sentence terminator, the context is beginning-of-sentence.
		final char lastChar = focusedWord.charAt(length - 1);
		if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
		if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
		prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
		break;
		}

tests/src/com/android/inputmethod/latin/NgramContextTests.java

+22 −0

Original line number	Diff line number	Diff line
		@@ -17,6 +17,8 @@
		package com.android.inputmethod.latin;

		import com.android.inputmethod.latin.NgramContext.WordInfo;
		import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
		import com.android.inputmethod.latin.utils.NgramContextUtils;

		import android.test.AndroidTestCase;
		import android.test.suitebuilder.annotation.SmallTest;
		@@ -120,4 +122,24 @@ public class NgramContextTests extends AndroidTestCase {
		assertEquals(1, ngramContext_a_empty.extractPrevWordsContextArray().length);
		assertEquals("a", ngramContext_a_empty.extractPrevWordsContextArray()[0]);
		}

		/**
		 * Exercises {@link NgramContextUtils#getNgramContextFromNthPreviousWord} with n = 1 and
		 * checks the string returned by {@code extractPrevWordsContext()} for: empty input, text
		 * whose first word is followed by '.', '?', '!' or a newline, a short two-word input, and
		 * a five-word input that must not start with the "&lt;S&gt;" marker.
		 */
		public void testGetNgramContextFromNthPreviousWord() {
		    final SpacingAndPunctuations punctuations =
		            new SpacingAndPunctuations(mContext.getResources());

		    // Empty text before the cursor yields only the "<S>" marker.
		    final String contextForEmptyInput = NgramContextUtils
		            .getNgramContextFromNthPreviousWord("", punctuations, 1)
		            .extractPrevWordsContext();
		    assertEquals("<S>", contextForEmptyInput);

		    // Each of these inputs is expected to produce "<S> b": the "a" before the
		    // '.', '?', '!' or newline must not appear in the resulting context.
		    final String[] inputsWithBreakAfterFirstWord = { "a. b ", "a? b ", "a! b ", "a\nb " };
		    for (final String textBeforeCursor : inputsWithBreakAfterFirstWord) {
		        final String context = NgramContextUtils
		                .getNgramContextFromNthPreviousWord(textBeforeCursor, punctuations, 1)
		                .extractPrevWordsContext();
		        assertEquals("<S> b", context);
		    }

		    // Two words with no terminator or newline: both words follow the "<S>" marker.
		    final String contextForShortInput = NgramContextUtils
		            .getNgramContextFromNthPreviousWord("a b ", punctuations, 1)
		            .extractPrevWordsContext();
		    assertEquals("<S> a b", contextForShortInput);

		    // Five words: the resulting context must not begin with the "<S>" marker.
		    final String contextForLongInput = NgramContextUtils
		            .getNgramContextFromNthPreviousWord("a b c d e", punctuations, 1)
		            .extractPrevWordsContext();
		    assertFalse(contextForLongInput.startsWith("<S>"));
		}
		}