Loading java/src/com/android/inputmethod/latin/StringUtils.java +81 −26 Original line number Diff line number Diff line Loading @@ -304,34 +304,89 @@ public final class StringUtils { } if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes; char c = cs.charAt(j - 1); if (c == Keyboard.CODE_PERIOD || c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) { // Here we found a marker for sentence end (we consider these to be one of // either . or ? or ! only). So this is probably the end of a sentence, but if we // found a period, we still want to check the case where this is a abbreviation // period rather than a full stop. To do this, we look for a period within a word // before the period we just found; if any, we take that to mean it was an // abbreviation. // A typical example of the above is "In the U.S. ", where the last period is // not a full stop and we should not capitalize. // TODO: the rule below is broken. In particular it fails for runs of periods, // whatever the reason. In the example "in the U.S..", the last period is a full // stop following the abbreviation period, and we should capitalize but we don't. // Likewise, "I don't know... " should capitalize, but fails to do so. if (c == Keyboard.CODE_PERIOD) { for (int k = j - 2; k >= 0; k--) { c = cs.charAt(k); if (c == Keyboard.CODE_PERIOD) { return TextUtils.CAP_MODE_CHARACTERS & reqModes; char c = cs.charAt(--j); // We found the next interesting chunk of text ; next we need to determine if it's the // end of a sentence. If we have a question mark or an exclamation mark, it's the end of // a sentence. If it's neither, the only remaining case is the period so we get the opposite // case out of the way. 
if (c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes; } if (c != Keyboard.CODE_PERIOD || j <= 0) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; } // We found out that we have a period. We need to determine if this is a full stop or // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation // looks like (\w\.){2,} // To find out, we will have a simple state machine with the following states : // START, WORD, PERIOD, LETTER // On START : (just before the first period) // letter => WORD // whitespace => end with no caps (it was a stand-alone period) // otherwise => end with caps (several periods/symbols in a row) // On WORD : (within the word just before the first period) // letter => WORD // period => PERIOD // otherwise => end with caps (it was a word with a full stop at the end) // On PERIOD : (period within a potential abbreviation) // letter => LETTER // otherwise => end with caps (it was not an abbreviation) // On LETTER : (letter within a potential abbreviation) // letter => LETTER // period => PERIOD // otherwise => end with no caps (it was an abbreviation) // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This // should capitalize. 
final int START = 0; final int WORD = 1; final int PERIOD = 2; final int LETTER = 3; final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES) & reqModes; final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; int state = START; while (j > 0) { c = cs.charAt(--j); switch (state) { case START: if (Character.isLetter(c)) { state = WORD; } else if (Character.isWhitespace(c)) { return noCaps; } else { return caps; } if (!Character.isLetter(c)) { break; case WORD: if (Character.isLetter(c)) { state = WORD; } else if (c == Keyboard.CODE_PERIOD) { state = PERIOD; } else { return caps; } break; case PERIOD: if (Character.isLetter(c)) { state = LETTER; } else { return caps; } break; case LETTER: if (Character.isLetter(c)) { state = LETTER; } else if (c == Keyboard.CODE_PERIOD) { state = PERIOD; } else { return noCaps; } return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes; } return TextUtils.CAP_MODE_CHARACTERS & reqModes; } // Here we arrived at the start of the line. This should behave exactly like whitespace. return (START == state || LETTER == state) ? 
noCaps : caps; } } tests/src/com/android/inputmethod/latin/StringUtilsTests.java +41 −0 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.text.TextUtils; public class StringUtilsTests extends AndroidTestCase { public void testContainsInArray() { Loading Loading @@ -99,4 +100,44 @@ public class StringUtilsTests extends AndroidTestCase { assertFalse("lower-case string", StringUtils.hasUpperCase("string")); assertFalse("lower-case string with non-letters", StringUtils.hasUpperCase("he's")); } private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) { int oneTimeResult = expectedResult & mask; assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask)); } private void allPathsForCaps(final CharSequence cs, final int expectedResult) { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; onePathForCaps(cs, expectedResult, c | w | s); onePathForCaps(cs, expectedResult, w | s); onePathForCaps(cs, expectedResult, c | s); onePathForCaps(cs, expectedResult, c | w); onePathForCaps(cs, expectedResult, c); onePathForCaps(cs, expectedResult, w); onePathForCaps(cs, expectedResult, s); } public void testGetCapsMode() { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; allPathsForCaps("", c | w | s); allPathsForCaps("Word", c); allPathsForCaps("Word.", c); allPathsForCaps("Word ", c | w); allPathsForCaps("Word. ", c | w | s); allPathsForCaps("Word..", c); allPathsForCaps("Word.. ", c | w | s); allPathsForCaps("Word... ", c | w | s); allPathsForCaps("Word ... ", c | w | s); allPathsForCaps("Word . ", c | w); allPathsForCaps("In the U.S ", c | w); allPathsForCaps("In the U.S. ", c | w); allPathsForCaps("Some stuff (e.g. ", c | w); allPathsForCaps("In the U.S.. 
", c | w | s); allPathsForCaps("\"Word.\" ", c | w | s); allPathsForCaps("\"Word\" ", c | w); } } Loading
java/src/com/android/inputmethod/latin/StringUtils.java +81 −26 Original line number Diff line number Diff line Loading @@ -304,34 +304,89 @@ public final class StringUtils { } if (j <= 0) return TextUtils.CAP_MODE_CHARACTERS & reqModes; char c = cs.charAt(j - 1); if (c == Keyboard.CODE_PERIOD || c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) { // Here we found a marker for sentence end (we consider these to be one of // either . or ? or ! only). So this is probably the end of a sentence, but if we // found a period, we still want to check the case where this is a abbreviation // period rather than a full stop. To do this, we look for a period within a word // before the period we just found; if any, we take that to mean it was an // abbreviation. // A typical example of the above is "In the U.S. ", where the last period is // not a full stop and we should not capitalize. // TODO: the rule below is broken. In particular it fails for runs of periods, // whatever the reason. In the example "in the U.S..", the last period is a full // stop following the abbreviation period, and we should capitalize but we don't. // Likewise, "I don't know... " should capitalize, but fails to do so. if (c == Keyboard.CODE_PERIOD) { for (int k = j - 2; k >= 0; k--) { c = cs.charAt(k); if (c == Keyboard.CODE_PERIOD) { return TextUtils.CAP_MODE_CHARACTERS & reqModes; char c = cs.charAt(--j); // We found the next interesting chunk of text ; next we need to determine if it's the // end of a sentence. If we have a question mark or an exclamation mark, it's the end of // a sentence. If it's neither, the only remaining case is the period so we get the opposite // case out of the way. 
if (c == Keyboard.CODE_QUESTION_MARK || c == Keyboard.CODE_EXCLAMATION_MARK) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes; } if (c != Keyboard.CODE_PERIOD || j <= 0) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; } // We found out that we have a period. We need to determine if this is a full stop or // otherwise sentence-ending period, or an abbreviation like "e.g.". An abbreviation // looks like (\w\.){2,} // To find out, we will have a simple state machine with the following states : // START, WORD, PERIOD, LETTER // On START : (just before the first period) // letter => WORD // whitespace => end with no caps (it was a stand-alone period) // otherwise => end with caps (several periods/symbols in a row) // On WORD : (within the word just before the first period) // letter => WORD // period => PERIOD // otherwise => end with caps (it was a word with a full stop at the end) // On PERIOD : (period within a potential abbreviation) // letter => LETTER // otherwise => end with caps (it was not an abbreviation) // On LETTER : (letter within a potential abbreviation) // letter => LETTER // period => PERIOD // otherwise => end with no caps (it was an abbreviation) // "Not an abbreviation" in the above chart essentially covers cases like "...yes.". This // should capitalize. 
final int START = 0; final int WORD = 1; final int PERIOD = 2; final int LETTER = 3; final int caps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES) & reqModes; final int noCaps = (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; int state = START; while (j > 0) { c = cs.charAt(--j); switch (state) { case START: if (Character.isLetter(c)) { state = WORD; } else if (Character.isWhitespace(c)) { return noCaps; } else { return caps; } if (!Character.isLetter(c)) { break; case WORD: if (Character.isLetter(c)) { state = WORD; } else if (c == Keyboard.CODE_PERIOD) { state = PERIOD; } else { return caps; } break; case PERIOD: if (Character.isLetter(c)) { state = LETTER; } else { return caps; } break; case LETTER: if (Character.isLetter(c)) { state = LETTER; } else if (c == Keyboard.CODE_PERIOD) { state = PERIOD; } else { return noCaps; } return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_SENTENCES) & reqModes; } return TextUtils.CAP_MODE_CHARACTERS & reqModes; } // Here we arrived at the start of the line. This should behave exactly like whitespace. return (START == state || LETTER == state) ? noCaps : caps; } }
tests/src/com/android/inputmethod/latin/StringUtilsTests.java +41 −0 Original line number Diff line number Diff line Loading @@ -17,6 +17,7 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.text.TextUtils; public class StringUtilsTests extends AndroidTestCase { public void testContainsInArray() { Loading Loading @@ -99,4 +100,44 @@ public class StringUtilsTests extends AndroidTestCase { assertFalse("lower-case string", StringUtils.hasUpperCase("string")); assertFalse("lower-case string with non-letters", StringUtils.hasUpperCase("he's")); } private void onePathForCaps(final CharSequence cs, final int expectedResult, final int mask) { int oneTimeResult = expectedResult & mask; assertEquals("After >" + cs + "<", oneTimeResult, StringUtils.getCapsMode(cs, mask)); } private void allPathsForCaps(final CharSequence cs, final int expectedResult) { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; onePathForCaps(cs, expectedResult, c | w | s); onePathForCaps(cs, expectedResult, w | s); onePathForCaps(cs, expectedResult, c | s); onePathForCaps(cs, expectedResult, c | w); onePathForCaps(cs, expectedResult, c); onePathForCaps(cs, expectedResult, w); onePathForCaps(cs, expectedResult, s); } public void testGetCapsMode() { final int c = TextUtils.CAP_MODE_CHARACTERS; final int w = TextUtils.CAP_MODE_WORDS; final int s = TextUtils.CAP_MODE_SENTENCES; allPathsForCaps("", c | w | s); allPathsForCaps("Word", c); allPathsForCaps("Word.", c); allPathsForCaps("Word ", c | w); allPathsForCaps("Word. ", c | w | s); allPathsForCaps("Word..", c); allPathsForCaps("Word.. ", c | w | s); allPathsForCaps("Word... ", c | w | s); allPathsForCaps("Word ... ", c | w | s); allPathsForCaps("Word . ", c | w); allPathsForCaps("In the U.S ", c | w); allPathsForCaps("In the U.S. ", c | w); allPathsForCaps("Some stuff (e.g. ", c | w); allPathsForCaps("In the U.S.. 
", c | w | s); allPathsForCaps("\"Word.\" ", c | w | s); allPathsForCaps("\"Word\" ", c | w); } }