Loading java/src/com/android/inputmethod/research/FixedLogBuffer.java +28 −23 Original line number Diff line number Diff line Loading @@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer { */ @Override public void shiftIn(final LogUnit newLogUnit) { if (!newLogUnit.hasWord()) { // This LogUnit isn't a word, so it doesn't count toward the word-limit. if (!newLogUnit.hasOneOrMoreWords()) { // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit. super.shiftIn(newLogUnit); return; } final int numWordsIncoming = newLogUnit.getNumWords(); if (mNumActualWords >= mWordCapacity) { // Give subclass a chance to handle the buffer full condition by shifting out logUnits. onBufferFull(); // If still full, evict. if (mNumActualWords >= mWordCapacity) { shiftOutWords(1); shiftOutWords(numWordsIncoming); } } super.shiftIn(newLogUnit); mNumActualWords++; // Must be a word, or we wouldn't be here. mNumActualWords += numWordsIncoming; } @Override public LogUnit unshiftIn() { final LogUnit logUnit = super.unshiftIn(); if (logUnit != null && logUnit.hasWord()) { mNumActualWords--; if (logUnit != null && logUnit.hasOneOrMoreWords()) { mNumActualWords -= logUnit.getNumWords(); } return logUnit; } Loading Loading @@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer { @Override public LogUnit shiftOut() { final LogUnit logUnit = super.shiftOut(); if (logUnit != null && logUnit.hasWord()) { mNumActualWords--; if (logUnit != null && logUnit.hasOneOrMoreWords()) { mNumActualWords -= logUnit.getNumWords(); } return logUnit; } Loading @@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer { * If there are less than {@code numWords} word-containing {@link LogUnit}s, shifts out * all {@code LogUnit}s in the buffer. 
* * @param numWords the number of word-containing {@link LogUnit}s to shift out * @param numWords the minimum number of word-containing {@link LogUnit}s to shift out * @return the number of actual {@code LogUnit}s shifted out */ protected int shiftOutWords(final int numWords) { int numWordContainingLogUnitsShiftedOut = 0; for (LogUnit logUnit = shiftOut(); logUnit != null && numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) { if (logUnit.hasWord()) { numWordContainingLogUnitsShiftedOut++; if (logUnit.hasOneOrMoreWords()) { numWordContainingLogUnitsShiftedOut += logUnit.getNumWords(); } } return numWordContainingLogUnitsShiftedOut; Loading @@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer { } /** * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words. No * more than {@code n} LogUnits will have words associated with them. If there are not enough * LogUnits in the buffer to meet the word requirement, returns the all LogUnits. * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with * them. * * There will be no more than {@code n} words in the returned list. So if 2 words are * requested, and the first LogUnit has 3 words, it is not returned. If 2 words are requested, * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit * is returned. If the first LogUnit has no words associated with it, and the second LogUnit * has three words, then only the first LogUnit (which has no associated words) is returned. If * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits * will be returned. * * @param n The maximum number of {@link LogUnit}s with words to return. 
* @return The list of the {@link LogUnit}s containing the first n words */ public ArrayList<LogUnit> peekAtFirstNWords(int n) { final LinkedList<LogUnit> logUnits = getLogUnits(); final int length = logUnits.size(); // Allocate space for n*2 logUnits. There will be at least n, one for each word, and // there may be additional for punctuation, between-word commands, etc. This should be // enough that reallocation won't be necessary. final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2); for (int i = 0; i < length && n > 0; i++) { final LogUnit logUnit = logUnits.get(i); list.add(logUnit); if (logUnit.hasWord()) { n--; } final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2); for (final LogUnit logUnit : logUnits) { n -= logUnit.getNumWords(); if (n < 0) break; resultList.add(logUnit); } return list; return resultList; } } java/src/com/android/inputmethod/research/LogUnit.java +75 −24 Original line number Diff line number Diff line Loading @@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; import java.io.IOException; import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; /** * A group of log statements related to each other. Loading @@ -49,27 +49,45 @@ public class LogUnit { private static final boolean DEBUG = false && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG; private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); private static final String[] EMPTY_STRING_ARRAY = new String[0]; private final ArrayList<LogStatement> mLogStatementList; private final ArrayList<Object[]> mValuesList; // Assume that mTimeList is sorted in increasing order. Do not insert null values into // mTimeList. private final ArrayList<Long> mTimeList; // Word that this LogUnit generates. 
Should be null if the LogUnit does not generate a genuine // word (i.e. separators alone do not count as a word). Should never be empty. private String mWord; // Words that this LogUnit generates. Should be null if the data in the LogUnit does not // generate a genuine word (i.e. separators alone do not count as a word). Should never be // empty. Note that if the user types spaces explicitly, then normally mWords should contain // only a single word; it will only contain multiple space-separated words if the user does not // enter a space, and the system enters one automatically. private String mWords; private String[] mWordArray = EMPTY_STRING_ARRAY; private boolean mMayContainDigit; private boolean mIsPartOfMegaword; private boolean mContainsCorrection; // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was // to a different word or just a "typo" correction. It is considered a "typo" if the final // word was listed in the suggestions available the first time the word was gestured or // tapped. // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the // correction. private int mCorrectionType; // LogUnits start in this state. If a word is entered without being corrected, it will have // this CorrectionType. public static final int CORRECTIONTYPE_NO_CORRECTION = 0; // The LogUnit was corrected manually by the user in an unspecified way. public static final int CORRECTIONTYPE_CORRECTION = 1; // The LogUnit was corrected manually by the user to a word not in the list of suggestions of // the first word typed here. (Note: this is a heuristic value, it may be incorrect, for // example, if the user repositions the cursor). public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2; // The LogUnit was corrected manually by the user to a word that was in the list of suggestions // of the first word typed here. (Again, a heuristic). It is probably a typo correction.
public static final int CORRECTIONTYPE_TYPO = 3; // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's // state and statistics. This should include how many times it has been corrected, whether // other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit // previously contained a word, but was corrected to empty (because it was deleted, and there is // no known replacement). private SuggestedWords mSuggestedWords; Loading Loading @@ -166,7 +184,7 @@ public class LogUnit { final LogStatement logStatement; if (canIncludePrivateData) { LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis(), getWord(), getCorrectionType()); SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType()); } else { LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis()); Loading @@ -181,22 +199,22 @@ public class LogUnit { } /** * Mark the current logUnit as containing data to generate {@code word}. * Mark the current logUnit as containing data to generate {@code newWords}. * * If {@code setWord()} was previously called for this LogUnit, then the method will try to * determine what kind of correction it is, and update its internal state of the correctionType * accordingly. * * @param word The word this LogUnit generates. Caller should not pass null or the empty * @param newWords The words this LogUnit generates. Caller should not pass null or the empty * string. */ public void setWord(final String word) { if (hasWord()) { public void setWords(final String newWords) { if (hasOneOrMoreWords()) { // The word was already set once, and it is now being changed. See if the new word // is close to the old word. If so, then the change is probably a typo correction. // If not, the user may have decided to enter a different word, so flag it. 
if (mSuggestedWords != null) { if (isInSuggestedWords(word, mSuggestedWords)) { if (isInSuggestedWords(newWords, mSuggestedWords)) { mCorrectionType = CORRECTIONTYPE_TYPO; } else { mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD; Loading @@ -206,38 +224,71 @@ public class LogUnit { // Mark it as a generic correction. mCorrectionType = CORRECTIONTYPE_CORRECTION; } } else { mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; } mWord = word; mWords = newWords; // Update mWordArray mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY : WHITESPACE_PATTERN.split(mWords); if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) { // Empty string at beginning of array. Must have been whitespace at the start of the // word. Remove the empty string. mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length); } } public String getWordsAsString() { return mWords; } /** * Returns the words generated by the data in this LogUnit. * * The first word may be an empty string, if the data in the LogUnit started by generating * whitespace. * * @return the array of words. An empty array if there are no words associated with this LogUnit.
*/ public String[] getWordsAsStringArray() { return mWordArray; } public String getWord() { return mWord; public boolean hasOneOrMoreWords() { return mWordArray.length >= 1; } public boolean hasWord() { return mWord != null && !TextUtils.isEmpty(mWord.trim()); public int getNumWords() { return mWordArray.length; } // TODO: Refactor to eliminate getter/setters public void setMayContainDigit() { mMayContainDigit = true; } // TODO: Refactor to eliminate getter/setters public boolean mayContainDigit() { return mMayContainDigit; } // TODO: Refactor to eliminate getter/setters public void setContainsCorrection() { mContainsCorrection = true; } // TODO: Refactor to eliminate getter/setters public boolean containsCorrection() { return mContainsCorrection; } // TODO: Refactor to eliminate getter/setters public void setCorrectionType(final int correctionType) { mCorrectionType = correctionType; } // TODO: Refactor to eliminate getter/setters public int getCorrectionType() { return mCorrectionType; } Loading Loading @@ -267,7 +318,7 @@ public class LogUnit { new ArrayList<Object[]>(laterValues), new ArrayList<Long>(laterTimes), true /* isPartOfMegaword */); newLogUnit.mWord = null; newLogUnit.mWords = null; newLogUnit.mMayContainDigit = mMayContainDigit; newLogUnit.mContainsCorrection = mContainsCorrection; Loading @@ -287,9 +338,9 @@ public class LogUnit { mLogStatementList.addAll(logUnit.mLogStatementList); mValuesList.addAll(logUnit.mValuesList); mTimeList.addAll(logUnit.mTimeList); mWord = null; if (logUnit.mWord != null) { setWord(logUnit.mWord); mWords = null; if (logUnit.mWords != null) { setWords(logUnit.mWords); } mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit; mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection; Loading java/src/com/android/inputmethod/research/MainLogBuffer.java +23 −21 Original line number Diff line number Diff line Loading @@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer { final 
int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); final String word = logUnit.getWord(); if (word != null) { numWordsInLogUnitList++; } numWordsInLogUnitList += logUnit.getNumWords(); } return numWordsInLogUnitList >= minNGramSize; } Loading @@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer { // the complete buffer contents in detail. int numWordsInLogUnitList = 0; final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); if (!logUnit.hasWord()) { for (final LogUnit logUnit : logUnits) { if (!logUnit.hasOneOrMoreWords()) { // Digits outside words are a privacy threat. if (logUnit.mayContainDigit()) { return false; } } else { numWordsInLogUnitList++; final String word = logUnit.getWord(); numWordsInLogUnitList += logUnit.getNumWords(); final String[] words = logUnit.getWordsAsStringArray(); for (final String word : words) { // Words not in the dictionary are a privacy threat. if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) { if (DEBUG) { Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word) Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word) + ", isValid: " + (dictionary.isValidWord(word))); } return false; } } } } // Finally, only return true if the minNGramSize is met. return numWordsInLogUnitList >= minNGramSize; // Finally, only return true if the ngram is the right size. return numWordsInLogUnitList == minNGramSize; } public void shiftAndPublishAll() { Loading @@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer { shiftOutWords(N_GRAM_SIZE); mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams; } else { // No good n-gram at front, and buffer is full. Shift out the first word (or if there // is none, the existing logUnits). logUnits = peekAtFirstNWords(1); // No good n-gram at front, and buffer is full. 
Shift out up through the first logUnit // with associated words (or if there is none, all the existing logUnits). logUnits.clear(); for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords(); logUnit = shiftOut()) { logUnits.add(logUnit); } publish(logUnits, false /* canIncludePrivateData */); shiftOutWords(1); } } Loading java/src/com/android/inputmethod/research/ResearchLogger.java +21 −18 Original line number Diff line number Diff line Loading @@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang protected void publish(final ArrayList<LogUnit> logUnits, boolean canIncludePrivateData) { canIncludePrivateData |= IS_LOGGING_EVERYTHING; final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); final String word = logUnit.getWord(); if (word != null && word.length() > 0 && hasLetters(word)) { Log.d(TAG, "onPublish: " + word + ", hc: " + logUnit.containsCorrection()); for (final LogUnit logUnit : logUnits) { if (DEBUG) { final String wordsString = logUnit.getWordsAsString(); Log.d(TAG, "onPublish: '" + wordsString + "', hc: " + logUnit.containsCorrection() + ", cipd: " + canIncludePrivateData); } for (final String word : logUnit.getWordsAsStringArray()) { final Dictionary dictionary = getDictionary(); mStatistics.recordWordEntered( dictionary != null && dictionary.isValidWord(word), Loading Loading @@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang /* package for test */ void commitCurrentLogUnit() { if (DEBUG) { Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ? ": " + mCurrentLogUnit.getWord() : "")); Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ? 
": " + mCurrentLogUnit.getWordsAsString() : "")); } if (!mCurrentLogUnit.isEmpty()) { if (mMainLogBuffer != null) { Loading Loading @@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang // Check that expected word matches. if (oldLogUnit != null) { final String oldLogUnitWord = oldLogUnit.getWord(); if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) { final String oldLogUnitWords = oldLogUnit.getWordsAsString(); if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) { return; } } Loading @@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT); if (DEBUG) { Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to " + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : "")); + (mCurrentLogUnit.hasOneOrMoreWords() ? ": '" + mCurrentLogUnit.getWordsAsString() + "'" : "")); } } Loading Loading @@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang } for (LogUnit logUnit : logUnits) { if (DEBUG) { Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord() : "<wordless>") + ", correction?: " + logUnit.containsCorrection()); Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords() ? 
logUnit.getWordsAsString() : "<wordless>") + ", correction?: " + logUnit.containsCorrection()); } researchLog.publish(logUnit, canIncludePrivateData); } Loading Loading @@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang return; } if (word.length() > 0 && hasLetters(word)) { mCurrentLogUnit.setWord(word); mCurrentLogUnit.setWords(word); } final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime); enqueueCommitText(word, isBatchMode); Loading Loading @@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang } if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) { if (logUnit != null) { logUnit.setWord(originallyTypedWord); logUnit.setWords(originallyTypedWord); } } researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit, Loading Loading @@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang * Log a call to LatinIME.commitCurrentAutoCorrection(). * * SystemResponse: The IME has committed an auto-correction. An auto-correction changes the raw * text input to another word that the user more likely desired to type. * text input to another word (or words) that the user more likely desired to type. 
*/ private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION = new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord", Loading Loading @@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang final int enteredWordPos, final SuggestedWords suggestedWords) { final ResearchLogger researchLogger = getInstance(); if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) { researchLogger.mCurrentLogUnit.setWord(enteredText.toString()); researchLogger.mCurrentLogUnit.setWords(enteredText.toString()); } researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText, enteredWordPos); Loading Loading
java/src/com/android/inputmethod/research/FixedLogBuffer.java +28 −23 Original line number Diff line number Diff line Loading @@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer { */ @Override public void shiftIn(final LogUnit newLogUnit) { if (!newLogUnit.hasWord()) { // This LogUnit isn't a word, so it doesn't count toward the word-limit. if (!newLogUnit.hasOneOrMoreWords()) { // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit. super.shiftIn(newLogUnit); return; } final int numWordsIncoming = newLogUnit.getNumWords(); if (mNumActualWords >= mWordCapacity) { // Give subclass a chance to handle the buffer full condition by shifting out logUnits. onBufferFull(); // If still full, evict. if (mNumActualWords >= mWordCapacity) { shiftOutWords(1); shiftOutWords(numWordsIncoming); } } super.shiftIn(newLogUnit); mNumActualWords++; // Must be a word, or we wouldn't be here. mNumActualWords += numWordsIncoming; } @Override public LogUnit unshiftIn() { final LogUnit logUnit = super.unshiftIn(); if (logUnit != null && logUnit.hasWord()) { mNumActualWords--; if (logUnit != null && logUnit.hasOneOrMoreWords()) { mNumActualWords -= logUnit.getNumWords(); } return logUnit; } Loading Loading @@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer { @Override public LogUnit shiftOut() { final LogUnit logUnit = super.shiftOut(); if (logUnit != null && logUnit.hasWord()) { mNumActualWords--; if (logUnit != null && logUnit.hasOneOrMoreWords()) { mNumActualWords -= logUnit.getNumWords(); } return logUnit; } Loading @@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer { * If there are less than {@code numWords} word-containing {@link LogUnit}s, shifts out * all {@code LogUnit}s in the buffer. 
* * @param numWords the number of word-containing {@link LogUnit}s to shift out * @param numWords the minimum number of word-containing {@link LogUnit}s to shift out * @return the number of actual {@code LogUnit}s shifted out */ protected int shiftOutWords(final int numWords) { int numWordContainingLogUnitsShiftedOut = 0; for (LogUnit logUnit = shiftOut(); logUnit != null && numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) { if (logUnit.hasWord()) { numWordContainingLogUnitsShiftedOut++; if (logUnit.hasOneOrMoreWords()) { numWordContainingLogUnitsShiftedOut += logUnit.getNumWords(); } } return numWordContainingLogUnitsShiftedOut; Loading @@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer { } /** * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words. No * more than {@code n} LogUnits will have words associated with them. If there are not enough * LogUnits in the buffer to meet the word requirement, returns the all LogUnits. * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with * them. * * There will be no more than {@code n} words in the returned list. So if 2 words are * requested, and the first LogUnit has 3 words, it is not returned. If 2 words are requested, * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit * is returned. If the first LogUnit has no words associated with it, and the second LogUnit * has three words, then only the first LogUnit (which has no associated words) is returned. If * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits * will be returned. * * @param n The maximum number of {@link LogUnit}s with words to return. 
* @return The list of the {@link LogUnit}s containing the first n words */ public ArrayList<LogUnit> peekAtFirstNWords(int n) { final LinkedList<LogUnit> logUnits = getLogUnits(); final int length = logUnits.size(); // Allocate space for n*2 logUnits. There will be at least n, one for each word, and // there may be additional for punctuation, between-word commands, etc. This should be // enough that reallocation won't be necessary. final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2); for (int i = 0; i < length && n > 0; i++) { final LogUnit logUnit = logUnits.get(i); list.add(logUnit); if (logUnit.hasWord()) { n--; } final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2); for (final LogUnit logUnit : logUnits) { n -= logUnit.getNumWords(); if (n < 0) break; resultList.add(logUnit); } return list; return resultList; } }
java/src/com/android/inputmethod/research/LogUnit.java +75 −24 Original line number Diff line number Diff line Loading @@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; import java.io.IOException; import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; /** * A group of log statements related to each other. Loading @@ -49,27 +49,45 @@ public class LogUnit { private static final boolean DEBUG = false && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG; private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); private static final String[] EMPTY_STRING_ARRAY = new String[0]; private final ArrayList<LogStatement> mLogStatementList; private final ArrayList<Object[]> mValuesList; // Assume that mTimeList is sorted in increasing order. Do not insert null values into // mTimeList. private final ArrayList<Long> mTimeList; // Word that this LogUnit generates. Should be null if the LogUnit does not generate a genuine // word (i.e. separators alone do not count as a word). Should never be empty. private String mWord; // Words that this LogUnit generates. Should be null if the data in the LogUnit does not // generate a genuine word (i.e. separators alone do not count as a word). Should never be // empty. Note that if the user types spaces explicitly, then normally mWords should contain // only a single word; it will only contain multiple space-separated words if the user does not // enter a space, and the system enters one automatically.
private String mWords; private String[] mWordArray = EMPTY_STRING_ARRAY; private boolean mMayContainDigit; private boolean mIsPartOfMegaword; private boolean mContainsCorrection; // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was // to a different word or just a "typo" correction. It is considered a "typo" if the final // word was listed in the suggestions available the first time the word was gestured or // tapped. // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the // correction. private int mCorrectionType; // LogUnits start in this state. If a word is entered without being corrected, it will have // this CorrectionType. public static final int CORRECTIONTYPE_NO_CORRECTION = 0; // The LogUnit was corrected manually by the user in an unspecified way. public static final int CORRECTIONTYPE_CORRECTION = 1; // The LogUnit was corrected manually by the user to a word not in the list of suggestions of // the first word typed here. (Note: this is a heuristic value, it may be incorrect, for // example, if the user repositions the cursor). public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2; // The LogUnit was corrected manually by the user to a word that was in the list of suggestions // of the first word typed here. (Again, a heuristic). It is probably a typo correction. public static final int CORRECTIONTYPE_TYPO = 3; // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's // state and statistics. This should include how many times it has been corrected, whether // other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit // previously contained a word, but was corrected to empty (because it was deleted, and there is // no known replacement).
private SuggestedWords mSuggestedWords; Loading Loading @@ -166,7 +184,7 @@ public class LogUnit { final LogStatement logStatement; if (canIncludePrivateData) { LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis(), getWord(), getCorrectionType()); SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType()); } else { LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis()); Loading @@ -181,22 +199,22 @@ public class LogUnit { } /** * Mark the current logUnit as containing data to generate {@code word}. * Mark the current logUnit as containing data to generate {@code newWords}. * * If {@code setWord()} was previously called for this LogUnit, then the method will try to * determine what kind of correction it is, and update its internal state of the correctionType * accordingly. * * @param word The word this LogUnit generates. Caller should not pass null or the empty * @param newWords The words this LogUnit generates. Caller should not pass null or the empty * string. */ public void setWord(final String word) { if (hasWord()) { public void setWords(final String newWords) { if (hasOneOrMoreWords()) { // The word was already set once, and it is now being changed. See if the new word // is close to the old word. If so, then the change is probably a typo correction. // If not, the user may have decided to enter a different word, so flag it. if (mSuggestedWords != null) { if (isInSuggestedWords(word, mSuggestedWords)) { if (isInSuggestedWords(newWords, mSuggestedWords)) { mCorrectionType = CORRECTIONTYPE_TYPO; } else { mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD; Loading @@ -206,38 +224,71 @@ public class LogUnit { // Mark it as a generic correction. mCorrectionType = CORRECTIONTYPE_CORRECTION; } } else { mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; } mWord = word; mWords = newWords; // Update mWordArray mWordArray = (TextUtils.isEmpty(mWords)) ? 
EMPTY_STRING_ARRAY : WHITESPACE_PATTERN.split(mWords); if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) { // Empty string at beginning of array. Must have been whitespace at the start of the // word. Remove the empty string. mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length); } } public String getWordsAsString() { return mWords; } /** * Retuns the words generated by the data in this LogUnit. * * The first word may be an empty string, if the data in the LogUnit started by generating * whitespace. * * @return the array of words. an empty list of there are no words associated with this LogUnit. */ public String[] getWordsAsStringArray() { return mWordArray; } public String getWord() { return mWord; public boolean hasOneOrMoreWords() { return mWordArray.length >= 1; } public boolean hasWord() { return mWord != null && !TextUtils.isEmpty(mWord.trim()); public int getNumWords() { return mWordArray.length; } // TODO: Refactor to eliminate getter/setters public void setMayContainDigit() { mMayContainDigit = true; } // TODO: Refactor to eliminate getter/setters public boolean mayContainDigit() { return mMayContainDigit; } // TODO: Refactor to eliminate getter/setters public void setContainsCorrection() { mContainsCorrection = true; } // TODO: Refactor to eliminate getter/setters public boolean containsCorrection() { return mContainsCorrection; } // TODO: Refactor to eliminate getter/setters public void setCorrectionType(final int correctionType) { mCorrectionType = correctionType; } // TODO: Refactor to eliminate getter/setters public int getCorrectionType() { return mCorrectionType; } Loading Loading @@ -267,7 +318,7 @@ public class LogUnit { new ArrayList<Object[]>(laterValues), new ArrayList<Long>(laterTimes), true /* isPartOfMegaword */); newLogUnit.mWord = null; newLogUnit.mWords = null; newLogUnit.mMayContainDigit = mMayContainDigit; newLogUnit.mContainsCorrection = mContainsCorrection; Loading @@ -287,9 +338,9 @@ public class LogUnit { 
mLogStatementList.addAll(logUnit.mLogStatementList); mValuesList.addAll(logUnit.mValuesList); mTimeList.addAll(logUnit.mTimeList); mWord = null; if (logUnit.mWord != null) { setWord(logUnit.mWord); mWords = null; if (logUnit.mWords != null) { setWords(logUnit.mWords); } mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit; mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection; Loading
java/src/com/android/inputmethod/research/MainLogBuffer.java +23 −21 Original line number Diff line number Diff line Loading @@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer { final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); final String word = logUnit.getWord(); if (word != null) { numWordsInLogUnitList++; } numWordsInLogUnitList += logUnit.getNumWords(); } return numWordsInLogUnitList >= minNGramSize; } Loading @@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer { // the complete buffer contents in detail. int numWordsInLogUnitList = 0; final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); if (!logUnit.hasWord()) { for (final LogUnit logUnit : logUnits) { if (!logUnit.hasOneOrMoreWords()) { // Digits outside words are a privacy threat. if (logUnit.mayContainDigit()) { return false; } } else { numWordsInLogUnitList++; final String word = logUnit.getWord(); numWordsInLogUnitList += logUnit.getNumWords(); final String[] words = logUnit.getWordsAsStringArray(); for (final String word : words) { // Words not in the dictionary are a privacy threat. if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) { if (DEBUG) { Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word) Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word) + ", isValid: " + (dictionary.isValidWord(word))); } return false; } } } } // Finally, only return true if the minNGramSize is met. return numWordsInLogUnitList >= minNGramSize; // Finally, only return true if the ngram is the right size. 
return numWordsInLogUnitList == minNGramSize; } public void shiftAndPublishAll() { Loading @@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer { shiftOutWords(N_GRAM_SIZE); mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams; } else { // No good n-gram at front, and buffer is full. Shift out the first word (or if there // is none, the existing logUnits). logUnits = peekAtFirstNWords(1); // No good n-gram at front, and buffer is full. Shift out up through the first logUnit // with associated words (or if there is none, all the existing logUnits). logUnits.clear(); for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords(); logUnit = shiftOut()) { logUnits.add(logUnit); } publish(logUnits, false /* canIncludePrivateData */); shiftOutWords(1); } } Loading
java/src/com/android/inputmethod/research/ResearchLogger.java +21 −18 Original line number Diff line number Diff line Loading @@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang protected void publish(final ArrayList<LogUnit> logUnits, boolean canIncludePrivateData) { canIncludePrivateData |= IS_LOGGING_EVERYTHING; final int length = logUnits.size(); for (int i = 0; i < length; i++) { final LogUnit logUnit = logUnits.get(i); final String word = logUnit.getWord(); if (word != null && word.length() > 0 && hasLetters(word)) { Log.d(TAG, "onPublish: " + word + ", hc: " + logUnit.containsCorrection()); for (final LogUnit logUnit : logUnits) { if (DEBUG) { final String wordsString = logUnit.getWordsAsString(); Log.d(TAG, "onPublish: '" + wordsString + "', hc: " + logUnit.containsCorrection() + ", cipd: " + canIncludePrivateData); } for (final String word : logUnit.getWordsAsStringArray()) { final Dictionary dictionary = getDictionary(); mStatistics.recordWordEntered( dictionary != null && dictionary.isValidWord(word), Loading Loading @@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang /* package for test */ void commitCurrentLogUnit() { if (DEBUG) { Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ? ": " + mCurrentLogUnit.getWord() : "")); Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ? ": " + mCurrentLogUnit.getWordsAsString() : "")); } if (!mCurrentLogUnit.isEmpty()) { if (mMainLogBuffer != null) { Loading Loading @@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang // Check that expected word matches. 
if (oldLogUnit != null) { final String oldLogUnitWord = oldLogUnit.getWord(); if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) { final String oldLogUnitWords = oldLogUnit.getWordsAsString(); if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) { return; } } Loading @@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT); if (DEBUG) { Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to " + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : "")); + (mCurrentLogUnit.hasOneOrMoreWords() ? ": '" + mCurrentLogUnit.getWordsAsString() + "'" : "")); } } Loading Loading @@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang } for (LogUnit logUnit : logUnits) { if (DEBUG) { Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord() : "<wordless>") + ", correction?: " + logUnit.containsCorrection()); Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords() ? logUnit.getWordsAsString() : "<wordless>") + ", correction?: " + logUnit.containsCorrection()); } researchLog.publish(logUnit, canIncludePrivateData); } Loading Loading @@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang return; } if (word.length() > 0 && hasLetters(word)) { mCurrentLogUnit.setWord(word); mCurrentLogUnit.setWords(word); } final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime); enqueueCommitText(word, isBatchMode); Loading Loading @@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang } if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) { if (logUnit != null) { logUnit.setWord(originallyTypedWord); logUnit.setWords(originallyTypedWord); } } researchLogger.enqueueEvent(logUnit != null ? 
logUnit : researchLogger.mCurrentLogUnit, Loading Loading @@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang * Log a call to LatinIME.commitCurrentAutoCorrection(). * * SystemResponse: The IME has committed an auto-correction. An auto-correction changes the raw * text input to another word that the user more likely desired to type. * text input to another word (or words) that the user more likely desired to type. */ private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION = new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord", Loading Loading @@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang final int enteredWordPos, final SuggestedWords suggestedWords) { final ResearchLogger researchLogger = getInstance(); if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) { researchLogger.mCurrentLogUnit.setWord(enteredText.toString()); researchLogger.mCurrentLogUnit.setWords(enteredText.toString()); } researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText, enteredWordPos); Loading