Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 41b34cf2 authored by Kurt Partridge's avatar Kurt Partridge Committed by Android (Google) Code Review
Browse files

Merge "Allow LogUnits to hold >1 word"

parents 825da76c e92b5e14
Loading
Loading
Loading
Loading
+28 −23
Original line number Diff line number Diff line
@@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer {
     */
    @Override
    public void shiftIn(final LogUnit newLogUnit) {
        if (!newLogUnit.hasWord()) {
            // This LogUnit isn't a word, so it doesn't count toward the word-limit.
        if (!newLogUnit.hasOneOrMoreWords()) {
            // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit.
            super.shiftIn(newLogUnit);
            return;
        }
        final int numWordsIncoming = newLogUnit.getNumWords();
        if (mNumActualWords >= mWordCapacity) {
            // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
            onBufferFull();
            // If still full, evict.
            if (mNumActualWords >= mWordCapacity) {
                shiftOutWords(1);
                shiftOutWords(numWordsIncoming);
            }
        }
        super.shiftIn(newLogUnit);
        mNumActualWords++; // Must be a word, or we wouldn't be here.
        mNumActualWords += numWordsIncoming;
    }

    @Override
    public LogUnit unshiftIn() {
        final LogUnit logUnit = super.unshiftIn();
        if (logUnit != null && logUnit.hasWord()) {
            mNumActualWords--;
        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
            mNumActualWords -= logUnit.getNumWords();
        }
        return logUnit;
    }
@@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer {
    @Override
    public LogUnit shiftOut() {
        final LogUnit logUnit = super.shiftOut();
        if (logUnit != null && logUnit.hasWord()) {
            mNumActualWords--;
        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
            mNumActualWords -= logUnit.getNumWords();
        }
        return logUnit;
    }
@@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer {
     * If there are fewer than {@code numWords} word-containing {@link LogUnit}s, shifts out
     * all {@code LogUnit}s in the buffer.
     *
     * @param numWords the number of word-containing {@link LogUnit}s to shift out
     * @param numWords the minimum number of words to shift out
     * @return the total number of words contained in the {@code LogUnit}s shifted out
     */
    protected int shiftOutWords(final int numWords) {
        int numWordContainingLogUnitsShiftedOut = 0;
        for (LogUnit logUnit = shiftOut(); logUnit != null
                && numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) {
            if (logUnit.hasWord()) {
                numWordContainingLogUnitsShiftedOut++;
            if (logUnit.hasOneOrMoreWords()) {
                numWordContainingLogUnitsShiftedOut += logUnit.getNumWords();
            }
        }
        return numWordContainingLogUnitsShiftedOut;
@@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer {
    }

    /**
     * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words.  No
     * more than {@code n} LogUnits will have words associated with them.  If there are not enough
     * LogUnits in the buffer to meet the word requirement, returns the all LogUnits.
     * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with
     * them.
     *
     * There will be no more than {@code n} words in the returned list.  So if 2 words are
     * requested, and the first LogUnit has 3 words, it is not returned.  If 2 words are requested,
     * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit
     * is returned.  If the first LogUnit has no words associated with it, and the second LogUnit
     * has three words, then only the first LogUnit (which has no associated words) is returned.  If
     * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits
     * will be returned.
     *
     * @param n The maximum number of words contained in the returned {@link LogUnit}s.
     * @return The list of the {@link LogUnit}s containing the first n words
     */
    public ArrayList<LogUnit> peekAtFirstNWords(int n) {
        final LinkedList<LogUnit> logUnits = getLogUnits();
        final int length = logUnits.size();
        // Allocate space for n*2 logUnits.  There will be at least n, one for each word, and
        // there may be additional for punctuation, between-word commands, etc.  This should be
        // enough that reallocation won't be necessary.
        final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
        for (int i = 0; i < length && n > 0; i++) {
            final LogUnit logUnit = logUnits.get(i);
            list.add(logUnit);
            if (logUnit.hasWord()) {
                n--;
            }
        final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2);
        for (final LogUnit logUnit : logUnits) {
            n -= logUnit.getNumWords();
            if (n < 0) break;
            resultList.add(logUnit);
        }
        return list;
        return resultList;
    }
}
+75 −24
Original line number Diff line number Diff line
@@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;

import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * A group of log statements related to each other.
@@ -49,27 +49,45 @@ public class LogUnit {
    private static final boolean DEBUG = false
            && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;

    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    private final ArrayList<LogStatement> mLogStatementList;
    private final ArrayList<Object[]> mValuesList;
    // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
    // mTimeList.
    private final ArrayList<Long> mTimeList;
    // Word that this LogUnit generates.  Should be null if the LogUnit does not generate a genuine
    // word (i.e. separators alone do not count as a word).  Should never be empty.
    private String mWord;
    // Words that this LogUnit generates.  Should be null if the data in the LogUnit does not
    // generate a genuine word (i.e. separators alone do not count as a word).  Should never be
    // empty.  Note that if the user types spaces explicitly, then normally mWords should contain
    // only a single word; it will only contain multiple space-separated words if the user does
    // not enter a space, and the system enters one automatically.
    private String mWords;
    private String[] mWordArray = EMPTY_STRING_ARRAY;
    private boolean mMayContainDigit;
    private boolean mIsPartOfMegaword;
    private boolean mContainsCorrection;

    // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
    // to a different word or just a "typo" correction.  It is considered a "typo" if the final
    // word was listed in the suggestions available the first time the word was gestured or
    // tapped.
    // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
    // correction.
    private int mCorrectionType;
    // LogUnits start in this state.  If a word is entered without being corrected, it will have
    // this CorrectionType.
    public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
    // The LogUnit was corrected manually by the user in an unspecified way.
    public static final int CORRECTIONTYPE_CORRECTION = 1;
    // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
    // the first word typed here.  (Note: this is a heuristic value, it may be incorrect, for
    // example, if the user repositions the cursor).
    public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
    // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
    // of the first word typed here.  (Again, a heuristic).  It is probably a typo correction.
    public static final int CORRECTIONTYPE_TYPO = 3;
    // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
    // state and statistics.  This should include how many times it has been corrected, whether
    // other LogUnit edits were done between edits to this LogUnit, etc.  Also track when a LogUnit
    // previously contained a word, but was corrected to empty (because it was deleted, and there is
    // no known replacement).

    private SuggestedWords mSuggestedWords;

@@ -166,7 +184,7 @@ public class LogUnit {
        final LogStatement logStatement;
        if (canIncludePrivateData) {
            LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
                    SystemClock.uptimeMillis(), getWord(), getCorrectionType());
                    SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
        } else {
            LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
                    SystemClock.uptimeMillis());
@@ -181,22 +199,22 @@ public class LogUnit {
    }

    /**
     * Mark the current logUnit as containing data to generate {@code word}.
     * Mark the current logUnit as containing data to generate {@code newWords}.
     *
     * If {@code setWords()} was previously called for this LogUnit, then the method will try to
     * determine what kind of correction it is, and update its internal state of the correctionType
     * accordingly.
     *
     * @param word The word this LogUnit generates.  Caller should not pass null or the empty
     * @param newWords The words this LogUnit generates.  Caller should not pass null or the empty
     * string.
     */
    public void setWord(final String word) {
        if (hasWord()) {
    public void setWords(final String newWords) {
        if (hasOneOrMoreWords()) {
            // The word was already set once, and it is now being changed.  See if the new word
            // is close to the old word.  If so, then the change is probably a typo correction.
            // If not, the user may have decided to enter a different word, so flag it.
            if (mSuggestedWords != null) {
                if (isInSuggestedWords(word, mSuggestedWords)) {
                if (isInSuggestedWords(newWords, mSuggestedWords)) {
                    mCorrectionType = CORRECTIONTYPE_TYPO;
                } else {
                    mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
@@ -206,38 +224,71 @@ public class LogUnit {
                // Mark it as a generic correction.
                mCorrectionType = CORRECTIONTYPE_CORRECTION;
            }
        } else {
            mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
        }
        mWord = word;
        mWords = newWords;

        // Update mWordArray
        mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
                : WHITESPACE_PATTERN.split(mWords);
        if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
            // Empty string at beginning of array.  Must have been whitespace at the start of the
            // word.  Remove the empty string.
            mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
        }
    }

    public String getWordsAsString() {
        return mWords;
    }

    /**
     * Returns the words generated by the data in this LogUnit.
     *
     * The first word may be an empty string, if the data in the LogUnit started by generating
     * whitespace.
     *
     * @return the array of words. An empty array if there are no words associated with this LogUnit.
     */
    public String[] getWordsAsStringArray() {
        return mWordArray;
    }

    public String getWord() {
        return mWord;
    public boolean hasOneOrMoreWords() {
        return mWordArray.length >= 1;
    }

    public boolean hasWord() {
        return mWord != null && !TextUtils.isEmpty(mWord.trim());
    public int getNumWords() {
        return mWordArray.length;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setMayContainDigit() {
        mMayContainDigit = true;
    }

    // TODO: Refactor to eliminate getter/setters
    public boolean mayContainDigit() {
        return mMayContainDigit;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setContainsCorrection() {
        mContainsCorrection = true;
    }

    // TODO: Refactor to eliminate getter/setters
    public boolean containsCorrection() {
        return mContainsCorrection;
    }

    // TODO: Refactor to eliminate getter/setters
    public void setCorrectionType(final int correctionType) {
        mCorrectionType = correctionType;
    }

    // TODO: Refactor to eliminate getter/setters
    public int getCorrectionType() {
        return mCorrectionType;
    }
@@ -267,7 +318,7 @@ public class LogUnit {
                        new ArrayList<Object[]>(laterValues),
                        new ArrayList<Long>(laterTimes),
                        true /* isPartOfMegaword */);
                newLogUnit.mWord = null;
                newLogUnit.mWords = null;
                newLogUnit.mMayContainDigit = mMayContainDigit;
                newLogUnit.mContainsCorrection = mContainsCorrection;

@@ -287,9 +338,9 @@ public class LogUnit {
        mLogStatementList.addAll(logUnit.mLogStatementList);
        mValuesList.addAll(logUnit.mValuesList);
        mTimeList.addAll(logUnit.mTimeList);
        mWord = null;
        if (logUnit.mWord != null) {
            setWord(logUnit.mWord);
        mWords = null;
        if (logUnit.mWords != null) {
            setWords(logUnit.mWords);
        }
        mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
        mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
+23 −21
Original line number Diff line number Diff line
@@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
            final int length = logUnits.size();
            for (int i = 0; i < length; i++) {
                final LogUnit logUnit = logUnits.get(i);
                final String word = logUnit.getWord();
                if (word != null) {
                    numWordsInLogUnitList++;
                }
                numWordsInLogUnitList += logUnit.getNumWords();
            }
            return numWordsInLogUnitList >= minNGramSize;
        }
@@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
        // the complete buffer contents in detail.
        int numWordsInLogUnitList = 0;
        final int length = logUnits.size();
        for (int i = 0; i < length; i++) {
            final LogUnit logUnit = logUnits.get(i);
            if (!logUnit.hasWord()) {
        for (final LogUnit logUnit : logUnits) {
            if (!logUnit.hasOneOrMoreWords()) {
                // Digits outside words are a privacy threat.
                if (logUnit.mayContainDigit()) {
                    return false;
                }
            } else {
                numWordsInLogUnitList++;
                final String word = logUnit.getWord();
                numWordsInLogUnitList += logUnit.getNumWords();
                final String[] words = logUnit.getWordsAsStringArray();
                for (final String word : words) {
                    // Words not in the dictionary are a privacy threat.
                    if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
                        if (DEBUG) {
                        Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
                            Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
                                    + ResearchLogger.hasLetters(word)
                                    + ", isValid: " + (dictionary.isValidWord(word)));
                        }
                        return false;
                    }
                }
            }
        }

        // Finally, only return true if the minNGramSize is met.
        return numWordsInLogUnitList >= minNGramSize;
        // Finally, only return true if the ngram is the right size.
        return numWordsInLogUnitList == minNGramSize;
    }

    public void shiftAndPublishAll() {
@@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
            shiftOutWords(N_GRAM_SIZE);
            mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
        } else {
            // No good n-gram at front, and buffer is full.  Shift out the first word (or if there
            // is none, the existing logUnits).
            logUnits = peekAtFirstNWords(1);
            // No good n-gram at front, and buffer is full.  Shift out up through the first logUnit
            // with associated words (or if there is none, all the existing logUnits).
            logUnits.clear();
            for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords();
                    logUnit = shiftOut()) {
                logUnits.add(logUnit);
            }
            publish(logUnits, false /* canIncludePrivateData */);
            shiftOutWords(1);
        }
    }

+21 −18
Original line number Diff line number Diff line
@@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
                protected void publish(final ArrayList<LogUnit> logUnits,
                        boolean canIncludePrivateData) {
                    canIncludePrivateData |= IS_LOGGING_EVERYTHING;
                    final int length = logUnits.size();
                    for (int i = 0; i < length; i++) {
                        final LogUnit logUnit = logUnits.get(i);
                        final String word = logUnit.getWord();
                        if (word != null && word.length() > 0 && hasLetters(word)) {
                            Log.d(TAG, "onPublish: " + word + ", hc: "
                                    + logUnit.containsCorrection());
                    for (final LogUnit logUnit : logUnits) {
                        if (DEBUG) {
                            final String wordsString = logUnit.getWordsAsString();
                            Log.d(TAG, "onPublish: '" + wordsString
                                    + "', hc: " + logUnit.containsCorrection()
                                    + ", cipd: " + canIncludePrivateData);
                        }
                        for (final String word : logUnit.getWordsAsStringArray()) {
                            final Dictionary dictionary = getDictionary();
                            mStatistics.recordWordEntered(
                                    dictionary != null && dictionary.isValidWord(word),
@@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang

    /* package for test */ void commitCurrentLogUnit() {
        if (DEBUG) {
            Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
                    ": " + mCurrentLogUnit.getWord() : ""));
            Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ?
                    ": " + mCurrentLogUnit.getWordsAsString() : ""));
        }
        if (!mCurrentLogUnit.isEmpty()) {
            if (mMainLogBuffer != null) {
@@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang

        // Check that expected word matches.
        if (oldLogUnit != null) {
            final String oldLogUnitWord = oldLogUnit.getWord();
            if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) {
            final String oldLogUnitWords = oldLogUnit.getWordsAsString();
            if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) {
                return;
            }
        }
@@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
        enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT);
        if (DEBUG) {
            Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
                    + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
                    + (mCurrentLogUnit.hasOneOrMoreWords() ? ": '"
                        + mCurrentLogUnit.getWordsAsString() + "'" : ""));
        }
    }

@@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
        }
        for (LogUnit logUnit : logUnits) {
            if (DEBUG) {
                Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
                        : "<wordless>") + ", correction?: " + logUnit.containsCorrection());
                Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords()
                        ? logUnit.getWordsAsString() : "<wordless>")
                        + ", correction?: " + logUnit.containsCorrection());
            }
            researchLog.publish(logUnit, canIncludePrivateData);
        }
@@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
            return;
        }
        if (word.length() > 0 && hasLetters(word)) {
            mCurrentLogUnit.setWord(word);
            mCurrentLogUnit.setWords(word);
        }
        final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
        enqueueCommitText(word, isBatchMode);
@@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
        }
        if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
            if (logUnit != null) {
                logUnit.setWord(originallyTypedWord);
                logUnit.setWords(originallyTypedWord);
            }
        }
        researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit,
@@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
     * Log a call to LatinIME.commitCurrentAutoCorrection().
     *
     * SystemResponse: The IME has committed an auto-correction.  An auto-correction changes the raw
     * text input to another word that the user more likely desired to type.
     * text input to another word (or words) that the user more likely desired to type.
     */
    private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION =
            new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
@@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
            final int enteredWordPos, final SuggestedWords suggestedWords) {
        final ResearchLogger researchLogger = getInstance();
        if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) {
            researchLogger.mCurrentLogUnit.setWord(enteredText.toString());
            researchLogger.mCurrentLogUnit.setWords(enteredText.toString());
        }
        researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
                enteredWordPos);