Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 700ce8df authored by Kurt Partridge's avatar Kurt Partridge Committed by Android (Google) Code Review
Browse files

Merge "[Rlog56] Buffer words before pushing out LogUnit"

parents 4da2ed7a 403c4239
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -81,7 +81,7 @@ public class FixedLogBuffer extends LogBuffer {
        return logUnit;
    }

    private void shiftOutThroughFirstWord() {
    public void shiftOutThroughFirstWord() {
        final LinkedList<LogUnit> logUnits = getLogUnits();
        while (!logUnits.isEmpty()) {
            final LogUnit logUnit = logUnits.removeFirst();
+66 −45
Original line number Diff line number Diff line
@@ -26,18 +26,42 @@ import java.util.LinkedList;
import java.util.Random;

/**
 * Provide a log buffer of fixed length that enforces privacy restrictions.
 * MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees.
 *
 * The privacy restrictions include making sure that no numbers are logged, that all logged words
 * are in the dictionary, and that words are recorded infrequently enough that the user's meaning
 * cannot be easily determined.
 * There are three forms of privacy protection: 1) only words in the main dictionary are allowed to
 * be logged in enough detail to determine their contents, 2) only a subset of words are logged
 * in detail, such as 10%, and 3) no numbers are logged.
 *
 * This class maintains a list of LogUnits, each corresponding to a word.  As the user completes
 * words, they are added here.  But if the user backs up over their current word to edit a word
 * entered earlier, then it is pulled out of this LogBuffer, changes are then added to the end of
 * the LogUnit, and it is pushed back in here when the user is done.  Because words may be pulled
 * back out even after they are pushed in, we must not publish the contents of this LogBuffer too
 * quickly.  However, we cannot let the contents pile up either, or it will limit the editing that
 * a user can perform.
 *
 * To balance these requirements (keep history so user can edit, flush history so it does not pile
 * up), the LogBuffer is considered "complete" when the user has entered enough words to form an
 * n-gram, followed by enough additional non-detailed words (that are in the 90%, as per above).
 * Once complete, the n-gram may be published to flash storage (via the ResearchLog class).
 * However, the additional non-detailed words are retained, in case the user backspaces to edit
 * them.  The MainLogBuffer then continues to add words, publishing individual non-detailed words
 * as new words arrive.  After enough non-detailed words have been pushed out to account for the
 * 90% between words, the words at the front of the LogBuffer can be published as an n-gram again.
 *
 * If the words that would form the valid n-gram are not in the dictionary, then words are pushed
 * through the LogBuffer one at a time until an n-gram is found that is entirely composed of
 * dictionary words.
 *
 * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
 * n-gram containing dictionary words.
 */
public class MainLogBuffer extends FixedLogBuffer {
    private static final String TAG = MainLogBuffer.class.getSimpleName();
    private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;

    // The size of the n-grams logged.  E.g. N_GRAM_SIZE = 2 means to sample bigrams.
    private static final int N_GRAM_SIZE = 2;
    public static final int N_GRAM_SIZE = 2;
    // The number of words between n-grams to omit from the log.  If debugging, record 50% of all
    // words.  Otherwise, only record 10%.
    private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
@@ -46,49 +70,31 @@ public class MainLogBuffer extends FixedLogBuffer {
    private final ResearchLog mResearchLog;
    private Suggest mSuggest;

    // The minimum periodicity with which n-grams can be sampled.  E.g. mMinWordPeriod is 10 if
    // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10 is;
    // likewise words 11-18 are not, but the bigram at words 19 and 20 is.  If an n-gram is not
    // safe (e.g. it contains a number in the middle or an out-of-vocabulary word), then sampling
    // is delayed until a safe n-gram does appear.
    /* package for test */ int mMinWordPeriod;
    /* package for test */ int mNumWordsBetweenNGrams;

    // Counter for words left to suppress before an n-gram can be sampled.  Reset to mMinWordPeriod
    // after a sample is taken.
    /* package for test */ int mWordsUntilSafeToSample;
    /* package for test */ int mNumWordsUntilSafeToSample;

    /**
     * Creates a MainLogBuffer that publishes to the given ResearchLog.
     *
     * Sets the spacing between sampled n-grams from DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES and
     * picks a random initial countdown, so that sampling does not always start at the same
     * offset.
     *
     * NOTE(review): this block contains two super(...) calls and paired assignments to
     * mMinWordPeriod / mNumWordsBetweenNGrams and mWordsUntilSafeToSample /
     * mNumWordsUntilSafeToSample.  These look like the before/after lines of a diff rendered
     * without +/- markers -- confirm which of each pair belongs in the real file.
     *
     * @param researchLog the log stored in mResearchLog; onShiftOut() skips publishing when it
     *         is null.
     */
    public MainLogBuffer(final ResearchLog researchLog) {
        super(N_GRAM_SIZE);
        super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
        mResearchLog = researchLog;
        mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE;
        mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
        final Random random = new Random();
        mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod);
        // When DEBUG is set the countdown starts at 0; otherwise it is a random value in
        // [0, mNumWordsBetweenNGrams].
        mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
    }

    /**
     * Stores the Suggest instance in mSuggest, replacing any previous value.
     *
     * @param suggest the Suggest to store.  isNGramSafe() checks mSuggest for null and for the
     *         presence of a main dictionary.
     */
    public void setSuggest(final Suggest suggest) {
        mSuggest = suggest;
    }

    /**
     * Adds a LogUnit to the buffer via the superclass and, if the unit has a word, counts it
     * down from mWordsUntilSafeToSample (never below zero).
     *
     * @param newLogUnit the LogUnit being shifted into the buffer.
     */
    @Override
    public void shiftIn(final LogUnit newLogUnit) {
        super.shiftIn(newLogUnit);
        if (newLogUnit.hasWord()) {
            // Only decrement while positive so the counter bottoms out at 0.
            if (mWordsUntilSafeToSample > 0) {
                mWordsUntilSafeToSample--;
            }
        }
        if (DEBUG) {
            Log.d(TAG, "shiftedIn " + (newLogUnit.hasWord() ? newLogUnit.getWord() : ""));
        }
    }

    /**
     * Restarts the countdown of words that must pass before another n-gram may be sampled.
     *
     * NOTE(review): the two assignments below look like the before/after lines of a diff
     * rendered without +/- markers -- confirm which one belongs in the real file.
     */
    public void resetWordCounter() {
        mWordsUntilSafeToSample = mMinWordPeriod;
        mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
    }

    /**
     * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete
     * form and still protect the user's privacy.
     * Determines whether uploading the n words at the front of the MainLogBuffer will not violate
     * user privacy.
     *
     * The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
     * non-character data that is typed between words.  The decision about privacy is made based on
@@ -97,10 +103,10 @@ public class MainLogBuffer extends FixedLogBuffer {
     * the screen orientation and other characteristics about the device can be uploaded without
     * revealing much about the user.
     */
    public boolean isSafeToLog() {
    public boolean isNGramSafe() {
        // Check that we are not sampling too frequently.  Having sampled recently might disclose
        // too much of the user's intended meaning.
        if (mWordsUntilSafeToSample > 0) {
        if (mNumWordsUntilSafeToSample > 0) {
            return false;
        }
        if (mSuggest == null || !mSuggest.hasMainDictionary()) {
@@ -119,8 +125,8 @@ public class MainLogBuffer extends FixedLogBuffer {
        // complete buffer contents in detail.
        final LinkedList<LogUnit> logUnits = getLogUnits();
        final int length = logUnits.size();
        int wordsFound = 0;
        for (int i = 0; i < length; i++) {
        int wordsNeeded = N_GRAM_SIZE;
        for (int i = 0; i < length && wordsNeeded > 0; i++) {
            final LogUnit logUnit = logUnits.get(i);
            final String word = logUnit.getWord();
            if (word == null) {
@@ -136,26 +142,41 @@ public class MainLogBuffer extends FixedLogBuffer {
                                + ", isValid: " + (dictionary.isValidWord(word)));
                    }
                    return false;
                } else {
                    wordsFound++;
                }
            }
                }
        if (wordsFound < N_GRAM_SIZE) {
            // Not enough words.  Not unsafe, but reject anyway.
            if (DEBUG) {
                Log.d(TAG, "not enough words");
            }
            return false;
        }
        // All checks have passed; this buffer's content can be safely uploaded.
        return true;
    }

    /**
     * Reports whether the buffer currently holds enough words to form an n-gram.
     *
     * Walks the LogUnits from the front of the buffer and counts those whose getWord() is
     * non-null, stopping as soon as N_GRAM_SIZE of them have been seen.
     *
     * @return true if at least N_GRAM_SIZE LogUnits in the buffer carry a word, false otherwise.
     */
    public boolean isNGramComplete() {
        int wordsNeeded = N_GRAM_SIZE;
        // Iterate instead of calling get(i): indexed access on a LinkedList has to walk the
        // list on every call, which made the original scan quadratic in the buffer length.
        for (final LogUnit logUnit : getLogUnits()) {
            if (wordsNeeded <= 0) {
                break;
            }
            if (logUnit.getWord() != null) {
                wordsNeeded--;
            }
        }
        return wordsNeeded == 0;
    }

    /**
     * Handles a LogUnit leaving the buffer: publishes it to the research log (when one is set)
     * and, if the unit carries a word, counts it against the number of words that must still
     * pass before another n-gram may be sampled.
     *
     * @param logUnit the LogUnit that was shifted out of the buffer.
     */
    @Override
    protected void onShiftOut(final LogUnit logUnit) {
        if (mResearchLog != null) {
            // NOTE(review): two consecutive publish calls for the same unit look like the
            // before/after lines of a diff rendered without +/- markers -- confirm that only
            // the second one is intended.
            mResearchLog.publish(logUnit, false /* isIncludingPrivateData */);
            mResearchLog.publish(logUnit,
                    ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
        }
        // The countdown bottoms out at 0; wordless units do not count towards it.
        if (logUnit.hasWord() && mNumWordsUntilSafeToSample > 0) {
            mNumWordsUntilSafeToSample--;
            // Guarded like every other log call in this class so production builds stay quiet.
            if (DEBUG) {
                Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
            }
        }
        if (DEBUG) {
            Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : ""));
        }
    }
}
+29 −9
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
    private static final String TAG = ResearchLogger.class.getSimpleName();
    private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
    // Whether all n-grams should be logged.  true will disclose private info.
    private static final boolean IS_LOGGING_EVERYTHING = false
    public static final boolean IS_LOGGING_EVERYTHING = false
            && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
    // Whether the TextView contents are logged at the end of the session.  true will disclose
    // private info.
@@ -394,8 +394,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
        commitCurrentLogUnit();

        if (mMainLogBuffer != null) {
            while (!mMainLogBuffer.isEmpty()) {
                if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
                        mMainResearchLog != null) {
                    publishLogBuffer(mMainLogBuffer, mMainResearchLog,
                    IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
                            true /* isIncludingPrivateData */);
                    mMainLogBuffer.resetWordCounter();
                } else {
                    mMainLogBuffer.shiftOutThroughFirstWord();
                }
            }
            mMainResearchLog.close(null /* callback */);
            mMainLogBuffer = null;
        }
@@ -702,8 +710,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
        }
        if (!mCurrentLogUnit.isEmpty()) {
            if (mMainLogBuffer != null) {
                if ((mMainLogBuffer.isSafeToLog() || IS_LOGGING_EVERYTHING)
                        && mMainResearchLog != null) {
                if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
                        mMainLogBuffer.isNGramComplete() &&
                        mMainResearchLog != null) {
                    publishLogBuffer(mMainLogBuffer, mMainResearchLog,
                            true /* isIncludingPrivateData */);
                    mMainLogBuffer.resetWordCounter();
@@ -714,6 +723,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
                mFeedbackLogBuffer.shiftIn(mCurrentLogUnit);
            }
            mCurrentLogUnit = new LogUnit();
        } else {
            if (DEBUG) {
                Log.d(TAG, "Warning: tried to commit empty log unit.");
            }
        }
    }

@@ -756,8 +769,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
            mFeedbackLogBuffer.unshiftIn();
        }
        if (DEBUG) {
            Log.d(TAG, "uncommitCurrentLogUnit back to " + (mCurrentLogUnit.hasWord()
                    ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
            Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
                    + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
        }
    }

@@ -773,12 +786,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
                isIncludingPrivateData);
        researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
        LogUnit logUnit;
        while ((logUnit = logBuffer.shiftOut()) != null) {
        int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE;
        while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) {
            if (DEBUG) {
                Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
                        : "<wordless>"));
            }
            researchLog.publish(logUnit, isIncludingPrivateData);
            if (logUnit.getWord() != null) {
                numWordsToPublish--;
            }
        }
        final LogUnit closingLogUnit = new LogUnit();
        closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
@@ -1254,9 +1271,12 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
    public static void latinIME_revertCommit(final String committedWord,
            final String originallyTypedWord, final boolean isBatchMode) {
        final ResearchLogger researchLogger = getInstance();
        final LogUnit logUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
        // Assume that mCurrentLogUnit has been restored to contain the reverted word.
        final LogUnit logUnit = researchLogger.mCurrentLogUnit;
        if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
            if (logUnit != null) {
                // Probably not necessary, but setting as a precaution in case the word isn't
                // committed later.
                logUnit.setWord(originallyTypedWord);
            }
        }