Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8b788374 authored by Kurt Partridge's avatar Kurt Partridge Committed by Android (Google) Code Review
Browse files

Merge "[Rlog78b] Make log privacy filtering decisions on n-grams"

parents 08d9c996 80685aa4
Loading
Loading
Loading
Loading
+56 −25
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

package com.android.inputmethod.research;

import java.util.ArrayList;
import java.util.LinkedList;

/**
@@ -65,8 +66,13 @@ public class FixedLogBuffer extends LogBuffer {
            super.shiftIn(newLogUnit);
            return;
        }
        if (mNumActualWords == mWordCapacity) {
            shiftOutThroughFirstWord();
        if (mNumActualWords >= mWordCapacity) {
            // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
            onBufferFull();
            // If still full, evict.
            if (mNumActualWords >= mWordCapacity) {
                shiftOutWords(1);
            }
        }
        super.shiftIn(newLogUnit);
        mNumActualWords++; // Must be a word, or we wouldn't be here.
@@ -81,18 +87,8 @@ public class FixedLogBuffer extends LogBuffer {
        return logUnit;
    }

    public void shiftOutThroughFirstWord() {
        final LinkedList<LogUnit> logUnits = getLogUnits();
        while (!logUnits.isEmpty()) {
            final LogUnit logUnit = logUnits.removeFirst();
            onShiftOut(logUnit);
            if (logUnit.hasWord()) {
                // Successfully shifted out a word-containing LogUnit and made space for the new
                // LogUnit.
                mNumActualWords--;
                break;
            }
        }
    /**
     * Returns the current value of the buffer's word counter, i.e. the number of word-containing
     * {@link LogUnit}s that have been shifted in and not yet shifted out.
     *
     * @return the number of words currently tracked by this buffer
     */
    public int getNumWords() {
        return mNumActualWords;
    }

    /**
@@ -105,28 +101,63 @@ public class FixedLogBuffer extends LogBuffer {
    }

    /**
     * Called when a LogUnit is removed from the LogBuffer as a result of a shiftIn.  LogUnits are
     * removed in the order entered.  This method is not called when shiftOut is called directly.
     * Called when the buffer has just shifted in one more word than its maximum, and it is about
     * to shift out LogUnits to bring it back down to the maximum.
     *
     * Base class does nothing; subclasses may override if they want to record non-privacy sensitive
     * events that fall off the end.
     */
    protected void onShiftOut(final LogUnit logUnit) {
    protected void onBufferFull() {
    }

    /**
     * Called to deliberately remove the oldest LogUnit.  Usually called when draining the
     * LogBuffer.
     */
    @Override
    public LogUnit shiftOut() {
        if (isEmpty()) {
            return null;
        }
        final LogUnit logUnit = super.shiftOut();
        if (logUnit.hasWord()) {
        if (logUnit != null && logUnit.hasWord()) {
            mNumActualWords--;
        }
        return logUnit;
    }

    /**
     * Removes LogUnits from the front of the buffer until the word count has dropped by
     * {@code numWords}, or until the buffer has no LogUnits left, whichever comes first.
     *
     * @param numWords the number of words to shift out
     */
    protected void shiftOutWords(final int numWords) {
        final int wordCountGoal = mNumActualWords - numWords;
        final LinkedList<LogUnit> pending = getLogUnits();
        for (;;) {
            // shiftOut() lowers mNumActualWords whenever the removed LogUnit carries a word.
            if (mNumActualWords <= wordCountGoal || pending.isEmpty()) {
                break;
            }
            shiftOut();
        }
    }

    /**
     * Drains the buffer by shifting out LogUnits one at a time until none remain, then sets the
     * word count to zero.
     */
    public void shiftOutAll() {
        final LinkedList<LogUnit> remaining = getLogUnits();
        for (; !remaining.isEmpty(); ) {
            shiftOut();
        }
        // Nothing is left in the buffer, so no words are left either.
        mNumActualWords = 0;
    }

    /**
     * Returns the {@link LogUnit}s at the front of the buffer, in order, up to and including the
     * one that holds the {@code n}th word.  LogUnits without a word that precede or sit between
     * those words are included too.  No more than {@code n} of the returned LogUnits will have
     * words associated with them.  If the buffer holds fewer than {@code n} words, all of its
     * LogUnits are returned.  The buffer itself is not modified.
     *
     * @param n The maximum number of {@link LogUnit}s with words to return.
     * @return The list of the {@link LogUnit}s containing the first n words
     */
    public ArrayList<LogUnit> peekAtFirstNWords(int n) {
        final LinkedList<LogUnit> logUnits = getLogUnits();
        // Allocate space for n*2 logUnits.  There will be at least n, one for each word, and
        // there may be additional for punctuation, between-word commands, etc.  This should be
        // enough that reallocation won't be necessary.
        final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
        int wordsRemaining = n;
        // Iterate rather than index: LinkedList.get(i) walks the list on every call, which would
        // make this loop quadratic in the number of LogUnits visited.
        for (final LogUnit logUnit : logUnits) {
            if (wordsRemaining <= 0) {
                break;
            }
            list.add(logUnit);
            if (logUnit.getWord() != null) {
                wordsRemaining--;
            }
        }
        return list;
    }
}
+7 −8
Original line number Diff line number Diff line
@@ -98,7 +98,7 @@ import java.util.Map;
     * Publish the contents of this LogUnit to researchLog.
     */
    public synchronized void publishTo(final ResearchLog researchLog,
            final boolean isIncludingPrivateData) {
            final boolean canIncludePrivateData) {
        // Prepare debugging output if necessary
        final StringWriter debugStringWriter;
        final JsonWriter debugJsonWriter;
@@ -123,7 +123,7 @@ import java.util.Map;
            JsonWriter jsonWriter = null;
            for (int i = 0; i < size; i++) {
                final LogStatement logStatement = mLogStatementList.get(i);
                if (!isIncludingPrivateData && logStatement.mIsPotentiallyPrivate) {
                if (!canIncludePrivateData && logStatement.mIsPotentiallyPrivate) {
                    continue;
                }
                if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) {
@@ -134,7 +134,7 @@ import java.util.Map;
                // will not have been opened for writing.
                if (jsonWriter == null) {
                    jsonWriter = researchLog.getValidJsonWriterLocked();
                    outputLogUnitStart(jsonWriter, isIncludingPrivateData);
                    outputLogUnitStart(jsonWriter, canIncludePrivateData);
                }
                outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i),
                        mTimeList.get(i));
@@ -145,7 +145,7 @@ import java.util.Map;
            }
            if (jsonWriter != null) {
                // We must have called logUnitStart earlier, so emit a logUnitStop.
                outputLogUnitStop(jsonWriter, isIncludingPrivateData);
                outputLogUnitStop(jsonWriter);
            }
        }
        if (DEBUG) {
@@ -171,11 +171,11 @@ import java.util.Map;
    private static final String LOG_UNIT_END_KEY = "logUnitEnd";

    private void outputLogUnitStart(final JsonWriter jsonWriter,
            final boolean isIncludingPrivateData) {
            final boolean canIncludePrivateData) {
        try {
            jsonWriter.beginObject();
            jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
            if (isIncludingPrivateData) {
            if (canIncludePrivateData) {
                jsonWriter.name(WORD_KEY).value(getWord());
            }
            jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
@@ -186,8 +186,7 @@ import java.util.Map;
        }
    }

    private void outputLogUnitStop(final JsonWriter jsonWriter,
            final boolean isIncludingPrivateData) {
    private void outputLogUnitStop(final JsonWriter jsonWriter) {
        try {
            jsonWriter.beginObject();
            jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
+90 −39
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.define.ProductionFlag;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.Random;

@@ -56,19 +57,24 @@ import java.util.Random;
 * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
 * n-gram containing dictionary words.
 */
public class MainLogBuffer extends FixedLogBuffer {
public abstract class MainLogBuffer extends FixedLogBuffer {
    private static final String TAG = MainLogBuffer.class.getSimpleName();
    private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;

    // The size of the n-grams logged.  E.g. N_GRAM_SIZE = 2 means to sample bigrams.
    public static final int N_GRAM_SIZE = 2;
    // The number of words between n-grams to omit from the log.  If debugging, record 50% of all
    // words.  Otherwise, only record 10%.

    // Whether all words should be recorded, leaving no unsampled words between bigrams.  Useful
    // for testing.
    /* package for test */ static final boolean IS_LOGGING_EVERYTHING = false
            && ProductionFlag.IS_EXPERIMENTAL_DEBUG;

    // The number of words between n-grams to omit from the log.
    private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
            ProductionFlag.IS_EXPERIMENTAL_DEBUG ? 2 : 18;
            IS_LOGGING_EVERYTHING ? 0 : (DEBUG ? 2 : 18);

    private final ResearchLog mResearchLog;
    private Suggest mSuggest;
    private boolean mIsStopping = false;

    /* package for test */ int mNumWordsBetweenNGrams;

@@ -76,9 +82,8 @@ public class MainLogBuffer extends FixedLogBuffer {
    // after a sample is taken.
    /* package for test */ int mNumWordsUntilSafeToSample;

    public MainLogBuffer(final ResearchLog researchLog) {
    public MainLogBuffer() {
        super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
        mResearchLog = researchLog;
        mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
        final Random random = new Random();
        mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
@@ -92,6 +97,10 @@ public class MainLogBuffer extends FixedLogBuffer {
        mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
    }

    /**
     * Marks this buffer as stopping by setting {@code mIsStopping} to true.
     *
     * The flag is consulted by the n-gram safety check: when {@code IS_LOGGING_EVERYTHING} is
     * enabled and the buffer is stopping, n-grams are accepted without the length check.
     */
    public void setIsStopping() {
        mIsStopping = true;
    }

    /**
     * Determines whether uploading the n words at the front of the MainLogBuffer will not
     * violate user privacy.
@@ -103,16 +112,36 @@ public class MainLogBuffer extends FixedLogBuffer {
     * the screen orientation and other characteristics about the device can be uploaded without
     * revealing much about the user.
     */
    public boolean isNGramSafe() {
    private boolean isSafeNGram(final ArrayList<LogUnit> logUnits, final int minNGramSize) {
        // Bypass privacy checks when debugging.
        if (IS_LOGGING_EVERYTHING) {
            if (mIsStopping) {
                return true;
            } else {
                // Only check that it is the right length.  If not, wait for later words to make
                // complete n-grams.
                int numWordsInLogUnitList = 0;
                final int length = logUnits.size();
                for (int i = 0; i < length; i++) {
                    final LogUnit logUnit = logUnits.get(i);
                    final String word = logUnit.getWord();
                    if (word != null) {
                        numWordsInLogUnitList++;
                    }
                }
                return numWordsInLogUnitList >= minNGramSize;
            }
        }

        // Check that we are not sampling too frequently.  Having sampled recently might disclose
        // too much of the user's intended meaning.
        if (mNumWordsUntilSafeToSample > 0) {
            return false;
        }
        if (mSuggest == null || !mSuggest.hasMainDictionary()) {
            // Main dictionary is unavailable.  Since we cannot check it, we cannot tell if a word
            // is out-of-vocabulary or not.  Therefore, we must judge the entire buffer contents to
            // potentially pose a privacy risk.
            // Main dictionary is unavailable.  Since we cannot check it, we cannot tell if a
            // word is out-of-vocabulary or not.  Therefore, we must judge the entire buffer
            // contents to potentially pose a privacy risk.
            return false;
        }
        // Reload the dictionary in case it has changed (e.g., because the user has changed
@@ -121,12 +150,12 @@ public class MainLogBuffer extends FixedLogBuffer {
        if (dictionary == null) {
            return false;
        }
        // Check each word in the buffer.  If any word poses a privacy threat, we cannot upload the
        // complete buffer contents in detail.
        final LinkedList<LogUnit> logUnits = getLogUnits();

        // Check each word in the buffer.  If any word poses a privacy threat, we cannot upload
        // the complete buffer contents in detail.
        int numWordsInLogUnitList = 0;
        final int length = logUnits.size();
        int wordsNeeded = N_GRAM_SIZE;
        for (int i = 0; i < length && wordsNeeded > 0; i++) {
        for (int i = 0; i < length; i++) {
            final LogUnit logUnit = logUnits.get(i);
            final String word = logUnit.getWord();
            if (word == null) {
@@ -135,6 +164,7 @@ public class MainLogBuffer extends FixedLogBuffer {
                    return false;
                }
            } else {
                numWordsInLogUnitList++;
                // Words not in the dictionary are a privacy threat.
                if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
                    if (DEBUG) {
@@ -145,38 +175,59 @@ public class MainLogBuffer extends FixedLogBuffer {
                }
            }
        }
        // All checks have passed; this buffer's content can be safely uploaded.
        return true;

        // Finally, only return true if the minNGramSize is met.
        return numWordsInLogUnitList >= minNGramSize;
    }

    public boolean isNGramComplete() {
    public void shiftAndPublishAll() {
        final LinkedList<LogUnit> logUnits = getLogUnits();
        final int length = logUnits.size();
        int wordsNeeded = N_GRAM_SIZE;
        for (int i = 0; i < length && wordsNeeded > 0; i++) {
            final LogUnit logUnit = logUnits.get(i);
            final String word = logUnit.getWord();
            if (word != null) {
                wordsNeeded--;
        while (!logUnits.isEmpty()) {
            publishLogUnitsAtFrontOfBuffer();
        }
    }
        return wordsNeeded == 0;
    }

    @Override
    protected void onShiftOut(final LogUnit logUnit) {
        if (mResearchLog != null) {
            mResearchLog.publish(logUnit,
                    ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
    protected final void onBufferFull() {
        publishLogUnitsAtFrontOfBuffer();
    }
        if (logUnit.hasWord()) {
            if (mNumWordsUntilSafeToSample > 0) {
                mNumWordsUntilSafeToSample--;
                Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);

    /**
     * Publishes and removes the LogUnits at the front of the buffer.
     *
     * If the first {@code N_GRAM_SIZE} words form a safe n-gram, they are published with private
     * data allowed, shifted out, and the word counter is reset.  Otherwise only the first word
     * (with any LogUnits ahead of it) is published without private data and shifted out.
     */
    protected final void publishLogUnitsAtFrontOfBuffer() {
        final ArrayList<LogUnit> nGramCandidate = peekAtFirstNWords(N_GRAM_SIZE);
        if (!isSafeNGram(nGramCandidate, N_GRAM_SIZE)) {
            // No good n-gram at front.  Publish just the first word (or, if there is none, the
            // existing logUnits) with private details withheld, then drop it.
            final ArrayList<LogUnit> firstWordUnits = peekAtFirstNWords(1);
            publish(firstWordUnits, false /* canIncludePrivateData */);
            shiftOutWords(1);
            return;
        }
        // Good n-gram at the front of the buffer.  Publish it, disclosing details.
        publish(nGramCandidate, true /* canIncludePrivateData */);
        shiftOutWords(N_GRAM_SIZE);
        resetWordCounter();
    }

    /**
     * Called when a list of logUnits should be published.
     *
     * It is the subclass's responsibility to implement the publication.
     *
     * @param logUnits The list of logUnits to be published.
     * @param canIncludePrivateData Whether the private data in the logUnits can be included in
     * publication.
     */
    protected abstract void publish(final ArrayList<LogUnit> logUnits,
            final boolean canIncludePrivateData);

    /**
     * Shifts out words as the superclass does, then lowers the count of words remaining until it
     * is safe to sample by the number of words that were actually removed.
     *
     * @param numWords the number of words to shift out
     */
    @Override
    protected void shiftOutWords(int numWords) {
        final int oldNumActualWords = getNumActualWords();
        super.shiftOutWords(numWords);
        // The superclass may remove fewer words than requested if the buffer runs out, so measure
        // the change instead of assuming numWords.
        final int numWordsShifted = oldNumActualWords - getNumActualWords();
        mNumWordsUntilSafeToSample -= numWordsShifted;
        if (DEBUG) {
            Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
        }
    }
}
+2 −2
Original line number Diff line number Diff line
@@ -185,12 +185,12 @@ public class ResearchLog {
        mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS);
    }

    public synchronized void publish(final LogUnit logUnit, final boolean isIncludingPrivateData) {
    public synchronized void publish(final LogUnit logUnit, final boolean canIncludePrivateData) {
        try {
            mExecutor.submit(new Callable<Object>() {
                @Override
                public Object call() throws Exception {
                    logUnit.publishTo(ResearchLog.this, isIncludingPrivateData);
                    logUnit.publishTo(ResearchLog.this, canIncludePrivateData);
                    scheduleFlush();
                    return null;
                }
+64 −48

File changed.

Preview size limit exceeded, changes collapsed.