Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e070aa86 authored by Alexander Potapenko's avatar Alexander Potapenko
Browse files

BootReceiver: collect kernel memory corruptions from dmesg



Production memory error detection tools (KFENCE and potentially MTE-based KASAN)
issue "error_report_end" tracing events that can be monitored via a
tracefs pipe.
We change BootReceiver to watch for such events and, when a memory error
is reported, collect and parse dmesg output to grab the error report and
copy it to Dropbox.

Test: manual runs with KFENCE enabled
Test: atest BootReceiverTest
Bug: 172316664

Signed-off-by: default avatarAlexander Potapenko <glider@google.com>
Change-Id: Ib992fdb8c0f15ef69d020d978b64e25cda872ab9
parent 9676344a
Loading
Loading
Loading
Loading
+218 −0
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@

package com.android.server;

import static android.system.OsConstants.O_RDONLY;

import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.Intent;
@@ -24,12 +26,15 @@ import android.os.Build;
import android.os.DropBoxManager;
import android.os.Environment;
import android.os.FileUtils;
import android.os.MessageQueue.OnFileDescriptorEventListener;
import android.os.RecoverySystem;
import android.os.RemoteException;
import android.os.ServiceManager;
import android.os.SystemProperties;
import android.os.storage.StorageManager;
import android.provider.Downloads;
import android.system.ErrnoException;
import android.system.Os;
import android.text.TextUtils;
import android.util.AtomicFile;
import android.util.EventLog;
@@ -46,11 +51,15 @@ import com.android.internal.util.XmlUtils;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Matcher;
@@ -116,6 +125,12 @@ public class BootReceiver extends BroadcastReceiver {
    private static final String METRIC_SYSTEM_SERVER = "shutdown_system_server";
    private static final String METRIC_SHUTDOWN_TIME_START = "begin_shutdown";

    // Location of ftrace pipe for notifications from kernel memory tools like KFENCE and KASAN.
    private static final String ERROR_REPORT_TRACE_PIPE =
            "/sys/kernel/tracing/instances/bootreceiver/trace_pipe";
    // Avoid reporing the same bug from processDmesg() twice.
    private static String sLastReportedBug = null;

    @Override
    public void onReceive(final Context context, Intent intent) {
        // Log boot events in the background to avoid blocking the main thread with I/O
@@ -143,6 +158,209 @@ public class BootReceiver extends BroadcastReceiver {

            }
        }.start();

        FileDescriptor tracefd = null;
        try {
            tracefd = Os.open(ERROR_REPORT_TRACE_PIPE, O_RDONLY, 0600);
        } catch (ErrnoException e) {
            Slog.wtf(TAG, "Could not open " + ERROR_REPORT_TRACE_PIPE, e);
            return;
        }

        /*
         * Event listener to watch for memory tool error reports.
         * We read from /sys/kernel/tracing/instances/bootreceiver/trace_pipe (set up by the
         * system), which will print an ftrace event when a memory corruption is detected in the
         * kernel.
         * When an error is detected, we run the dmesg shell command and process its output.
         */
        OnFileDescriptorEventListener traceCallback = new OnFileDescriptorEventListener() {
            final int mBufferSize = 1024;
            byte[] mTraceBuffer = new byte[mBufferSize];
            @Override
            public int onFileDescriptorEvents(FileDescriptor fd, int events) {
                /*
                 * Read from the tracing pipe set up to monitor the error_report_end events.
                 * When a tracing event occurs, the kernel writes a short (~100 bytes) line to the
                 * pipe, e.g.:
                 *   ...-11210  [004] d..1   285.322307: error_report_end: [kfence] ffffff8938a05000
                 * The buffer size we use for reading should be enough to read the whole
                 * line, but to be on the safe side we keep reading until the buffer
                 * contains a '\n' character. In the unlikely case of a very buggy kernel
                 * the buffer may contain multiple tracing events that cannot be attributed
                 * to particular error reports. In that case the latest error report
                 * residing in dmesg is picked.
                 */
                try {
                    int nbytes = Os.read(fd, mTraceBuffer, 0, mBufferSize);
                    if (nbytes > 0) {
                        String readStr = new String(mTraceBuffer);
                        if (readStr.indexOf("\n") == -1) {
                            return OnFileDescriptorEventListener.EVENT_INPUT;
                        }
                        processDmesg(context);
                    }
                } catch (Exception e) {
                    Slog.wtf(TAG, "Error processing dmesg output", e);
                    return 0;  // Unregister the handler.
                }
                return OnFileDescriptorEventListener.EVENT_INPUT;
            }
        };

        IoThread.get().getLooper().getQueue().addOnFileDescriptorEventListener(
                tracefd, OnFileDescriptorEventListener.EVENT_INPUT, traceCallback);

    }

    /**
     * Check whether it is safe to collect this dmesg line or not.
     *
     * We only consider lines belonging to KASAN or KFENCE reports, but those may still contain
     * user information, namely the process name:
     *
     *   [   69.547684] [ T6006]c7   6006  CPU: 7 PID: 6006 Comm: sh Tainted: G S       C O      ...
     *
     * hardware information:
     *
     *   [   69.558923] [ T6006]c7   6006  Hardware name: <REDACTED>
     *
     * or register dump (in KASAN reports only):
     *
     *   ... RIP: 0033:0x7f96443109da
     *   ... RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
     *   ... RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
     *
     * (on x86_64)
     *
     *   ... pc : lpm_cpuidle_enter+0x258/0x384
     *   ... lr : lpm_cpuidle_enter+0x1d4/0x384
     *   ... sp : ffffff800820bea0
     *   ... x29: ffffff800820bea0 x28: ffffffc2305f3ce0
     *   ... ...
     *   ... x9 : 0000000000000001 x8 : 0000000000000000
     * (on ARM64)
     *
     * We therefore omit the lines that contain "Comm:", "Hardware name:", or match the general
     * purpose register regexp.
     *
     * @param  line single line of `dmesg` output.
     * @return      updated line with sensitive data removed, or null if the line must be skipped.
     */
    public static String stripSensitiveData(String line) {
        /*
         * General purpose register names begin with "R" on x86_64 and "x" on ARM64. The letter is
         * followed by two symbols (numbers, letters or spaces) and a colon, which is followed by a
         * 16-digit hex number. The optional "_" prefix accounts for ORIG_RAX on x86.
         */
        final String registerRegex = "[ _][Rx]..: [0-9a-f]{16}";
        final Pattern registerPattern = Pattern.compile(registerRegex);

        final String corruptionRegex = "Detected corrupted memory at 0x[0-9a-f]+";
        final Pattern corruptionPattern = Pattern.compile(corruptionRegex);

        if (line.contains("Comm: ") || line.contains("Hardware name: ")) return null;
        if (registerPattern.matcher(line).find()) return null;

        Matcher cm = corruptionPattern.matcher(line);
        if (cm.find()) return cm.group(0);
        return line;
    }

    /*
     * Search dmesg output for the last error report from KFENCE or KASAN and copy it to Dropbox.
     *
     * Example report printed by the kernel (redacted to fit into 100 column limit):
     *   [   69.236673] [ T6006]c7   6006  =========================================================
     *   [   69.245688] [ T6006]c7   6006  BUG: KFENCE: out-of-bounds in kfence_handle_page_fault
     *   [   69.245688] [ T6006]c7   6006
     *   [   69.257816] [ T6006]c7   6006  Out-of-bounds access at 0xffffffca75c45000 (...)
     *   [   69.267102] [ T6006]c7   6006   kfence_handle_page_fault+0x1bc/0x208
     *   [   69.273536] [ T6006]c7   6006   __do_kernel_fault+0xa8/0x11c
     *   ...
     *   [   69.355427] [ T6006]c7   6006  kfence-#2 [0xffffffca75c46f30-0xffffffca75c46fff, ...
     *   [   69.366938] [ T6006]c7   6006   __d_alloc+0x3c/0x1b4
     *   [   69.371946] [ T6006]c7   6006   d_alloc_parallel+0x48/0x538
     *   [   69.377578] [ T6006]c7   6006   __lookup_slow+0x60/0x15c
     *   ...
     *   [   69.547684] [ T6006]c7   6006  CPU: 7 PID: 6006 Comm: sh Tainted: G S       C O      ...
     *   [   69.558923] [ T6006]c7   6006  Hardware name: <REDACTED>
     *   [   69.567059] [ T6006]c7   6006  =========================================================
     *
     *   We rely on the kernel printing task/CPU ID for every log line (CONFIG_PRINTK_CALLER=y).
     *   E.g. for the above report the task ID is T6006. Report lines may interleave with lines
     *   printed by other kernel tasks, which will have different task IDs, so in order to collect
     *   the report we:
     *    - find the next occurrence of the "BUG: " line in the kernel log, parse it to obtain the
     *      task ID and the tool name;
     *    - scan the rest of dmesg output and pick every line that has the same task ID, until we
     *      encounter a horizontal ruler, i.e.:
     *      [   69.567059] [ T6006]c7   6006  ======================================================
     *    - add that line to the error report, unless it contains sensitive information (see
     *      logLinePotentiallySensitive())
     *    - repeat the above steps till the last report is found.
     */
    private void processDmesg(Context ctx) throws IOException {

        /*
         * Only SYSTEM_KASAN_ERROR_REPORT and SYSTEM_KFENCE_ERROR_REPORT are supported at the
         * moment.
         */
        final String[] bugTypes = new String[] { "KASAN", "KFENCE" };
        final String tsRegex = "^\\[[^]]+\\] ";
        final String bugRegex =
                tsRegex + "\\[([^]]+)\\].*BUG: (" + String.join("|", bugTypes) + "):";
        final Pattern bugPattern = Pattern.compile(bugRegex);

        Process p = new ProcessBuilder("/system/bin/timeout", "-k", "90s", "60s",
                                       "dmesg").redirectErrorStream(true).start();
        BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        String line = null;
        String task = null;
        String tool = null;
        String bugTitle = null;
        Pattern reportPattern = null;
        ArrayList<String> currentReport = null;
        String lastReport = null;

        while ((line = reader.readLine()) != null) {
            if (currentReport == null) {
                Matcher bm = bugPattern.matcher(line);
                if (!bm.find()) continue;
                task = bm.group(1);
                tool = bm.group(2);
                bugTitle = line;
                currentReport = new ArrayList<String>();
                currentReport.add(line);
                String reportRegex = tsRegex + "\\[" + task + "\\].*";
                reportPattern = Pattern.compile(reportRegex);
                continue;
            }
            Matcher rm = reportPattern.matcher(line);
            if (!rm.matches()) continue;
            if ((line = stripSensitiveData(line)) == null) continue;
            if (line.contains("================================")) {
                lastReport = String.join("\n", currentReport);
                currentReport = null;
                continue;
            }
            currentReport.add(line);
        }
        if (lastReport == null) {
            Slog.w(TAG, "Could not find report in dmesg.");
            return;
        }

        // Avoid sending the same bug report twice.
        if (bugTitle == sLastReportedBug) return;

        final String reportTag = "SYSTEM_" + tool + "_ERROR_REPORT";
        final DropBoxManager db = ctx.getSystemService(DropBoxManager.class);
        final String headers = getCurrentBootHeaders();
        final String reportText = headers + lastReport;

        addTextToDropBox(db, reportTag, reportText, "/dev/kmsg", LOG_SIZE);
        sLastReportedBug = bugTitle;
    }

    private void removeOldUpdatePackages(Context context) {
+72 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.server;

import android.test.AndroidTestCase;

/**
 * Tests for {@link com.android.server.BootReceiver}
 */
public class BootReceiverTest extends AndroidTestCase {
    public void testLogLinePotentiallySensitive() throws Exception {
        /*
         * Strings to be dropped from the log as potentially sensitive: register dumps, process
         * names, hardware info.
         */
        final String[] becomeNull = {
            "CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7",
            "Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014",
            "[    0.083207] RSP: 0000:ffffffff8fe07ca8 EFLAGS: 00010046 ORIG_RAX: 0000000000000000",
            "[    0.084709] RAX: 0000000000000000 RBX: ffffffffff240000 RCX: ffffffff815fcf01",
            "[    0.086109] RDX: dffffc0000000000 RSI: 0000000000000001 RDI: ffffffffff240004",
            "[    0.087509] RBP: ffffffff8fe07d60 R08: fffffbfff1fc0f21 R09: fffffbfff1fc0f21",
            "[    0.088911] R10: ffffffff8fe07907 R11: fffffbfff1fc0f20 R12: ffffffff8fe07d38",
            "R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff8fe07e80",
            "x29: ffff00003ce07150 x28: ffff80001aa29cc0",
            "x1 : 0000000000000000 x0 : ffff00000f628000",
        };

        /* Strings to be left unchanged, including non-sensitive registers and parts of reports. */
        final String[] leftAsIs = {
            "FS:  0000000000000000(0000) GS:ffffffff92409000(0000) knlGS:0000000000000000",
            "[ 69.2366] [ T6006]c7   6006  =======================================================",
            "[ 69.245688] [ T6006] BUG: KFENCE: out-of-bounds in kfence_handle_page_fault",
            "[ 69.257816] [ T6006]c7   6006  Out-of-bounds access at 0xffffffca75c45000 ",
            "[ 69.273536] [ T6006]c7   6006   __do_kernel_fault+0xa8/0x11c",
            "pc : __mutex_lock+0x428/0x99c ",
            "sp : ffff00003ce07150",
            "Call trace:",
            "",
        };

        final String[][] stripped = {
            { "Detected corrupted memory at 0xffffffffb6797ff9 [ 0xac . . . . . . ]:",
              "Detected corrupted memory at 0xffffffffb6797ff9" },
        };
        for (int i = 0; i < becomeNull.length; i++) {
            assertEquals(BootReceiver.stripSensitiveData(becomeNull[i]), null);
        }

        for (int i = 0; i < leftAsIs.length; i++) {
            assertEquals(BootReceiver.stripSensitiveData(leftAsIs[i]), leftAsIs[i]);
        }

        for (int i = 0; i < stripped.length; i++) {
            assertEquals(BootReceiver.stripSensitiveData(stripped[i][0]), stripped[i][1]);
        }
    }
}