Merge "Add recoverable GWP-ASan." (6e0eb996) · Commits · e / os / android_system_core

debuggerd/Android.bp

+1 −0

Original line number	Diff line number	Diff line
		@@ -98,6 +98,7 @@ cc_library_static {
		"libbase_headers",
		"libdebuggerd_common_headers",
		"bionic_libc_platform_headers",
		"gwp_asan_headers",
		],

		whole_static_libs: [

debuggerd/debuggerd_test.cpp

+146 −92

Original line number	Diff line number	Diff line
		@@ -65,6 +65,7 @@

		#include "crash_test.h"
		#include "debuggerd/handler.h"
		#include "gtest/gtest.h"
		#include "libdebuggerd/utility.h"
		#include "protocol.h"
		#include "tombstoned/tombstoned.h"
		@@ -111,19 +112,6 @@ constexpr char kWaitForDebuggerKey[] = "debug.debuggerd.wait_for_debugger";
		ASSERT_MATCH(result, \
		R"(#\d\d pc [0-9a-f]+\s+ \S+ (\(offset 0x[0-9a-f]+\) )?\()" frame_name R"(\+)");

		// Enable GWP-ASan at the start of this process. GWP-ASan is enabled using
		// process sampling, so we need to ensure we force GWP-ASan on.
		__attribute__((constructor)) static void enable_gwp_asan() {
		android_mallopt_gwp_asan_options_t opts;
		// No, we're not an app, but let's turn ourselves on without sampling.
		// Technically, if someone's using the *.default_app sysprops, they'll adjust
		// our settings, but I don't think this will be common on a device that's
		// running debuggerd_tests.
		opts.desire = android_mallopt_gwp_asan_options_t::Action::TURN_ON_FOR_APP;
		opts.program_name = "";
		android_mallopt(M_INITIALIZE_GWP_ASAN, &opts, sizeof(android_mallopt_gwp_asan_options_t));
		}

		static void tombstoned_intercept(pid_t target_pid, unique_fd* intercept_fd, unique_fd* output_fd,
		InterceptStatus* status, DebuggerdDumpType intercept_type) {
		intercept_fd->reset(socket_local_client(kTombstonedInterceptSocketName,
		@@ -469,76 +457,6 @@ static void SetTagCheckingLevelAsync() {
		}
		#endif

		// Number of iterations required to reliably guarantee a GWP-ASan crash.
		// GWP-ASan's sample rate is not truly nondeterministic, it initialises a
		// thread-local counter at 2*SampleRate, and decrements on each malloc(). Once
		// the counter reaches zero, we provide a sampled allocation. Then, double that
		// figure to allow for left/right allocation alignment, as this is done randomly
		// without bias.
		#define GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH (0x20000)

		struct GwpAsanTestParameters {
		size_t alloc_size;
		bool free_before_access;
		int access_offset;
		std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
		};

		struct GwpAsanCrasherTest : CrasherTest, testing::WithParamInterface<GwpAsanTestParameters> {};

		GwpAsanTestParameters gwp_asan_tests[] = {
		{/* alloc_size / 7, / free_before_access / true, / access_offset */ 0, "Use After Free, 0 bytes into a 7-byte allocation"},
		{/* alloc_size / 7, / free_before_access / true, / access_offset */ 1, "Use After Free, 1 byte into a 7-byte allocation"},
		{/* alloc_size / 7, / free_before_access / false, / access_offset */ 16, "Buffer Overflow, 9 bytes right of a 7-byte allocation"},
		{/* alloc_size / 16, / free_before_access / false, / access_offset */ -1, "Buffer Underflow, 1 byte left of a 16-byte allocation"},
		};

		INSTANTIATE_TEST_SUITE_P(GwpAsanTests, GwpAsanCrasherTest, testing::ValuesIn(gwp_asan_tests));

		TEST_P(GwpAsanCrasherTest, gwp_asan_uaf) {
		if (mte_supported()) {
		// Skip this test on MTE hardware, as MTE will reliably catch these errors
		// instead of GWP-ASan.
		GTEST_SKIP() << "Skipped on MTE.";
		}
		// Skip this test on HWASan, which will reliably catch test errors as well.
		SKIP_WITH_HWASAN;

		GwpAsanTestParameters params = GetParam();
		LogcatCollector logcat_collector;

		int intercept_result;
		unique_fd output_fd;
		StartProcess([&params]() {
		for (unsigned i = 0; i < GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH; ++i) {
		volatile char* p = reinterpret_cast<volatile char*>(malloc(params.alloc_size));
		if (params.free_before_access) free(static_cast<void>(const_cast<char>(p)));
		p[params.access_offset] = 42;
		if (!params.free_before_access) free(static_cast<void>(const_cast<char>(p)));
		}
		});

		StartIntercept(&output_fd);
		FinishCrasher();
		AssertDeath(SIGSEGV);
		FinishIntercept(&intercept_result);

		ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";

		std::vector<std::string> log_sources(2);
		ConsumeFd(std::move(output_fd), &log_sources[0]);
		logcat_collector.Collect(&log_sources[1]);

		for (const auto& result : log_sources) {
		ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
		ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
		if (params.free_before_access) {
		ASSERT_MATCH(result, R"(deallocated by thread .\n.#00 pc)");
		}
		ASSERT_MATCH(result, R"((^\|\s)allocated by thread .\n.#00 pc)");
		}
		}

		struct SizeParamCrasherTest : CrasherTest, testing::WithParamInterface<size_t> {};

		INSTANTIATE_TEST_SUITE_P(Sizes, SizeParamCrasherTest, testing::Values(0, 16, 131072));
		@@ -1279,7 +1197,11 @@ TEST_F(CrasherTest, fake_pid) {
		static const char* const kDebuggerdSeccompPolicy =
		"/system/etc/seccomp_policy/crash_dump." ABI_STRING ".policy";

		static pid_t seccomp_fork_impl(void (*prejail)()) {
		static void setup_jail(minijail* jail) {
		if (!jail) {
		LOG(FATAL) << "failed to create minijail";
		}

		std::string policy;
		if (!android::base::ReadFileToString(kDebuggerdSeccompPolicy, &policy)) {
		PLOG(FATAL) << "failed to read policy file";
		@@ -1306,15 +1228,15 @@ static pid_t seccomp_fork_impl(void (*prejail)()) {
		PLOG(FATAL) << "failed to seek tmp_fd";
		}

		ScopedMinijail jail{minijail_new()};
		if (!jail) {
		LOG(FATAL) << "failed to create minijail";
		minijail_no_new_privs(jail);
		minijail_log_seccomp_filter_failures(jail);
		minijail_use_seccomp_filter(jail);
		minijail_parse_seccomp_filters_from_fd(jail, tmp_fd.release());
		}

		minijail_no_new_privs(jail.get());
		minijail_log_seccomp_filter_failures(jail.get());
		minijail_use_seccomp_filter(jail.get());
		minijail_parse_seccomp_filters_from_fd(jail.get(), tmp_fd.release());
		static pid_t seccomp_fork_impl(void (*prejail)()) {
		ScopedMinijail jail{minijail_new()};
		setup_jail(jail.get());

		pid_t result = fork();
		if (result == -1) {
		@@ -1628,6 +1550,138 @@ TEST_F(CrasherTest, competing_tracer) {
		AssertDeath(SIGABRT);
		}

		struct GwpAsanTestParameters {
		size_t alloc_size;
		bool free_before_access;
		int access_offset;
		std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
		};

		struct GwpAsanCrasherTest
		: CrasherTest,
		testing::WithParamInterface<
		std::tuple<GwpAsanTestParameters, /* recoverable / bool, / seccomp */ bool>> {};

		GwpAsanTestParameters gwp_asan_tests[] = {
		{/* alloc_size / 7, / free_before_access / true, / access_offset */ 0,
		"Use After Free, 0 bytes into a 7-byte allocation"},
		{/* alloc_size / 15, / free_before_access / true, / access_offset */ 1,
		"Use After Free, 1 byte into a 15-byte allocation"},
		{/* alloc_size / 4096, / free_before_access / false, / access_offset */ 4098,
		"Buffer Overflow, 2 bytes right of a 4096-byte allocation"},
		{/* alloc_size / 4096, / free_before_access / false, / access_offset */ -1,
		"Buffer Underflow, 1 byte left of a 4096-byte allocation"},
		};

		INSTANTIATE_TEST_SUITE_P(
		GwpAsanTests, GwpAsanCrasherTest,
		testing::Combine(testing::ValuesIn(gwp_asan_tests),
		/* recoverable */ testing::Bool(),
		/* seccomp */ testing::Bool()),
		[](const testing::TestParamInfo<
		std::tuple<GwpAsanTestParameters, /* recoverable / bool, / seccomp */ bool>>& info) {
		const GwpAsanTestParameters& params = std::get<0>(info.param);
		std::string name = params.free_before_access ? "UseAfterFree" : "Overflow";
		name += testing::PrintToString(params.alloc_size);
		name += "Alloc";
		if (params.access_offset < 0) {
		name += "Left";
		name += testing::PrintToString(params.access_offset * -1);
		} else {
		name += "Right";
		name += testing::PrintToString(params.access_offset);
		}
		name += "Bytes";
		if (std::get<1>(info.param)) name += "Recoverable";
		if (std::get<2>(info.param)) name += "Seccomp";
		return name;
		});

		TEST_P(GwpAsanCrasherTest, run_gwp_asan_test) {
		if (mte_supported()) {
		// Skip this test on MTE hardware, as MTE will reliably catch these errors
		// instead of GWP-ASan.
		GTEST_SKIP() << "Skipped on MTE.";
		}
		// Skip this test on HWASan, which will reliably catch test errors as well.
		SKIP_WITH_HWASAN;

		GwpAsanTestParameters params = std::get<0>(GetParam());
		bool recoverable = std::get<1>(GetParam());
		LogcatCollector logcat_collector;

		int intercept_result;
		unique_fd output_fd;
		StartProcess([&recoverable]() {
		const char* env[] = {"GWP_ASAN_SAMPLE_RATE=1", "GWP_ASAN_PROCESS_SAMPLING=1",
		"GWP_ASAN_MAX_ALLOCS=40000", nullptr, nullptr};
		if (recoverable) {
		env[3] = "GWP_ASAN_RECOVERABLE=true";
		}
		std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
		test_name = std::regex_replace(test_name, std::regex("run_gwp_asan_test"),
		"DISABLED_run_gwp_asan_test");
		std::string test_filter = "--gtest_filter=*";
		test_filter += test_name;
		std::string this_binary = android::base::GetExecutablePath();
		const char* args[] = {this_binary.c_str(), "--gtest_also_run_disabled_tests",
		test_filter.c_str(), nullptr};
		// We check the crash report from a debuggerd handler and from logcat. The
		// echo from stdout/stderr of the subprocess trips up atest, because it
		// doesn't like that two tests started in a row without the first one
		// finishing (even though the second one is in a subprocess).
		close(STDOUT_FILENO);
		close(STDERR_FILENO);
		execve(this_binary.c_str(), const_cast<char>(args), const_cast<char>(env));
		});

		StartIntercept(&output_fd);
		FinishCrasher();
		if (recoverable) {
		AssertDeath(0);
		} else {
		AssertDeath(SIGSEGV);
		}
		FinishIntercept(&intercept_result);

		ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";

		std::vector<std::string> log_sources(2);
		ConsumeFd(std::move(output_fd), &log_sources[0]);
		logcat_collector.Collect(&log_sources[1]);

		// seccomp forces the fallback handler, which doesn't print GWP-ASan debugging
		// information. Make sure the recovery still works, but the report won't be
		// hugely useful, it looks like a regular SEGV.
		bool seccomp = std::get<2>(GetParam());
		if (!seccomp) {
		for (const auto& result : log_sources) {
		ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
		ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
		if (params.free_before_access) {
		ASSERT_MATCH(result, R"(deallocated by thread .\n.#00 pc)");
		}
		ASSERT_MATCH(result, R"((^\|\s)allocated by thread .\n.#00 pc)");
		}
		}
		}

		TEST_P(GwpAsanCrasherTest, DISABLED_run_gwp_asan_test) {
		GwpAsanTestParameters params = std::get<0>(GetParam());
		bool seccomp = std::get<2>(GetParam());
		if (seccomp) {
		ScopedMinijail jail{minijail_new()};
		setup_jail(jail.get());
		minijail_enter(jail.get());
		}

		// Use 'volatile' to prevent a very clever compiler eliminating the store.
		char* volatile p = reinterpret_cast<char* volatile>(malloc(params.alloc_size));
		if (params.free_before_access) free(static_cast<void>(const_cast<char>(p)));
		p[params.access_offset] = 42;
		if (!params.free_before_access) free(static_cast<void>(const_cast<char>(p)));
		}

		TEST_F(CrasherTest, fdsan_warning_abort_message) {
		int intercept_result;
		unique_fd output_fd;

debuggerd/handler/debuggerd_handler.cpp

+75 −2

Original line number	Diff line number	Diff line
		@@ -565,17 +565,38 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
		process_info = g_callbacks.get_process_info();
		}

		// GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
		// guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
		// and the process terminates, but in some cases, we actually want to print
		// the bug report and let the signal handler return, and restart the process.
		// In order to do that, we need to disable GWP-ASan's guard pages. The
		// following callbacks handle this case.
		gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
		bool gwp_asan_recoverable = false;
		if (signal_number == SIGSEGV && signal_has_si_addr(info) &&
		gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery &&
		gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report &&
		gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
		gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
		gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
		gwp_asan_recoverable = true;
		}

		// If sival_int is ~0, it means that the fallback handler has been called
		// once before and this function is being called again to dump the stack
		// of a specific thread. It is possible that the prctl call might return 1,
		// then return 0 in subsequent calls, so check the sival_int to determine if
		// the fallback handler should be called first.
		if (si_val == kDebuggerdFallbackSivalUintptrRequestDump \|\|
		prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
		bool no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1;
		if (si_val == kDebuggerdFallbackSivalUintptrRequestDump \|\| no_new_privs) {
		// This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
		// you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
		// ANR trace.
		debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
		if (no_new_privs && gwp_asan_recoverable) {
		gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
		return;
		}
		resend_signal(info);
		return;
		}
		@@ -649,6 +670,9 @@ static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* c
		// If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
		// starting to dump right before our death.
		pthread_mutex_unlock(&crash_mutex);
		} else if (gwp_asan_recoverable) {
		gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
		pthread_mutex_unlock(&crash_mutex);
		}
		#ifdef __aarch64__
		else if (info->si_signo == SIGSEGV &&
		@@ -727,3 +751,52 @@ void debuggerd_init(debuggerd_callbacks_t* callbacks) {

		debuggerd_register_handlers(&action);
		}

		// When debuggerd's signal handler is the first handler called, it's great at
		// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is
		// always the first signal handler, and so the following function is what
		// sigchain must call before processing the signal. This allows for processing
		// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan
		// recovery, then dump a report (via the regular debuggerd hanndler), and patch
		// up the allocator, and allow the process to continue (indicated by returning
		// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't
		// possible, return 'false'.
		bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
		if (signal_number != SIGSEGV \|\| !signal_has_si_addr(info)) return false;

		gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
		if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr \|\|
		gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report == nullptr \|\|
		gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report == nullptr \|\|
		!gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
		return false;
		}

		// Only dump a crash report for the first GWP-ASan crash. ActivityManager
		// doesn't like it when an app crashes multiple times, and is even more strict
		// about an app crashing multiple times in a short time period. While the app
		// won't crash fully when we do GWP-ASan recovery, ActivityManager still gets
		// the information about the crash through the DropBoxManager service. If an
		// app has multiple back-to-back GWP-ASan crashes, this would lead to the app
		// being killed, which defeats the purpose of having the recoverable mode. To
		// mitigate against this, only generate a debuggerd crash report for the first
		// GWP-ASan crash encountered. We still need to do the patching up of the
		// allocator though, so do that.
		static pthread_mutex_t first_crash_mutex = PTHREAD_MUTEX_INITIALIZER;
		pthread_mutex_lock(&first_crash_mutex);
		static bool first_crash = true;

		if (first_crash) {
		// `debuggerd_signal_handler` will call
		// `debuggerd_gwp_asan_(pre\|post)_crash_report`, so no need to manually call
		// them here.
		debuggerd_signal_handler(signal_number, info, context);
		first_crash = false;
		} else {
		gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
		gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
		}

		pthread_mutex_unlock(&first_crash_mutex);
		return true;
		}

debuggerd/include/debuggerd/handler.h

+11 −0

Original line number	Diff line number	Diff line
		@@ -46,14 +46,25 @@ struct debugger_process_info {
		size_t scudo_ring_buffer_size;
		};

		// GWP-ASan calbacks to support the recoverable mode. Separate from the
		// debuggerd_callbacks_t because these values aren't available at debuggerd_init
		// time, and have to be synthesized on request.
		typedef struct {
		bool (debuggerd_needs_gwp_asan_recovery)(void fault_addr);
		void (debuggerd_gwp_asan_pre_crash_report)(void fault_addr);
		void (debuggerd_gwp_asan_post_crash_report)(void fault_addr);
		} gwp_asan_callbacks_t;

		// These callbacks are called in a signal handler, and thus must be async signal safe.
		// If null, the callbacks will not be called.
		typedef struct {
		debugger_process_info (*get_process_info)();
		gwp_asan_callbacks_t (*get_gwp_asan_callbacks)();
		void (*post_dump)();
		} debuggerd_callbacks_t;

		void debuggerd_init(debuggerd_callbacks_t* callbacks);
		bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context);

		// DEBUGGER_ACTION_DUMP_TOMBSTONE and DEBUGGER_ACTION_DUMP_BACKTRACE are both
		// triggered via BIONIC_SIGNAL_DEBUGGER. The debugger_action_t is sent via si_value