libs/binder/RpcState.cpp  +9 −1

@@ -45,7 +45,7 @@
 using android::binder::borrowed_fd;
 using android::binder::unique_fd;

 #if RPC_FLAKE_PRONE
-void rpcMaybeWaitToFlake() {
+static unsigned rpcFlakeUnsigned() {
     [[clang::no_destroy]] static std::random_device r;
     [[clang::no_destroy]] static RpcMutex m;
     unsigned num;
@@ -53,6 +53,14 @@ void rpcMaybeWaitToFlake() {
         RpcMutexLockGuard lock(m);
         num = r();
     }
+    return num;
+}
+bool rpcMaybeFlake() {
+    return rpcFlakeUnsigned() % 10 == 0; // flake 10%
+    // return rpcFlakeUnsigned() % 4 != 0; // flake 75%
+}
+void rpcMaybeWaitToFlake() {
+    unsigned num = rpcFlakeUnsigned();
     if (num % 10 == 0) usleep(num % 1000);
 }
 #endif
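This splits the old rpcMaybeWaitToFlake() into a shared rpcFlakeUnsigned() draw plus two consumers: rpcMaybeFlake(), a roughly 10% probability gate for injecting errors, and rpcMaybeWaitToFlake(), which keeps the old jittered-sleep behavior. Since std::random_device is not guaranteed safe for concurrent use, the draw is serialized behind a mutex. A minimal standalone sketch of the same pattern, substituting std::mutex for libbinder's RpcMutex wrapper and dropping [[clang::no_destroy]] (the names here are illustrative, not from the patch):

// Standalone sketch of the flake helpers above; std::mutex replaces
// libbinder's RpcMutex wrapper.
#include <mutex>
#include <random>
#include <unistd.h>

static unsigned flakeUnsigned() {
    // std::random_device is not guaranteed thread-safe, so serialize access.
    static std::random_device r;
    static std::mutex m;
    std::lock_guard<std::mutex> lock(m);
    return r();
}

// True on roughly 10% of calls; callers use this to inject an artificial error.
static bool maybeFlake() {
    return flakeUnsigned() % 10 == 0;
}

// On roughly 10% of calls, sleep up to ~1ms to shake out timing-dependent bugs.
static void maybeWaitToFlake() {
    unsigned num = flakeUnsigned();
    if (num % 10 == 0) usleep(num % 1000);
}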
libs/binder/RpcState.h  +8 −1

@@ -38,7 +38,9 @@ struct RpcWireHeader;
  * a specific subset of logs to debug, this could be broken up like
  * IPCThreadState's.
  */
+// DO NOT ENABLE IN PRODUCTION
 #define SHOULD_LOG_RPC_DETAIL false
+// DO NOT ENABLE IN PRODUCTION

 #if SHOULD_LOG_RPC_DETAIL
 #define LOG_RPC_DETAIL(...) ALOGI(__VA_ARGS__)
@@ -46,12 +48,17 @@ struct RpcWireHeader;
 #define LOG_RPC_DETAIL(...) ALOGV(__VA_ARGS__) // for type checking
 #endif

+// DO NOT ENABLE IN PRODUCTION
 #define RPC_FLAKE_PRONE false
+// DO NOT ENABLE IN PRODUCTION

 #if RPC_FLAKE_PRONE
-void rpcMaybeWaitToFlake();
+LIBBINDER_INTERNAL_EXPORTED bool rpcMaybeFlake();
+LIBBINDER_INTERNAL_EXPORTED void rpcMaybeWaitToFlake();
+#define MAYBE_TRUE_IN_FLAKE_MODE rpcMaybeFlake()
 #define MAYBE_WAIT_IN_FLAKE_MODE rpcMaybeWaitToFlake()
 #else
+#define MAYBE_TRUE_IN_FLAKE_MODE false
 #define MAYBE_WAIT_IN_FLAKE_MODE do {} while (false)
 #endif
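With RPC_FLAKE_PRONE left at false, MAYBE_TRUE_IN_FLAKE_MODE folds to the constant false and MAYBE_WAIT_IN_FLAKE_MODE expands to an empty do/while, so call sites compile away to nothing in production; only flake builds reference the exported helpers. A hypothetical call site (not taken from this patch) showing the intended usage:

// Hypothetical function for illustration; assumes RpcState.h is included.
#include <cerrno>

int sendOneChunkSketch() {
    if (MAYBE_TRUE_IN_FLAKE_MODE) { // rpcMaybeFlake() in flake builds; `false` otherwise
        errno = ENOMEM;             // simulate the vsock failure mode under test
        return -ENOMEM;
    }
    MAYBE_WAIT_IN_FLAKE_MODE; // rpcMaybeWaitToFlake() in flake builds; a no-op otherwise
    // ... the real send would go here ...
    return 0;
}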
libs/binder/RpcTransportUtils.h  +92 −24

@@ -21,6 +21,10 @@
 #include "FdTrigger.h"
 #include "RpcState.h"

+// For just this file:
+// #undef LOG_RPC_DETAIL
+// #define LOG_RPC_DETAIL ALOGE
+
 namespace android {

 template <typename SendOrReceive>
@@ -56,10 +60,24 @@ status_t interruptableReadOrWrite(
         return OK;
     }

+    // size to break up the message
+    constexpr size_t kChunkMax = 65536;
+    const size_t kChunkMin = getpagesize(); // typical allocation granularity for sockets
+    size_t chunkSize = kChunkMax;
+
+    // b/419364025 - vhost-vsock, and perhaps other socket implementations, may return
+    // ENOMEM from both blocking and non-blocking sockets.
+    // How long we wait across repeated ENOMEMs for memory to become available:
+    constexpr size_t kEnomemWaitStartUs = 10'000;
+    constexpr size_t kEnomemWaitMaxUs = 1'000'000;       // don't risk ANR
+    constexpr size_t kEnomemWaitTotalMaxUs = 30'000'000; // ANR at 30s anyway, so avoid hang
+    size_t enomemWaitUs = 0;
+    size_t enomemTotalUs = 0;
+
     bool havePolled = false;
     while (true) {
         ssize_t processSize = -1;
-        bool canSendFullTransaction = false;
+        bool skipPollingAndContinue = false; // set when we should retry immediately

         // This block dynamically adjusts packet sizes down to work around a
         // limitation in the vsock driver where large packets are sometimes
@@ -67,15 +85,15 @@
         // TODO: only apply this workaround on vsock ???
         // TODO: fix vsock
         {
-            size_t limit = 65536;
+            size_t chunkRemaining = chunkSize;
             int i = 0;
             for (i = 0; i < niovs; i++) {
-                if (iovs[i].iov_len >= limit) {
+                if (iovs[i].iov_len >= chunkRemaining) {
                     break;
                 }
-                limit -= iovs[i].iov_len;
+                chunkRemaining -= iovs[i].iov_len;
             }
-            canSendFullTransaction = i == niovs;
+            bool canSendFullTransaction = i == niovs;

             int old_niovs = niovs;
             size_t old_len = 0xDEADBEEF;
@@ -84,14 +102,20 @@
                 // pretend like we have fewer iovecs
                 niovs = i + 1; // to restore (A)
                 old_len = iovs[i].iov_len;
-                // only send up to remaining limit from this iovec
-                iovs[i].iov_len = limit; // to restore (B)
-                LOG_ALWAYS_FATAL_IF(limit == 0,
-                                    "limit should not be zero - see EMPTY IOVEC ISSUE above");
+                // only send up to the remaining chunkRemaining from this iovec
+                iovs[i].iov_len = chunkRemaining; // to restore (B)
+                LOG_ALWAYS_FATAL_IF(chunkRemaining == 0,
+                                    "chunkRemaining never zero - see EMPTY IOVEC ISSUE above");
             }

-            processSize = sendOrReceiveFun(iovs, niovs); // MAIN ACTION
+            // MAIN ACTION
+            if (MAYBE_TRUE_IN_FLAKE_MODE) {
+                LOG_RPC_DETAIL("Injecting ENOMEM.");
+                processSize = -1;
+                errno = ENOMEM;
+            } else {
+                processSize = sendOrReceiveFun(iovs, niovs);
+            }

             if (!canSendFullTransaction) {
@@ -101,20 +125,67 @@
                 niovs = old_niovs;         // (A) - restored
                 iovs[i].iov_len = old_len; // (B) - restored
             }
+
+            // altPoll may introduce long waiting since it assumes that if it cannot
+            // write data, it needs to wait before sending more, to give the producer-
+            // consumer problem time to be solved - otherwise it will busy loop. However,
+            // for this workaround, we are breaking up the transaction intentionally,
+            // not because the transaction won't fit, but to avoid a bug in the kernel
+            // in how it combines messages. So, when we artificially simulate a
+            // limited send, don't poll and just keep on sending data.
+            skipPollingAndContinue = !canSendFullTransaction;
         }

+        // HANDLE RESULT OF SEND OR RECEIVE
         if (processSize < 0) {
             int savedErrno = errno;

+            if (savedErrno == ENOMEM) {
+                LOG_RPC_DETAIL("RpcTransport %s(): %s", funName, strerror(savedErrno));
+
+                // Since this is the limit only for this call to send this packet,
+                // we don't ever restore it. Assume it will be hard to get more
+                // memory if we're already having difficulty sending this out.
+                chunkSize = std::max(chunkSize / 2, kChunkMin);
+                LOG_RPC_DETAIL("Chunk size is now %zu due to ENOMEM.", chunkSize);
+
+                // When we've gotten down to the minimum send size, add a timer
+                // to give time for more memory to be freed up. This means even
+                // a single page is not available, so we have to wait.
+                if (chunkSize <= kChunkMin) {
+                    if (enomemWaitUs == 0) enomemWaitUs = kEnomemWaitStartUs;
+                    enomemWaitUs = std::min(enomemWaitUs * 2, kEnomemWaitMaxUs);
+                    enomemTotalUs += enomemWaitUs;
+                    if (enomemTotalUs > kEnomemWaitTotalMaxUs) {
+                        // by this time the watchdog should be kicking in
+                        return -ENOMEM;
+                    }
+                    LOG_RPC_DETAIL("Sleeping %zuus due to ENOMEM.", enomemWaitUs);
+                    usleep(enomemWaitUs);
+                }
+
+                // Need to survey socket code to see if polling in this situation is
+                // guaranteed to be non-blocking.
+                // NOTE: if the other side needs to deallocate memory, and that is the
+                // only deallocatable memory in the entire system, but we need altPoll
+                // to drain commands to unstick it so it can do that, then this could
+                // cause a deadlock, but this is not realistic on Android.
+                skipPollingAndContinue = true;
-            // Still return the error on later passes, since it would expose
-            // a problem with polling
-            if (havePolled || (savedErrno != EAGAIN && savedErrno != EWOULDBLOCK)) {
+            } else if (havePolled || (savedErrno != EAGAIN && savedErrno != EWOULDBLOCK)) {
+                // Still return the error on later passes, since it would expose
+                // a problem with polling
                 LOG_RPC_DETAIL("RpcTransport %s(): %s", funName, strerror(savedErrno));
                 return -savedErrno;
             }
         } else if (processSize == 0) {
             return DEAD_OBJECT;
         } else {
+            // success - reset error exponential backoffs
+            enomemWaitUs = 0;
+            enomemTotalUs = 0;
+
             while (processSize > 0 && niovs > 0) {
                 auto& iov = iovs[0];
                 if (static_cast<size_t>(processSize) < iov.iov_len) {
@@ -138,20 +209,17 @@
             }
         }

-        // altPoll may introduce long waiting since it assumes that if it cannot
-        // write data, it needs to wait before sending more, to give the producer-
-        // consumer problem time to be solved - otherwise it will busy loop. However,
-        // for this workaround, we are breaking up the transaction intentionally,
-        // not because the transaction won't fit, but to avoid a bug in the kernel
-        // in how it combines messages. So, when we artificially simulate a
-        // limited send, don't poll and just keep on sending data.
-        if (!canSendFullTransaction) continue;
-
-        if (altPoll) {
-            if (status_t status = (*altPoll)(); status != OK) return status;
+        // METHOD OF POLLING
+        if (skipPollingAndContinue) {
+            if (fdTrigger->isTriggered()) {
+                return DEAD_OBJECT;
+            }
+            // continue;
+        } else if (altPoll) {
+            if (status_t status = (*altPoll)(); status != OK) return status;
+            if (fdTrigger->isTriggered()) { // altPoll may not check this
+                return DEAD_OBJECT;
+            }
         } else {
             if (status_t status = fdTrigger->triggerablePoll(socket, event); status != OK)
                 return status;
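Two pieces of the RpcTransportUtils.h change generalize beyond vsock. The first is the iovec-trimming trick: walk the array until a byte budget is exhausted, shorten the boundary entry, and restore it after the syscall. A standalone sketch of that logic under hypothetical names (the patch does this inline around sendOrReceiveFun):

// Hypothetical standalone version of the iovec-trimming logic; the names
// trimToBudget/restoreAfterTrim are illustrative, not from the patch.
#include <sys/uio.h>
#include <cstddef>

// Shortens iovs in place so it covers at most `budget` bytes. Returns the
// iovec count to pass to the syscall; *boundary/*oldLen record what to restore.
int trimToBudget(iovec* iovs, int niovs, size_t budget, int* boundary, size_t* oldLen) {
    *boundary = -1;
    int i = 0;
    for (; i < niovs; i++) {
        if (iovs[i].iov_len >= budget) break; // this entry crosses the budget
        budget -= iovs[i].iov_len;
    }
    if (i == niovs) return niovs; // the whole message fits within the budget
    *boundary = i;
    *oldLen = iovs[i].iov_len;
    iovs[i].iov_len = budget; // send only the remaining budget from this entry
    return i + 1;
}

void restoreAfterTrim(iovec* iovs, int boundary, size_t oldLen) {
    if (boundary >= 0) iovs[boundary].iov_len = oldLen;
}

A caller would pass the returned count to writev()/sendmsg(), then call restoreAfterTrim() before advancing through the buffers, mirroring the (A)/(B) save-and-restore comments in the patch.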
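The second is the ENOMEM recovery policy: halve the chunk size down to one page, and once at that floor, sleep with exponential backoff until a 30-second budget is exhausted. A sketch that pulls the policy out of the I/O loop into a hypothetical standalone struct (constants mirror the patch, which keeps them as locals; note the 10ms seed is doubled before the first sleep, so the waits run 20ms, 40ms, ..., capped at 1s each):

// Hypothetical extraction of the ENOMEM policy for illustration only.
#include <algorithm>
#include <cstddef>
#include <unistd.h>

struct EnomemBackoff {
    static constexpr size_t kChunkMax = 65536;
    size_t chunkMin = static_cast<size_t>(getpagesize());
    size_t chunkSize = kChunkMax;
    size_t waitUs = 0;  // per-failure wait, doubled each time at the floor
    size_t totalUs = 0; // cumulative wait, capped at 30s

    // Called on each ENOMEM. Returns false when the caller should give up
    // and propagate -ENOMEM (by then the watchdog is likely firing anyway).
    bool onEnomem() {
        chunkSize = std::max(chunkSize / 2, chunkMin);
        if (chunkSize > chunkMin) return true; // just retry with smaller chunks
        // At the floor: even one page can't be sent, so wait for memory.
        if (waitUs == 0) waitUs = 10'000; // seed; doubled below, so first sleep is 20ms
        waitUs = std::min(waitUs * 2, size_t{1'000'000});
        totalUs += waitUs;
        if (totalUs > 30'000'000) return false;
        usleep(waitUs);
        return true;
    }

    void onSuccess() { waitUs = 0; totalUs = 0; }
};

On a successful send or receive the counters reset, matching the patch's "reset error exponential backoffs" branch.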