
Commit 5d48ef3e authored by David S. Miller

Merge branch 'tcp_mem_pressure'



Eric Dumazet says:

====================
tcp: better handling of memory pressure

When testing commit 790ba456 ("tcp: set SOCK_NOSPACE under memory
pressure") with edge-triggered epoll applications, I found various
issues under memory pressure with thousands of active sockets.

This patch series is a first round of fixes for these issues, in both
the send and receive paths. Other fixes are probably still needed, but
with this series all of my tests now succeed.

v2: fix typo in "allow one skb to be received per socket under memory pressure",
as spotted by Jason Baron.
====================

Acked-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4633c9e0 b66e91cc
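
The failure mode that motivates the series is easiest to see from the application side. Below is a minimal sketch (plain C, error handling trimmed, helper name hypothetical) of the edge-triggered write pattern the series was tested against: once write() returns EAGAIN, an EPOLLET application parks in epoll_wait() and resumes only on a fresh EPOLLOUT edge, which the kernel generates only if SOCK_NOSPACE was armed when the send path ran out of buffer space or memory.

#include <errno.h>
#include <sys/epoll.h>
#include <unistd.h>

/* Edge-triggered sender: write until EAGAIN, then sleep in epoll_wait().
 * fd must be non-blocking and registered on epfd with EPOLLOUT | EPOLLET.
 */
static int send_all_et(int epfd, int fd, const char *buf, size_t len)
{
	struct epoll_event ev;
	size_t off = 0;

	while (off < len) {
		ssize_t n = write(fd, buf + off, len - off);

		if (n > 0) {
			off += (size_t)n;
			continue;
		}
		if (n < 0 && errno != EAGAIN)
			return -1;
		/* Edge-triggered: there is no later level re-check.  If the
		 * kernel failed to set SOCK_NOSPACE when memory scheduling
		 * failed, no EPOLLOUT edge ever arrives and we block forever.
		 */
		if (epoll_wait(epfd, &ev, 1, -1) < 0)
			return -1;
	}
	return 0;
}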
include/net/sock.h  +3 −3
@@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
  * Functions for memory accounting
  */
 int __sk_mem_schedule(struct sock *sk, int size, int kind);
-void __sk_mem_reclaim(struct sock *sk);
+void __sk_mem_reclaim(struct sock *sk, int amount);
 
 #define SK_MEM_QUANTUM ((int)PAGE_SIZE)
 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
@@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
 	if (!sk_has_account(sk))
 		return;
 	if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
-		__sk_mem_reclaim(sk);
+		__sk_mem_reclaim(sk, sk->sk_forward_alloc);
 }

static inline void sk_mem_reclaim_partial(struct sock *sk)
@@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk)
	if (!sk_has_account(sk))
		return;
	if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
		__sk_mem_reclaim(sk);
		__sk_mem_reclaim(sk, sk->sk_forward_alloc - 1);
}

static inline void sk_mem_charge(struct sock *sk, int size)
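
The "- 1" in sk_mem_reclaim_partial() is the point of the new @amount parameter: __sk_mem_reclaim() rounds the amount down to whole quanta, so passing sk_forward_alloc - 1 always leaves one SK_MEM_QUANTUM reserved for the next transmit. A small userspace model of the arithmetic (plain C, kernel types replaced, assuming 4 KiB pages):

#include <assert.h>
#include <stdio.h>

#define SK_MEM_QUANTUM       4096	/* assumes PAGE_SIZE == 4096 */
#define SK_MEM_QUANTUM_SHIFT 12

/* Model of the new __sk_mem_reclaim(): round the requested amount down
 * to whole quanta and release that much of the reservation.
 */
static void mem_reclaim(int *forward_alloc, int amount)
{
	amount >>= SK_MEM_QUANTUM_SHIFT;
	*forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
}

int main(void)
{
	int fa;

	/* sk_mem_reclaim(): pass the full reservation, everything that
	 * forms whole quanta goes back to the protocol pool.
	 */
	fa = 8192;
	mem_reclaim(&fa, fa);		/* 8192 >> 12 == 2 quanta */
	assert(fa == 0);

	/* sk_mem_reclaim_partial(): pass (reservation - 1), so one whole
	 * quantum is deliberately kept for the socket.
	 */
	fa = 8192;
	mem_reclaim(&fa, fa - 1);	/* 8191 >> 12 == 1 quantum */
	assert(fa == 4096);

	printf("partial reclaim keeps one quantum\n");
	return 0;
}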
include/net/tcp.h  +10 −0
@@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
+/* optimized version of sk_under_memory_pressure() for TCP sockets */
+static inline bool tcp_under_memory_pressure(const struct sock *sk)
+{
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return !!sk->sk_cgrp->memory_pressure;
+
+	return tcp_memory_pressure;
+}
 /*
  * The next routines deal with comparing 32 bit unsigned ints
  * and worry about wraparound (automatic with unsigned arithmetic).
@@ -311,6 +319,8 @@ static inline bool tcp_out_of_memory(struct sock *sk)
 	return false;
 }
 
+void sk_forced_mem_schedule(struct sock *sk, int size);
+
 static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
 {
 	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
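
Two things worth noting here. tcp_under_memory_pressure() consults the memcg flag before the global tcp_memory_pressure, so sockets in a memory-limited cgroup react to their own pressure state. sk_forced_mem_schedule() is only declared in this hunk; its definition lands elsewhere in the series and is not part of this view. Judging from its callers below, it must charge memory unconditionally instead of failing under pressure; a userspace sketch of that contract (an assumption, not the verbatim kernel code):

#define SK_MEM_QUANTUM       4096	/* assumes PAGE_SIZE == 4096 */
#define SK_MEM_QUANTUM_SHIFT 12

/* Forced variant: reserve enough quanta to cover size, bumping the
 * protocol-wide page counter even past its configured limits.
 */
static void forced_mem_schedule(int *forward_alloc, long *proto_pages,
				int size)
{
	int pages;

	if (size <= *forward_alloc)	/* reservation already covers it */
		return;
	pages = (size + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
	*forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
	*proto_pages += pages;		/* no limit check: cannot fail */
}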
net/core/sock.c  +5 −4
@@ -2069,12 +2069,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 /**
  *	__sk_reclaim - reclaim memory_allocated
  *	@sk: socket
+ *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
  */
-void __sk_mem_reclaim(struct sock *sk)
+void __sk_mem_reclaim(struct sock *sk, int amount)
 {
-	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
-	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+	amount >>= SK_MEM_QUANTUM_SHIFT;
+	sk_memory_allocated_sub(sk, amount);
+	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
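
Note that the rewritten body matches the old behavior when called with the full reservation: with sk_forward_alloc == 9000, amount becomes 9000 >> 12 == 2 quanta, so 8192 bytes are subtracted from memory_allocated and the 808-byte remainder stays, exactly what the old mask with SK_MEM_QUANTUM - 1 left behind. The difference is that callers may now reclaim less than everything, which is what sk_mem_reclaim_partial() above relies on.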
net/ipv4/tcp.c  +18 −6
@@ -815,9 +815,20 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 	/* The TCP header must be at least 32-bit aligned.  */
 	size = ALIGN(size, 4);
 
+	if (unlikely(tcp_under_memory_pressure(sk)))
+		sk_mem_reclaim_partial(sk);
+
 	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
-	if (skb) {
-		if (sk_wmem_schedule(sk, skb->truesize)) {
+	if (likely(skb)) {
+		bool mem_schedule;
+
+		if (skb_queue_len(&sk->sk_write_queue) == 0) {
+			mem_schedule = true;
+			sk_forced_mem_schedule(sk, skb->truesize);
+		} else {
+			mem_schedule = sk_wmem_schedule(sk, skb->truesize);
+		}
+		if (likely(mem_schedule)) {
 			skb_reserve(skb, sk->sk_prot->max_header);
 			/*
 			 * Make sure that we have exactly size bytes
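
The skb_queue_len() check encodes the guarantee this patch is after: a socket with an empty write queue may always charge its first skb, even under memory pressure. Without it, a sender that hit pressure with nothing queued could keep failing here, and since no queued data would ever complete and free memory, an edge-triggered poller would never receive another EPOLLOUT edge. Forcing the first skb ensures there is always something in flight whose completion can wake the application.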
@@ -3057,11 +3068,12 @@ __setup("thash_entries=", set_thash_entries);
 
 static void __init tcp_init_mem(void)
 {
-	unsigned long limit = nr_free_buffer_pages() / 8;
+	unsigned long limit = nr_free_buffer_pages() / 16;
+
 	limit = max(limit, 128UL);
-	sysctl_tcp_mem[0] = limit / 4 * 3;
-	sysctl_tcp_mem[1] = limit;
-	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+	sysctl_tcp_mem[0] = limit / 4 * 3;		/* 4.68 % */
+	sysctl_tcp_mem[1] = limit;			/* 6.25 % */
+	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */
 }
 
 void __init tcp_init(void)
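
The new percentage comments follow directly from the halved divisor. A quick sanity check of the arithmetic on a hypothetical machine with 4 GiB of buffer pages (4 KiB pages, numbers purely illustrative):

#include <stdio.h>

int main(void)
{
	unsigned long pages = 1UL << 20;	/* 4 GiB of 4 KiB pages */
	unsigned long limit = pages / 16;	/* was pages / 8 */
	unsigned long mem[3];

	mem[0] = limit / 4 * 3;		/* 3/64 of pages ~= 4.69 % */
	mem[1] = limit;			/* 1/16 of pages  = 6.25 % */
	mem[2] = mem[0] * 2;		/* 6/64 of pages ~= 9.38 % */

	printf("tcp_mem = %lu %lu %lu pages\n", mem[0], mem[1], mem[2]);
	return 0;
}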
net/ipv4/tcp_input.c  +10 −8
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !sk_under_memory_pressure(sk)) {
+	    !tcp_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !sk_under_memory_pressure(sk) &&
+	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
@@ -4507,10 +4507,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 		if (eaten <= 0) {
 queue_and_out:
-			if (eaten < 0 &&
-			    tcp_try_rmem_schedule(sk, skb, skb->truesize))
-				goto drop;
-
+			if (eaten < 0) {
+				if (skb_queue_len(&sk->sk_receive_queue) == 0)
+					sk_forced_mem_schedule(sk, skb->truesize);
+				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+					goto drop;
+			}
 			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		}
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
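
This mirrors the send-side change in sk_stream_alloc_skb(): when the receive queue is empty, the incoming skb is force-charged, so even under pressure every socket can accept at least one packet. Otherwise a flow could be starved with nothing queued, the reader would never see an EPOLLIN edge, and the peer would simply keep retransmitting.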
@@ -4781,7 +4783,7 @@ static int tcp_prune_queue(struct sock *sk)
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
-	else if (sk_under_memory_pressure(sk))
+	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -4825,7 +4827,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
 		return false;
 
 	/* If we are under global TCP memory pressure, do not expand.  */
-	if (sk_under_memory_pressure(sk))
+	if (tcp_under_memory_pressure(sk))
 		return false;
 
 	/* If we are under soft global TCP memory pressure, do not expand.  */
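
The remaining hunks are mechanical conversions: the receive-window and send-buffer heuristics that previously consulted only the global sk_under_memory_pressure() now go through tcp_under_memory_pressure(), so a memcg-limited TCP socket also backs off under its own cgroup's pressure rather than only under global pressure.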