Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 28d64271 authored by Eric Dumazet, committed by David S. Miller
Browse files

net: attempt high order allocations in sock_alloc_send_pskb()



Adding paged frags skbs to af_unix sockets introduced a performance
regression on large sends because of additional page allocations, even
if each skb could carry at least 100% more payload than before.

We can instruct sock_alloc_send_pskb() to attempt high order
allocations.

Most of the time, it does a single page allocation instead of 8.

I added an additional parameter to sock_alloc_send_pskb() to
let other users opt in to this new feature in followup patches.

Tested:

Before patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992    10.00    46861.15

After patch :

$ netperf -t STREAM_STREAM
STREAM STREAM TEST
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 2304  212992  212992    10.00    57981.11

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e370a723
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -524,7 +524,7 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
		linear = len;
		linear = len;


	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
				   err, 0);
	if (!skb)
	if (!skb)
		return NULL;
		return NULL;


+1 −1
Original line number Original line Diff line number Diff line
@@ -949,7 +949,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
		linear = len;
		linear = len;


	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   &err);
				   &err, 0);
	if (!skb)
	if (!skb)
		return ERR_PTR(err);
		return ERR_PTR(err);


+2 −1
Original line number Original line Diff line number Diff line
@@ -1539,7 +1539,8 @@ extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
						      unsigned long header_len,
						      unsigned long header_len,
						      unsigned long data_len,
						      unsigned long data_len,
						      int noblock,
						      int noblock,
						      int *errcode);
						      int *errcode,
						      int max_page_order);
extern void *sock_kmalloc(struct sock *sk, int size,
extern void *sock_kmalloc(struct sock *sk, int size,
			  gfp_t priority);
			  gfp_t priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
+51 −49
Original line number Original line Diff line number Diff line
@@ -1741,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)


struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     unsigned long data_len, int noblock,
				     int *errcode)
				     int *errcode, int max_page_order)
{
{
	struct sk_buff *skb;
	struct sk_buff *skb = NULL;
	unsigned long chunk;
	gfp_t gfp_mask;
	gfp_t gfp_mask;
	long timeo;
	long timeo;
	int err;
	int err;
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	struct page *page;
	int i;


	err = -EMSGSIZE;
	err = -EMSGSIZE;
	if (npages > MAX_SKB_FRAGS)
	if (npages > MAX_SKB_FRAGS)
		goto failure;
		goto failure;


	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
	while (!skb) {
		err = sock_error(sk);
		err = sock_error(sk);
		if (err != 0)
		if (err != 0)
			goto failure;
			goto failure;
@@ -1767,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
		if (sk->sk_shutdown & SEND_SHUTDOWN)
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;
			goto failure;


		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
			if (skb) {
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
				int i;
			err = -EAGAIN;
			if (!timeo)
				goto failure;
			if (signal_pending(current))
				goto interrupted;
			timeo = sock_wait_for_wmem(sk, timeo);
			continue;
		}


				/* No pages, we're done... */
		err = -ENOBUFS;
				if (!data_len)
		gfp_mask = sk->sk_allocation;
					break;
		if (gfp_mask & __GFP_WAIT)
			gfp_mask |= __GFP_REPEAT;

		skb = alloc_skb(header_len, gfp_mask);
		if (!skb)
			goto failure;


		skb->truesize += data_len;
		skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;


					page = alloc_pages(sk->sk_allocation, 0);
		for (i = 0; npages > 0; i++) {
					if (!page) {
			int order = max_page_order;
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}


					__skb_fill_page_desc(skb, i,
			while (order) {
							page, 0,
				if (npages >= 1 << order) {
							(data_len >= PAGE_SIZE ?
					page = alloc_pages(sk->sk_allocation |
							 PAGE_SIZE :
							   __GFP_COMP | __GFP_NOWARN,
							 data_len));
							   order);
					data_len -= PAGE_SIZE;
					if (page)
						goto fill_page;
				}
				}

				order--;
				/* Full success... */
				break;
			}
			}
			err = -ENOBUFS;
			page = alloc_page(sk->sk_allocation);
			if (!page)
				goto failure;
				goto failure;
fill_page:
			chunk = min_t(unsigned long, data_len,
				      PAGE_SIZE << order);
			skb_fill_page_desc(skb, i, page, 0, chunk);
			data_len -= chunk;
			npages -= 1 << order;
		}
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	}


	skb_set_owner_w(skb, sk);
	skb_set_owner_w(skb, sk);
@@ -1819,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
interrupted:
interrupted:
	err = sock_intr_errno(timeo);
	err = sock_intr_errno(timeo);
failure:
failure:
	kfree_skb(skb);
	*errcode = err;
	*errcode = err;
	return NULL;
	return NULL;
}
}
@@ -1827,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
				    int noblock, int *errcode)
{
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
}
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_alloc_send_skb);


+1 −1
Original line number Original line Diff line number Diff line
@@ -2181,7 +2181,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
		linear = len;
		linear = len;


	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
				   err, 0);
	if (!skb)
	if (!skb)
		return NULL;
		return NULL;


Loading