Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d179cd12, authored and committed by David S. Miller
Browse files

[NET]: Implement SKB fast cloning.



Protocols that make extensive use of SKB cloning,
for example TCP, eat at least 2 allocations per
packet sent as a result.

To cut the kmalloc() count in half, we implement
a pre-allocation scheme wherein we allocate
2 sk_buff objects in advance, then use a simple
reference count to free up the memory at the
correct time.

Based upon an initial patch by Thomas Graf and
suggestions from Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent e92ae93a
Loading
Loading
Loading
Loading
+23 −3
Original line number Diff line number Diff line
@@ -162,6 +162,13 @@ struct skb_timeval {
	u32	off_usec;
};


/*
 * States kept in the sk_buff 'fclone' field.  The values are spelled out
 * because they are stored in a narrow bitfield and must stay small.
 */
enum {
	/* Not an active half of a fast-clone pair: either a head that is
	 * freed to skbuff_head_cache, or the clone slot of a pair while
	 * it is not handed out.
	 */
	SKB_FCLONE_UNAVAILABLE	= 0,
	/* First sk_buff of an allocation from skbuff_fclone_cache. */
	SKB_FCLONE_ORIG		= 1,
	/* Second sk_buff of such an allocation, handed out by skb_clone(). */
	SKB_FCLONE_CLONE	= 2,
};

/** 
 *	struct sk_buff - socket buffer
 *	@next: Next buffer in list
@@ -255,7 +262,8 @@ struct sk_buff {
				ip_summed:2,
				nohdr:1,
				nfctinfo:3;
	__u8			pkt_type;
	__u8			pkt_type:3,
				fclone:2;
	__be16			protocol;

	void			(*destructor)(struct sk_buff *skb);
@@ -295,8 +303,20 @@ struct sk_buff {
#include <asm/system.h>

extern void	       __kfree_skb(struct sk_buff *skb);
extern struct sk_buff *alloc_skb(unsigned int size,
				 unsigned int __nocast priority);
extern struct sk_buff *__alloc_skb(unsigned int size,
				   unsigned int __nocast priority, int fclone);
/* Allocate an sk_buff through __alloc_skb() with the fclone flag cleared. */
static inline struct sk_buff *alloc_skb(unsigned int size,
					unsigned int __nocast priority)
{
	const int fclone = 0;	/* no fast-clone companion requested */

	return __alloc_skb(size, priority, fclone);
}

/* Allocate an sk_buff through __alloc_skb() with the fclone flag set, so
 * the head is taken from the fast-clone cache.
 */
static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
					       unsigned int __nocast priority)
{
	const int fclone = 1;	/* request the fast-clone companion */

	return __alloc_skb(size, priority, fclone);
}

extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
					    unsigned int size,
					    unsigned int __nocast priority);
+1 −1
Original line number Diff line number Diff line
@@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
	int hdr_len;

	hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
	skb = alloc_skb(size + hdr_len, gfp);
	skb = alloc_skb_fclone(size + hdr_len, gfp);
	if (skb) {
		skb->truesize += mem;
		if (sk->sk_forward_alloc >= (int)skb->truesize ||
+72 −10
Original line number Diff line number Diff line
@@ -69,6 +69,7 @@
#include <asm/system.h>

static kmem_cache_t *skbuff_head_cache;
static kmem_cache_t *skbuff_fclone_cache;

struct timeval __read_mostly skb_tv_base;

@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 */

/**
 *	alloc_skb	-	allocate a network buffer
 *	__alloc_skb	-	allocate a network buffer
 *	@size: size to allocate
 *	@gfp_mask: allocation mask
 *	@fclone: nonzero to allocate the head from skbuff_fclone_cache
 *		instead of skbuff_head_cache
 *
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
			    int fclone)
{
	struct sk_buff *skb;
	u8 *data;

	/* Get the HEAD */
	if (fclone)
		skb = kmem_cache_alloc(skbuff_fclone_cache,
				       gfp_mask & ~__GFP_DMA);
	else
		skb = kmem_cache_alloc(skbuff_head_cache,
				       gfp_mask & ~__GFP_DMA);

	if (!skb)
		goto out;

@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
	skb->data = data;
	skb->tail = data;
	skb->end  = data + size;
	if (fclone) {
		struct sk_buff *child = skb + 1;
		atomic_t *fclone_ref = (atomic_t *) (child + 1);

		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(fclone_ref, 1);

		child->fclone = SKB_FCLONE_UNAVAILABLE;
	}
	atomic_set(&(skb_shinfo(skb)->dataref), 1);
	skb_shinfo(skb)->nr_frags  = 0;
	skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
 */
void kfree_skbmem(struct sk_buff *skb)
{
	/*
	 * Releases the skb's data via skb_release_data() and then returns
	 * the sk_buff head to the slab cache selected by skb->fclone.
	 *
	 * Heads from skbuff_fclone_cache are addressed here as two adjacent
	 * sk_buffs followed by one atomic_t reference count:
	 *
	 *	[ original | clone | fclone_ref ]
	 *
	 * The pair goes back to the cache only when the count drops to zero.
	 */
	struct sk_buff *other;
	atomic_t *fclone_ref;

	skb_release_data(skb);
	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
		/* Stand-alone head: free it to the ordinary head cache. */
		kmem_cache_free(skbuff_head_cache, skb);
		break;

	case SKB_FCLONE_ORIG:
		/* skb is the first of the pair; the counter sits behind
		 * both sk_buffs.
		 */
		fclone_ref = (atomic_t *) (skb + 2);
		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, skb);
		break;

	case SKB_FCLONE_CLONE:
		/* skb is the second of the pair: the counter follows it
		 * directly and the original sits immediately before it.
		 */
		fclone_ref = (atomic_t *) (skb + 1);
		other = skb - 1;

		/* The clone portion is available for
		 * fast-cloning again.  This reset is done before the
		 * reference is dropped.
		 */
		skb->fclone = SKB_FCLONE_UNAVAILABLE;

		/* The cache object starts at the original, so free 'other',
		 * not skb.
		 */
		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, other);
		break;
	/* No default case: any other fclone value frees no head here. */
	};
}

/**
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)

struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	struct sk_buff *n;

	n = skb + 1;
	if (skb->fclone == SKB_FCLONE_ORIG &&
	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
		atomic_t *fclone_ref = (atomic_t *) (n + 1);
		n->fclone = SKB_FCLONE_CLONE;
		atomic_inc(fclone_ref);
	} else {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

#define C(x) n->x = skb->x

@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
	new->mac.raw	= old->mac.raw + offset;
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->local_df	= old->local_df;
	new->fclone	= SKB_FCLONE_UNAVAILABLE;
	new->pkt_type	= old->pkt_type;
	new->tstamp	= old->tstamp;
	new->destructor = NULL;
@@ -1647,13 +1699,23 @@ void __init skb_init(void)
					      NULL, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						(2*sizeof(struct sk_buff)) +
						sizeof(atomic_t),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL, NULL);
	if (!skbuff_fclone_cache)
		panic("cannot create skbuff cache");

	do_gettimeofday(&skb_tv_base);
}

EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
EXPORT_SYMBOL(alloc_skb);
EXPORT_SYMBOL(__alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
+2 −2
Original line number Diff line number Diff line
@@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
	} else {
		/* Socket is locked, keep trying until memory is available. */
		for (;;) {
			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
			skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
			if (skb)
				break;
			yield();
@@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk)

	tcp_connect_init(sk);

	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(buff == NULL))
		return -ENOBUFS;