
Commit 795bb1c0 authored by Jesper Dangaard Brouer, committed by David S. Miller

net: bulk free infrastructure for NAPI context, use napi_consume_skb



Discovered that the network stack was hitting the kmem_cache/SLUB
slowpath when freeing SKBs.  Doing bulk free with kmem_cache_free_bulk
can speed up this slowpath.
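
For illustration only (not from this patch), the difference in call
pattern looks roughly like this; "cache", "objs" and "n" are
placeholder names for a kmem_cache and an array of n objects
allocated from it:

	size_t i;

	/* before: one call, and a potential slowpath hit, per object */
	for (i = 0; i < n; i++)
		kmem_cache_free(cache, objs[i]);

	/* after: hand the whole array back to the allocator in one call */
	kmem_cache_free_bulk(cache, n, objs);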

NAPI context is a bit special; let's take advantage of that for bulk
freeing of SKBs.

In NAPI context we are running in softirq, which gives us certain
protection.  A softirq can run on several CPUs at once, BUT the
important part is that a softirq will never preempt another softirq
running on the same CPU.  This gives us the opportunity to access
per-cpu variables in softirq context.
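
A minimal sketch of that pattern, with hypothetical names not taken
from this patch:

	struct my_cache {			/* hypothetical per-cpu cache */
		unsigned int count;
	};
	static DEFINE_PER_CPU(struct my_cache, my_cache);

	static void my_softirq_handler(void)	/* runs in softirq context */
	{
		/* safe without locks: no other softirq can preempt us
		 * on this CPU until we return
		 */
		struct my_cache *c = this_cpu_ptr(&my_cache);

		c->count++;
	}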

Extend napi_alloc_cache (which before only contained a page_frag_cache)
to be a struct with a small array-based stack for holding SKBs.
Introduce an SKB defer-and-flush API for accessing it.

Introduce napi_consume_skb() as a replacement for e.g.
dev_consume_skb_any() when running in NAPI context.  A small trick to
detect whether we are called from netpoll is to check if the budget is
0; in that case, we need to invoke dev_consume_skb_irq() instead.
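
A hedged sketch of the intended use from a driver's TX completion
path; the mydrv_* names are hypothetical and not part of this patch.
Passing the NAPI budget straight through makes the netpoll
(budget == 0) case work automatically:

	static void mydrv_clean_tx_ring(struct mydrv_ring *ring, int napi_budget)
	{
		struct sk_buff *skb;

		while ((skb = mydrv_next_completed_skb(ring)) != NULL) {
			/* replaces dev_consume_skb_any(); when budget == 0
			 * (netpoll) this falls back to dev_consume_skb_irq()
			 */
			napi_consume_skb(skb, napi_budget);
		}
	}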

Joint work with Alexander Duyck.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 18ac5590
+3 −0
@@ -2404,6 +2404,9 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
 {
 	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
 }
+void napi_consume_skb(struct sk_buff *skb, int budget);
+
+void __kfree_skb_flush(void);
 
 /**
  * __dev_alloc_pages - allocate page for network Rx

+1 −0
@@ -5155,6 +5155,7 @@ static void net_rx_action(struct softirq_action *h)
 		}
 	}
 
+	__kfree_skb_flush();
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);

+77 −6
@@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+#define NAPI_SKB_CACHE_SIZE	64
+
+struct napi_alloc_cache {
+	struct page_frag_cache page;
+	size_t skb_count;
+	void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
@@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return __alloc_page_frag(nc, fragsz, gfp_mask);
+	return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
 	void *data;
 
@@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = __alloc_page_frag(nc, len, gfp_mask);
+	data = __alloc_page_frag(&nc->page, len, gfp_mask);
 	if (unlikely(!data))
 		return NULL;
 
@@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	/* use OR instead of assignment to avoid clearing of bits in mask */
-	if (nc->pfmemalloc)
+	if (nc->page.pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;
 
@@ -747,6 +755,69 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+void __kfree_skb_flush(void)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* flush skb_cache if containing objects */
+	if (nc->skb_count) {
+		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+static void __kfree_skb_defer(struct sk_buff *skb)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* drop skb->head and call any destructors for packet */
+	skb_release_all(skb);
+
+	/* record skb to CPU local list */
+	nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+	/* SLUB writes into objects when freeing */
+	prefetchw(skb);
+#endif
+
+	/* flush skb_cache if it is filled */
+	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+		kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+	if (unlikely(!skb))
+		return;
+
+	/* if budget is 0 assume netpoll w/ IRQs disabled */
+	if (unlikely(!budget)) {
+		dev_consume_skb_irq(skb);
+		return;
+	}
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	/* if reaching here SKB is ready to free */
+	trace_consume_skb(skb);
+
+	/* if SKB is a clone, don't handle this case */
+	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	__kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
 /* Make sure a field is enclosed inside headers_start/headers_end section */
 #define CHECK_SKB_FIELD(field) \
 	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\