Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0bf4a9e authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

net: cleanup and document skb fclone layout



Let's use a proper structure to clearly document and implement
skb fast clones.

Then, we can more easily experiment with alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking implementation details.

Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent b248230c
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -781,6 +781,31 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
				     int *errcode,
				     gfp_t gfp_mask);

/* Layout of fast clones : [skb1][skb2][fclone_ref]
 *
 * A fast-clone allocation carves a single skbuff_fclone_cache object
 * into the original skb, its one clone slot, and a refcount shared by
 * both.  Code recovers this structure from either embedded skb with
 * container_of(), so field order must not change.
 */
struct sk_buff_fclones {
	struct sk_buff	skb1;		/* the original skb (SKB_FCLONE_ORIG) */

	struct sk_buff	skb2;		/* the clone slot; SKB_FCLONE_UNAVAILABLE
					 * until handed out by skb_clone()
					 */

	atomic_t	fclone_ref;	/* refs held by skb1 and (when live) skb2;
					 * whole object freed when it hits zero
					 */
};

/**
 *	skb_fclone_busy - check if fclone is busy
 *	@skb: buffer
 *
 * Returns true if skb is a fast clone, and its clone is not freed.
 */
static inline bool skb_fclone_busy(const struct sk_buff *skb)
{
	const struct sk_buff_fclones *fclones;

	/* Pointer arithmetic only: valid even before we know skb is
	 * really the skb1 member of a struct sk_buff_fclones.
	 */
	fclones = container_of(skb, struct sk_buff_fclones, skb1);

	/* Only dereference the sibling slot once skb is confirmed to be
	 * a fast-clone original; skb2 still marked SKB_FCLONE_CLONE
	 * means the clone has not been freed yet.
	 */
	return skb->fclone == SKB_FCLONE_ORIG &&
	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
}

static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
					       gfp_t priority)
{
+20 −21
Original line number Diff line number Diff line
@@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
	kmemcheck_annotate_variable(shinfo->destructor_arg);

	if (flags & SKB_ALLOC_FCLONE) {
		struct sk_buff *child = skb + 1;
		atomic_t *fclone_ref = (atomic_t *) (child + 1);
		struct sk_buff_fclones *fclones;

		kmemcheck_annotate_bitfield(child, flags1);
		fclones = container_of(skb, struct sk_buff_fclones, skb1);

		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
		skb->fclone = SKB_FCLONE_ORIG;
		atomic_set(fclone_ref, 1);
		atomic_set(&fclones->fclone_ref, 1);

		child->fclone = SKB_FCLONE_UNAVAILABLE;
		child->pfmemalloc = pfmemalloc;
		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
		fclones->skb2.pfmemalloc = pfmemalloc;
	}
out:
	return skb;
@@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb)
 */
static void kfree_skbmem(struct sk_buff *skb)
{
	struct sk_buff *other;
	atomic_t *fclone_ref;
	struct sk_buff_fclones *fclones;

	switch (skb->fclone) {
	case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +533,21 @@ static void kfree_skbmem(struct sk_buff *skb)
		break;

	case SKB_FCLONE_ORIG:
		fclone_ref = (atomic_t *) (skb + 2);
		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, skb);
		fclones = container_of(skb, struct sk_buff_fclones, skb1);
		if (atomic_dec_and_test(&fclones->fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, fclones);
		break;

	case SKB_FCLONE_CLONE:
		fclone_ref = (atomic_t *) (skb + 1);
		other = skb - 1;
		fclones = container_of(skb, struct sk_buff_fclones, skb2);

		/* The clone portion is available for
		 * fast-cloning again.
		 */
		skb->fclone = SKB_FCLONE_UNAVAILABLE;

		if (atomic_dec_and_test(fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, other);
		if (atomic_dec_and_test(&fclones->fclone_ref))
			kmem_cache_free(skbuff_fclone_cache, fclones);
		break;
	}
}
@@ -859,17 +858,18 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	struct sk_buff *n;
	struct sk_buff_fclones *fclones = container_of(skb,
						       struct sk_buff_fclones,
						       skb1);
	struct sk_buff *n = &fclones->skb2;

	if (skb_orphan_frags(skb, gfp_mask))
		return NULL;

	n = skb + 1;
	if (skb->fclone == SKB_FCLONE_ORIG &&
	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
		atomic_t *fclone_ref = (atomic_t *) (n + 1);
		n->fclone = SKB_FCLONE_CLONE;
		atomic_inc(fclone_ref);
		atomic_inc(&fclones->fclone_ref);
	} else {
		if (skb_pfmemalloc(skb))
			gfp_mask |= __GFP_MEMALLOC;
@@ -3240,8 +3240,7 @@ void __init skb_init(void)
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					      NULL);
	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						(2*sizeof(struct sk_buff)) +
						sizeof(atomic_t),
						sizeof(struct sk_buff_fclones),
						0,
						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
						NULL);
+1 −4
Original line number Diff line number Diff line
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
static bool skb_still_in_host_queue(const struct sock *sk,
				    const struct sk_buff *skb)
{
	const struct sk_buff *fclone = skb + 1;

	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
	if (unlikely(skb_fclone_busy(skb))) {
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
		return true;
+1 −3
Original line number Diff line number Diff line
@@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
	const struct sk_buff *fclone = skb + 1;

	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
	if (unlikely(skb_fclone_busy(skb))) {
		kfree_skb(skb);
		return 0;
	}