Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7e32aa4d authored by David S. Miller
Browse files

Merge branch 'inet-frags-next'



Nikolay Aleksandrov says:

====================
inet: frags: cleanup and kmem_cache use

This patchset does a couple of small cleanups in patches 1-5 and then in
patch 06 it introduces the use of kmem_cache for allocation/freeing of
inet_frag_queue+header objects.

v2: Broke up patch 02 into 3 patches as David suggested

Here are the results of a couple of netperf runs:
netperf options: -l 30 -I95,5 -i 15,10 -m 64k

- 10 gig before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.155.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      442466      0    7551.39
212992           30.00      439130           7494.45

- 10 gig after the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.155.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      458846      0    7830.94
212992           30.00      457575           7809.25

- Virtio before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.144.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      735000      0    12543.96
212992           30.00      560322           9562.79

- Virtio after the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.144.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      731729      0    12488.14
212992           30.00      647241           11046.21
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents a5536e10 d4ad4d22
Loading
Loading
Loading
Loading
+38 −12
Original line number Diff line number Diff line
@@ -15,25 +15,49 @@ struct netns_frags {
	int			low_thresh;
};

/**
 * fragment queue flags
 *
 * Each value occupies its own bit, so several flags can be set at the same
 * time in the flags field of struct inet_frag_queue; callers set them with
 * a bitwise OR and test them with a bitwise AND.
 *
 * @INET_FRAG_FIRST_IN: first fragment has arrived
 * @INET_FRAG_LAST_IN: final fragment has arrived
 * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
 * @INET_FRAG_EVICTED: frag queue is being evicted
 */
enum {
	INET_FRAG_FIRST_IN	= BIT(0),
	INET_FRAG_LAST_IN	= BIT(1),
	INET_FRAG_COMPLETE	= BIT(2),
	INET_FRAG_EVICTED	= BIT(3)
};

/**
 * struct inet_frag_queue - fragment queue
 *
 * @lock: spinlock protecting the queue
 * @timer: queue expiration timer
 * @list: hash bucket list
 * @refcnt: reference count of the queue
 * @fragments: received fragments head
 * @fragments_tail: received fragments tail
 * @stamp: timestamp of the last received fragment
 * @len: total length of the original datagram
 * @meat: length of received fragments so far
 * @flags: fragment queue flags
 * @max_size: (ipv4 only) maximum received fragment size with IP_DF set
 * @net: namespace that this frag belongs to
 */
struct inet_frag_queue {
	spinlock_t		lock;
	struct timer_list	timer;      /* when will this queue expire? */
	struct timer_list	timer;
	struct hlist_node	list;
	atomic_t		refcnt;
	struct sk_buff		*fragments; /* list of received fragments */
	struct sk_buff		*fragments;
	struct sk_buff		*fragments_tail;
	ktime_t			stamp;
	int			len;        /* total length of orig datagram */
	int			len;
	int			meat;
	__u8			last_in;    /* first/last segment arrived? */

#define INET_FRAG_EVICTED	8
#define INET_FRAG_COMPLETE	4
#define INET_FRAG_FIRST_IN	2
#define INET_FRAG_LAST_IN	1

	__u8			flags;
	u16			max_size;

	struct netns_frags	*net;
};

@@ -77,9 +101,11 @@ struct inet_frags {
	void			(*destructor)(struct inet_frag_queue *);
	void			(*skb_free)(struct sk_buff *);
	void			(*frag_expire)(unsigned long data);
	struct kmem_cache	*frags_cachep;
	const char		*frags_cache_name;
};

void inet_frags_init(struct inet_frags *);
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);

void inet_frags_init_net(struct netns_frags *nf);
+13 −8
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@

#include "reassembly.h"

static const char lowpan_frags_cache_name[] = "lowpan-frags";

struct lowpan_frag_info {
	__be16 d_tag;
	u16 d_size;
@@ -99,7 +101,7 @@ static void lowpan_frag_expire(unsigned long data)

	spin_lock(&fq->q.lock);

	if (fq->q.last_in & INET_FRAG_COMPLETE)
	if (fq->q.flags & INET_FRAG_COMPLETE)
		goto out;

	inet_frag_kill(&fq->q, &lowpan_frags);
@@ -142,7 +144,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
	struct net_device *dev;
	int end, offset;

	if (fq->q.last_in & INET_FRAG_COMPLETE)
	if (fq->q.flags & INET_FRAG_COMPLETE)
		goto err;

	offset = lowpan_cb(skb)->d_offset << 3;
@@ -154,14 +156,14 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
			goto err;
		fq->q.last_in |= INET_FRAG_LAST_IN;
		fq->q.flags |= INET_FRAG_LAST_IN;
		fq->q.len = end;
	} else {
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & INET_FRAG_LAST_IN)
			if (fq->q.flags & INET_FRAG_LAST_IN)
				goto err;
			fq->q.len = end;
		}
@@ -201,13 +203,13 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
	if (frag_type == LOWPAN_DISPATCH_FRAG1) {
		/* Calculate uncomp. 6lowpan header to estimate full size */
		fq->q.meat += lowpan_uncompress_size(skb, NULL);
		fq->q.last_in |= INET_FRAG_FIRST_IN;
		fq->q.flags |= INET_FRAG_FIRST_IN;
	} else {
		fq->q.meat += skb->len;
	}
	add_frag_mem_limit(&fq->q, skb->truesize);

	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len) {
		int res;
		unsigned long orefdst = skb->_skb_refdst;
@@ -571,7 +573,10 @@ int __init lowpan_net_frag_init(void)
	lowpan_frags.qsize = sizeof(struct frag_queue);
	lowpan_frags.match = lowpan_frag_match;
	lowpan_frags.frag_expire = lowpan_frag_expire;
	inet_frags_init(&lowpan_frags);
	lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
	ret = inet_frags_init(&lowpan_frags);
	if (ret)
		goto err_pernet;

	return ret;
err_pernet:
+25 −16
Original line number Diff line number Diff line
@@ -151,9 +151,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
			goto evict_again;
		}

		/* suppress xmit of (icmp) error packet */
		fq->last_in &= ~INET_FRAG_FIRST_IN;
		fq->last_in |= INET_FRAG_EVICTED;
		fq->flags |= INET_FRAG_EVICTED;
		hlist_del(&fq->list);
		hlist_add_head(&fq->list, &expired);
		++evicted;
@@ -200,7 +198,7 @@ static void inet_frag_schedule_worker(struct inet_frags *f)
		schedule_work(&f->frags_work);
}

void inet_frags_init(struct inet_frags *f)
int inet_frags_init(struct inet_frags *f)
{
	int i;

@@ -215,6 +213,12 @@ void inet_frags_init(struct inet_frags *f)

	seqlock_init(&f->rnd_seqlock);
	f->last_rebuild_jiffies = 0;
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

@@ -227,6 +231,7 @@ EXPORT_SYMBOL(inet_frags_init_net);
/* Tear down @f: cancel its frags_work item (waiting for it to finish if it is
 * currently running), then destroy the kmem cache that fragment queues of
 * this family are allocated from.
 */
void inet_frags_fini(struct inet_frags *f)
{
	cancel_work_sync(&f->frags_work);
	kmem_cache_destroy(f->frags_cachep);
}
EXPORT_SYMBOL(inet_frags_fini);

@@ -289,10 +294,10 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
	if (del_timer(&fq->timer))
		atomic_dec(&fq->refcnt);

	if (!(fq->last_in & INET_FRAG_COMPLETE)) {
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq_unlink(fq, f);
		atomic_dec(&fq->refcnt);
		fq->last_in |= INET_FRAG_COMPLETE;
		fq->flags |= INET_FRAG_COMPLETE;
	}
}
EXPORT_SYMBOL(inet_frag_kill);
@@ -311,7 +316,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
	struct netns_frags *nf;
	unsigned int sum, sum_truesize = 0;

	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
@@ -329,12 +334,13 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)

	if (f->destructor)
		f->destructor(q);
	kfree(q);
	kmem_cache_free(f->frags_cachep, q);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
		struct inet_frag_queue *qp_in, struct inet_frags *f,
						struct inet_frag_queue *qp_in,
						struct inet_frags *f,
						void *arg)
{
	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
@@ -349,7 +355,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
		if (qp->net == nf && f->match(qp, arg)) {
			atomic_inc(&qp->refcnt);
			spin_unlock(&hb->chain_lock);
			qp_in->last_in |= INET_FRAG_COMPLETE;
			qp_in->flags |= INET_FRAG_COMPLETE;
			inet_frag_put(qp_in, f);
			return qp;
		}
@@ -368,7 +374,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
}

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

@@ -377,7 +384,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
		return NULL;
	}

	q = kzalloc(f->qsize, GFP_ATOMIC);
	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (q == NULL)
		return NULL;

@@ -393,7 +400,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}

static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
						struct inet_frags *f,
						void *arg)
{
	struct inet_frag_queue *q;

@@ -405,7 +413,8 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
}

struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
		struct inet_frags *f, void *key, unsigned int hash)
				       struct inet_frags *f, void *key,
				       unsigned int hash)
{
	struct inet_frag_bucket *hb;
	struct inet_frag_queue *q;
+19 −16
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@
 */

static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";

struct ipfrag_skb_cb
{
@@ -185,20 +186,22 @@ static void ip_expire(unsigned long arg)

	spin_lock(&qp->q.lock);

	if (qp->q.last_in & INET_FRAG_COMPLETE)
	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto out;

	ipq_kill(qp);

	if (!(qp->q.last_in & INET_FRAG_EVICTED))
		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);

	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
	if (!(qp->q.flags & INET_FRAG_EVICTED)) {
		struct sk_buff *head = qp->q.fragments;
		const struct iphdr *iph;
		int err;

		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);

		if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
			goto out;

		rcu_read_lock();
		head->dev = dev_get_by_index_rcu(net, qp->iif);
		if (!head->dev)
@@ -211,8 +214,7 @@ static void ip_expire(unsigned long arg)
		if (err)
			goto out_rcu_unlock;

		/*
		 * Only an end host needs to send an ICMP
		/* Only an end host needs to send an ICMP
		 * "Fragment Reassembly Timeout" message, per RFC792.
		 */
		if (qp->user == IP_DEFRAG_AF_PACKET ||
@@ -221,7 +223,6 @@ static void ip_expire(unsigned long arg)
		     (skb_rtable(head)->rt_type != RTN_LOCAL)))
			goto out_rcu_unlock;


		/* Send an ICMP "Fragment Reassembly Timeout" message. */
		icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
out_rcu_unlock:
@@ -302,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp)
	} while (fp);
	sub_frag_mem_limit(&qp->q, sum_truesize);

	qp->q.last_in = 0;
	qp->q.flags = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.fragments = NULL;
@@ -323,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
	int err = -ENOENT;
	u8 ecn;

	if (qp->q.last_in & INET_FRAG_COMPLETE)
	if (qp->q.flags & INET_FRAG_COMPLETE)
		goto err;

	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -350,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
		 * or have different end, the segment is corrupted.
		 */
		if (end < qp->q.len ||
		    ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
			goto err;
		qp->q.last_in |= INET_FRAG_LAST_IN;
		qp->q.flags |= INET_FRAG_LAST_IN;
		qp->q.len = end;
	} else {
		if (end&7) {
@@ -362,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
		}
		if (end > qp->q.len) {
			/* Some bits beyond end -> corruption. */
			if (qp->q.last_in & INET_FRAG_LAST_IN)
			if (qp->q.flags & INET_FRAG_LAST_IN)
				goto err;
			qp->q.len = end;
		}
@@ -471,13 +472,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
	qp->ecn |= ecn;
	add_frag_mem_limit(&qp->q, skb->truesize);
	if (offset == 0)
		qp->q.last_in |= INET_FRAG_FIRST_IN;
		qp->q.flags |= INET_FRAG_FIRST_IN;

	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
	    skb->len + ihl > qp->q.max_size)
		qp->q.max_size = skb->len + ihl;

	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len) {
		unsigned long orefdst = skb->_skb_refdst;

@@ -860,5 +861,7 @@ void __init ipfrag_init(void)
	ip4_frags.qsize = sizeof(struct ipq);
	ip4_frags.match = ip4_frag_match;
	ip4_frags.frag_expire = ip_expire;
	inet_frags_init(&ip4_frags);
	ip4_frags.frags_cache_name = ip_frag_cache_name;
	if (inet_frags_init(&ip4_frags))
		panic("IP: failed to allocate ip4_frags cache\n");
}
+12 −8
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>

static const char nf_frags_cache_name[] = "nf-frags";

struct nf_ct_frag6_skb_cb
{
@@ -222,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
	int offset, end;
	u8 ecn;

	if (fq->q.last_in & INET_FRAG_COMPLETE) {
	if (fq->q.flags & INET_FRAG_COMPLETE) {
		pr_debug("Already completed\n");
		goto err;
	}
@@ -253,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) {
		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
			pr_debug("already received last fragment\n");
			goto err;
		}
		fq->q.last_in |= INET_FRAG_LAST_IN;
		fq->q.flags |= INET_FRAG_LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
@@ -272,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & INET_FRAG_LAST_IN) {
			if (fq->q.flags & INET_FRAG_LAST_IN) {
				pr_debug("last packet already reached.\n");
				goto err;
			}
@@ -354,7 +355,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.last_in |= INET_FRAG_FIRST_IN;
		fq->q.flags |= INET_FRAG_FIRST_IN;
	}

	return 0;
@@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
		goto ret_orig;
	}

	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len) {
		ret_skb = nf_ct_frag6_reasm(fq, dev);
		if (ret_skb == NULL)
@@ -677,12 +678,15 @@ int nf_ct_frag6_init(void)
	nf_frags.qsize = sizeof(struct frag_queue);
	nf_frags.match = ip6_frag_match;
	nf_frags.frag_expire = nf_ct_frag6_expire;
	inet_frags_init(&nf_frags);

	nf_frags.frags_cache_name = nf_frags_cache_name;
	ret = inet_frags_init(&nf_frags);
	if (ret)
		goto out;
	ret = register_pernet_subsys(&nf_ct_net_ops);
	if (ret)
		inet_frags_fini(&nf_frags);

out:
	return ret;
}

Loading