Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ebc0ffae authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

fib: RCU conversion of fib_lookup()



fib_lookup() converted to be called in RCU protected context, no
reference taken and released on a contended cache line (fib_clntref)

fib_table_lookup() and fib_semantic_match() get an additional parameter.

struct fib_info gets an rcu_head field, and is freed after an rcu grace
period.

Stress test :
(Sending 160.000.000 UDP frames on same neighbour,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_HASH) (about same results for FIB_TRIE)

Before patch :

real	1m31.199s
user	0m13.761s
sys	23m24.780s

After patch:

real	1m5.375s
user	0m14.997s
sys	15m50.115s

Before patch Profile :

13044.00 15.4% __ip_route_output_key vmlinux
 8438.00 10.0% dst_destroy           vmlinux
 5983.00  7.1% fib_semantic_match    vmlinux
 5410.00  6.4% fib_rules_lookup      vmlinux
 4803.00  5.7% neigh_lookup          vmlinux
 4420.00  5.2% _raw_spin_lock        vmlinux
 3883.00  4.6% rt_set_nexthop        vmlinux
 3261.00  3.9% _raw_read_lock        vmlinux
 2794.00  3.3% fib_table_lookup      vmlinux
 2374.00  2.8% neigh_resolve_output  vmlinux
 2153.00  2.5% dst_alloc             vmlinux
 1502.00  1.8% _raw_read_lock_bh     vmlinux
 1484.00  1.8% kmem_cache_alloc      vmlinux
 1407.00  1.7% eth_header            vmlinux
 1406.00  1.7% ipv4_dst_destroy      vmlinux
 1298.00  1.5% __copy_from_user_ll   vmlinux
 1174.00  1.4% dev_queue_xmit        vmlinux
 1000.00  1.2% ip_output             vmlinux

After patch Profile :

13712.00 15.8% dst_destroy             vmlinux
 8548.00  9.9% __ip_route_output_key   vmlinux
 7017.00  8.1% neigh_lookup            vmlinux
 4554.00  5.3% fib_semantic_match      vmlinux
 4067.00  4.7% _raw_read_lock          vmlinux
 3491.00  4.0% dst_alloc               vmlinux
 3186.00  3.7% neigh_resolve_output    vmlinux
 3103.00  3.6% fib_table_lookup        vmlinux
 2098.00  2.4% _raw_read_lock_bh       vmlinux
 2081.00  2.4% kmem_cache_alloc        vmlinux
 2013.00  2.3% _raw_spin_lock          vmlinux
 1763.00  2.0% __copy_from_user_ll     vmlinux
 1763.00  2.0% ip_output               vmlinux
 1761.00  2.0% ipv4_dst_destroy        vmlinux
 1631.00  1.9% eth_header              vmlinux
 1440.00  1.7% _raw_read_unlock_bh     vmlinux

Reference results, if IP route cache is enabled :

real	0m29.718s
user	0m10.845s
sys	7m37.341s

25213.00 29.5% __ip_route_output_key   vmlinux
 9011.00 10.5% dst_release             vmlinux
 4817.00  5.6% ip_push_pending_frames  vmlinux
 4232.00  5.0% ip_finish_output        vmlinux
 3940.00  4.6% udp_sendmsg             vmlinux
 3730.00  4.4% __copy_from_user_ll     vmlinux
 3716.00  4.4% ip_route_output_flow    vmlinux
 2451.00  2.9% __xfrm_lookup           vmlinux
 2221.00  2.6% ip_append_data          vmlinux
 1718.00  2.0% _raw_spin_lock_bh       vmlinux
 1655.00  1.9% __alloc_skb             vmlinux
 1572.00  1.8% sock_wfree              vmlinux
 1345.00  1.6% kfree                   vmlinux

Signed-off-by: default avatarEric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c2952c31
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ struct fib_lookup_arg {
	void			*lookup_ptr;
	void			*result;
	struct fib_rule		*rule;
	int			flags;
#define FIB_LOOKUP_NOREF	1
};

struct fib_rules_ops {
+4 −13
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@ struct fib_info {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int			fib_power;
#endif
	struct rcu_head		rcu;
	struct fib_nh		fib_nh[0];
#define fib_dev		fib_nh[0].nh_dev
};
@@ -148,7 +149,7 @@ struct fib_table {
};

extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
			    struct fib_result *res);
			    struct fib_result *res, int fib_flags);
extern int fib_table_insert(struct fib_table *, struct fib_config *);
extern int fib_table_delete(struct fib_table *, struct fib_config *);
extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
@@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
	struct fib_table *table;

	table = fib_get_table(net, RT_TABLE_LOCAL);
	if (!fib_table_lookup(table, flp, res))
	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
		return 0;

	table = fib_get_table(net, RT_TABLE_MAIN);
	if (!fib_table_lookup(table, flp, res))
	if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
		return 0;
	return -ENETUNREACH;
}
@@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi)
		free_fib_info(fi);
}

static inline void fib_res_put(struct fib_result *res)
{
	if (res->fi)
		fib_info_put(res->fi);
#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (res->r)
		fib_rule_put(res->r);
#endif
}

#ifdef CONFIG_PROC_FS
extern int __net_init  fib_proc_init(struct net *net);
extern void __net_exit fib_proc_exit(struct net *net);
+2 −1
Original line number Diff line number Diff line
@@ -225,7 +225,8 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
			err = ops->action(rule, fl, flags, arg);

		if (err != -EAGAIN) {
			if (likely(atomic_inc_not_zero(&rule->refcnt))) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(atomic_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
+13 −14
Original line number Diff line number Diff line
@@ -168,8 +168,11 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
	struct fib_result res = { 0 };
	struct net_device *dev = NULL;

	if (fib_lookup(net, &fl, &res))
	rcu_read_lock();
	if (fib_lookup(net, &fl, &res)) {
		rcu_read_unlock();
		return NULL;
	}
	if (res.type != RTN_LOCAL)
		goto out;
	dev = FIB_RES_DEV(res);
@@ -177,7 +180,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
	if (dev && devref)
		dev_hold(dev);
out:
	fib_res_put(&res);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(__ip_dev_find);
@@ -207,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (local_table) {
		ret = RTN_UNICAST;
		if (!fib_table_lookup(local_table, &fl, &res)) {
		rcu_read_lock();
		if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
			if (!dev || dev == res.fi->fib_dev)
				ret = res.type;
			fib_res_put(&res);
		}
		rcu_read_unlock();
	}
	return ret;
}
@@ -235,6 +239,7 @@ EXPORT_SYMBOL(inet_dev_addr_type);
 * - figure out what "logical" interface this packet arrived
 *   and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 * called with rcu_read_lock()
 */
int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
@@ -259,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
	struct net *net;

	no_addr = rpf = accept_local = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
@@ -268,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;
@@ -278,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
		goto last_resort;
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
			goto e_inval;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
@@ -299,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
#endif
	if (dev_match) {
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
@@ -315,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

@@ -326,8 +326,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
e_rpf:
@@ -873,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
		local_bh_disable();

		frn->tb_id = tb->tb_id;
		frn->err = fib_table_lookup(tb, &fl, &res);
		rcu_read_lock();
		frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF);

		if (!frn->err) {
			frn->prefixlen = res.prefixlen;
			frn->nh_sel = res.nh_sel;
			frn->type = res.type;
			frn->scope = res.scope;
			fib_res_put(&res);
		}
		rcu_read_unlock();
		local_bh_enable();
	}
}
+3 −2
Original line number Diff line number Diff line
@@ -244,7 +244,8 @@ fn_new_zone(struct fn_hash *table, int z)
}

int fib_table_lookup(struct fib_table *tb,
		     const struct flowi *flp, struct fib_result *res)
		     const struct flowi *flp, struct fib_result *res,
		     int fib_flags)
{
	int err;
	struct fn_zone *fz;
@@ -264,7 +265,7 @@ int fib_table_lookup(struct fib_table *tb,

			err = fib_semantic_match(&f->fn_alias,
						 flp, res,
						 fz->fz_order);
						 fz->fz_order, fib_flags);
			if (err <= 0)
				goto out;
		}
Loading