Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e67f88dd authored by Eric Dumazet, committed by David S. Miller
Browse files

net: don't hold rtnl mutex during netlink dump callbacks



Four years ago, Patrick made a change to hold rtnl mutex during netlink
dump callbacks.

I believe it was a wrong move. This slows down concurrent dumps, making
good old /proc/net/ files faster than rtnetlink in some situations.

This occurred to me because one "ip link show dev ..." was _very_ slow
on a workload adding/removing network devices in background.

All dump callbacks are able to use RCU locking now, so this patch is
roughly a revert of the following commits:

1c2d670f : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
6313c1e0 : [RTNETLINK]: Remove unnecessary locking in dump callbacks

This lets writers fight for the rtnl mutex while readers run at full speed.

It also takes care of phonet: phonet_route_get() is now called from an RCU
read-side critical section, so I renamed it to phonet_route_get_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent dcfd9cdc
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -51,7 +51,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr);
int phonet_route_add(struct net_device *dev, u8 daddr);
int phonet_route_add(struct net_device *dev, u8 daddr);
int phonet_route_del(struct net_device *dev, u8 daddr);
int phonet_route_del(struct net_device *dev, u8 daddr);
void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
struct net_device *phonet_route_get(struct net *net, u8 daddr);
struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr);
struct net_device *phonet_route_output(struct net *net, u8 daddr);
struct net_device *phonet_route_output(struct net *net, u8 daddr);


#define PN_NO_ADDR	0xff
#define PN_NO_ADDR	0xff
+4 −3
Original line number Original line Diff line number Diff line
@@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
	int idx;
	int idx;


	idx = 0;
	idx = 0;
	for_each_netdev(net, dev) {
	rcu_read_lock();
		struct net_bridge_port *port = br_port_get_rtnl(dev);
	for_each_netdev_rcu(net, dev) {
		struct net_bridge_port *port = br_port_get_rcu(dev);


		/* not a bridge port */
		/* not a bridge port */
		if (!port || idx < cb->args[0])
		if (!port || idx < cb->args[0])
@@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
skip:
skip:
		++idx;
		++idx;
	}
	}

	rcu_read_unlock();
	cb->args[0] = idx;
	cb->args[0] = idx;


	return skb->len;
	return skb->len;
+2 −1
Original line number Original line Diff line number Diff line
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
	int idx = 0;
	int idx = 0;
	struct fib_rule *rule;
	struct fib_rule *rule;


	list_for_each_entry(rule, &ops->rules_list, list) {
	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
		if (idx < cb->args[1])
			goto skip;
			goto skip;


+5 −7
Original line number Original line Diff line number Diff line
@@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
	s_h = cb->args[0];
	s_h = cb->args[0];
	s_idx = cb->args[1];
	s_idx = cb->args[1];


	rcu_read_lock();
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		idx = 0;
		head = &net->dev_index_head[h];
		head = &net->dev_index_head[h];
		hlist_for_each_entry(dev, node, head, index_hlist) {
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			if (idx < s_idx)
			if (idx < s_idx)
				goto cont;
				goto cont;
			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1023,6 +1024,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
		}
		}
	}
	}
out:
out:
	rcu_read_unlock();
	cb->args[1] = idx;
	cb->args[1] = idx;
	cb->args[0] = h;
	cb->args[0] = h;


@@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
	int min_len;
	int min_len;
	int family;
	int family;
	int type;
	int type;
	int err;


	type = nlh->nlmsg_type;
	type = nlh->nlmsg_type;
	if (type > RTM_MAX)
	if (type > RTM_MAX)
@@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
		if (dumpit == NULL)
		if (dumpit == NULL)
			return -EOPNOTSUPP;
			return -EOPNOTSUPP;


		__rtnl_unlock();
		rtnl = net->rtnl;
		rtnl = net->rtnl;
		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
		rtnl_lock();
		return err;
	}
	}


	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
{
{
	struct sock *sk;
	struct sock *sk;
	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
				   rtnetlink_rcv, NULL, THIS_MODULE);
	if (!sk)
	if (!sk)
		return -ENOMEM;
		return -ENOMEM;
	net->rtnl = sk;
	net->rtnl = sk;
+6 −4
Original line number Original line Diff line number Diff line
@@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
	skip_naddr = cb->args[1];
	skip_naddr = cb->args[1];


	idx = 0;
	idx = 0;
	for_each_netdev(&init_net, dev) {
	rcu_read_lock();
	for_each_netdev_rcu(&init_net, dev) {
		if (idx < skip_ndevs)
		if (idx < skip_ndevs)
			goto cont;
			goto cont;
		else if (idx > skip_ndevs) {
		else if (idx > skip_ndevs) {
@@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
			skip_naddr = 0;
			skip_naddr = 0;
		}
		}


		if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
		if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
			goto cont;
			goto cont;


		for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
		for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
		     ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
			if (dn_idx < skip_naddr)
			if (dn_idx < skip_naddr)
				continue;
				continue;


@@ -778,6 +779,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
		idx++;
		idx++;
	}
	}
done:
done:
	rcu_read_unlock();
	cb->args[0] = idx;
	cb->args[0] = idx;
	cb->args[1] = dn_idx;
	cb->args[1] = dn_idx;


Loading