Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1a2e10a2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'Rework-ip_ra_chain-protection'



Kirill Tkhai says:

====================
Rework ip_ra_chain protection

Commit 1215e51e "ipv4: fix a deadlock in ip_ra_control"
made rtnl_lock() be used in raw_close(). This function is called
on every RAW socket destruction, so that rtnl_mutex is taken
every time. This scales very sadly. I observe cleanup_net()
spending a lot of time in rtnl_lock() and raw_close() is one
of the biggest rtnl user (since we have percpu net->ipv4.icmp_sk).

This patchset reworks the locking: reverts the problem commit
and its descendant, and introduces rtnl-independent locking.
This may have a continuation, and someone may work on killing
rtnl_lock() in mrtsock_destruct() in the future.

v3: Change patches order: [2/5] and [3/5].
v2: Fix sparse warning [4/5], as reported by kbuild test robot.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents f2d254fa d9ff3049
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb)
	return 0;
}

/* Special input handler for packets caught by router alert option.
   They are selected only by protocol field, and then processed likely
   local ones; but only if someone wants them! Otherwise, router
   not running rsvpd will kill RSVP.

   It is user level problem, what it will make with them.
   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
   but receiver should be enough clever f.e. to forward mtrace requests,
   sent to multicast group to reach destination designated router.
 */

struct ip_ra_chain {
	struct ip_ra_chain __rcu *next;
	struct sock		*sk;
@@ -101,8 +112,6 @@ struct ip_ra_chain {
	struct rcu_head		rcu;
};

extern struct ip_ra_chain __rcu *ip_ra_chain;

/* IP flags. */
#define IP_CE		0x8000		/* Flag: "Congestion"		*/
#define IP_DF		0x4000		/* Flag: "Don't Fragment"	*/
+2 −0
Original line number Diff line number Diff line
@@ -49,6 +49,8 @@ struct netns_ipv4 {
#endif
	struct ipv4_devconf	*devconf_all;
	struct ipv4_devconf	*devconf_dflt;
	struct ip_ra_chain __rcu *ra_chain;
	struct mutex		ra_mutex;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	struct fib_rules_ops	*rules_ops;
	bool			fib_has_custom_rules;
+1 −0
Original line number Diff line number Diff line
@@ -301,6 +301,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);
	mutex_init(&net->ipv4.ra_mutex);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
+2 −3
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);

	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
		 */
		if (sk && inet_sk(sk)->inet_num == protocol &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == dev->ifindex) &&
		    net_eq(sock_net(sk), net)) {
		     sk->sk_bound_dev_if == dev->ifindex)) {
			if (ip_is_fragment(ip_hdr(skb))) {
				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
					return true;
+13 −21
Original line number Diff line number Diff line
@@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
	return 0;
}


/* Special input handler for packets caught by router alert option.
   They are selected only by protocol field, and then processed likely
   local ones; but only if someone wants them! Otherwise, router
   not running rsvpd will kill RSVP.

   It is user level problem, what it will make with them.
   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
   but receiver should be enough clever f.e. to forward mtrace requests,
   sent to multicast group to reach destination designated router.
 */
struct ip_ra_chain __rcu *ip_ra_chain;


static void ip_ra_destroy_rcu(struct rcu_head *head)
{
	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
	struct ip_ra_chain *ra, *new_ra;
	struct ip_ra_chain __rcu **rap;
	struct net *net = sock_net(sk);

	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
		return -EINVAL;

	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;

	for (rap = &ip_ra_chain;
	     (ra = rtnl_dereference(*rap)) != NULL;
	mutex_lock(&net->ipv4.ra_mutex);
	for (rap = &net->ipv4.ra_chain;
	     (ra = rcu_dereference_protected(*rap,
			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
	     rap = &ra->next) {
		if (ra->sk == sk) {
			if (on) {
				mutex_unlock(&net->ipv4.ra_mutex);
				kfree(new_ra);
				return -EADDRINUSE;
			}
			/* dont let ip_call_ra_chain() use sk again */
			ra->sk = NULL;
			RCU_INIT_POINTER(*rap, ra->next);
			mutex_unlock(&net->ipv4.ra_mutex);

			if (ra->destructor)
				ra->destructor(sk);
@@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
			return 0;
		}
	}
	if (!new_ra)
	if (!new_ra) {
		mutex_unlock(&net->ipv4.ra_mutex);
		return -ENOBUFS;
	}
	new_ra->sk = sk;
	new_ra->destructor = destructor;

	RCU_INIT_POINTER(new_ra->next, ra);
	rcu_assign_pointer(*rap, new_ra);
	sock_hold(sk);
	mutex_unlock(&net->ipv4.ra_mutex);

	return 0;
}
@@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_UNBLOCK_SOURCE:
	case IP_ROUTER_ALERT:
		return true;
	}
	return false;
@@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,

	/* If optlen==0, it is equivalent to val == 0 */

	if (optname == IP_ROUTER_ALERT)
		return ip_ra_control(sk, val ? 1 : 0, NULL);
	if (ip_mroute_opt(optname))
		return ip_mroute_setsockopt(sk, optname, optval, optlen);

@@ -1149,9 +1144,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
			goto e_inval;
		inet->mc_all = val;
		break;
	case IP_ROUTER_ALERT:
		err = ip_ra_control(sk, val ? 1 : 0, NULL);
		break;

	case IP_FREEBIND:
		if (optlen < 1)
Loading