Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 21e4902a authored by Thomas Graf, committed by David S. Miller
Browse files

netlink: Lockless lookup with RCU grace period in socket release



Defers the release of the socket reference using call_rcu() to
allow using an RCU read-side protected call to rhashtable_lookup().

This restores behaviour and performance gains as previously
introduced by e341694e ("netlink: Convert netlink_lookup() to use
RCU protected hash table") without the side effect of severely
delayed socket destruction.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f89bd6f8
Loading
Loading
Loading
Loading
+16 −16
Original line number Original line Diff line number Diff line
@@ -97,12 +97,12 @@ static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
static void netlink_skb_destructor(struct sk_buff *skb);


/* nl_table locking explained:
/* nl_table locking explained:
 * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
 * Lookup and traversal are protected with an RCU read-side lock. Insertion
 * combined with an RCU read-side lock. Insertion and removal are protected
 * and removal are protected with nl_sk_hash_lock while using RCU list
 * with nl_sk_hash_lock while using RCU list modification primitives and may
 * modification primitives and may run in parallel to RCU protected lookups.
 * run in parallel to nl_table_lock protected lookups. Destruction of the
 * Destruction of the Netlink socket may only occur *after* nl_table_lock has
 * Netlink socket may only occur *after* nl_table_lock has been acquired
 * been acquired either during or after the socket has been removed from
 * either during or after the socket has been removed from the list.
 * the list and after an RCU grace period.
 */
 */
DEFINE_RWLOCK(nl_table_lock);
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
@@ -1003,13 +1003,11 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
	struct netlink_table *table = &nl_table[protocol];
	struct netlink_table *table = &nl_table[protocol];
	struct sock *sk;
	struct sock *sk;


	read_lock(&nl_table_lock);
	rcu_read_lock();
	rcu_read_lock();
	sk = __netlink_lookup(table, portid, net);
	sk = __netlink_lookup(table, portid, net);
	if (sk)
	if (sk)
		sock_hold(sk);
		sock_hold(sk);
	rcu_read_unlock();
	rcu_read_unlock();
	read_unlock(&nl_table_lock);


	return sk;
	return sk;
}
}
@@ -1183,6 +1181,13 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
	goto out;
	goto out;
}
}


/*
 * RCU callback: recover the netlink_sock that embeds @head (its 'rcu'
 * member) and drop one reference on the embedded struct sock.
 *
 * Passed to call_rcu() from netlink_release(), so the sock_put() happens
 * from the RCU callback rather than directly in the release path.
 */
static void deferred_put_nlk_sk(struct rcu_head *head)
{
	/* @head is the 'rcu' field inside a struct netlink_sock. */
	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);

	sock_put(&nlk->sk);
}

static int netlink_release(struct socket *sock)
static int netlink_release(struct socket *sock)
{
{
	struct sock *sk = sock->sk;
	struct sock *sk = sock->sk;
@@ -1248,7 +1253,7 @@ static int netlink_release(struct socket *sock)
	local_bh_disable();
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
	local_bh_enable();
	local_bh_enable();
	sock_put(sk);
	call_rcu(&nlk->rcu, deferred_put_nlk_sk);
	return 0;
	return 0;
}
}


@@ -1263,7 +1268,6 @@ static int netlink_autobind(struct socket *sock)


retry:
retry:
	cond_resched();
	cond_resched();
	netlink_table_grab();
	rcu_read_lock();
	rcu_read_lock();
	if (__netlink_lookup(table, portid, net)) {
	if (__netlink_lookup(table, portid, net)) {
		/* Bind collision, search negative portid values. */
		/* Bind collision, search negative portid values. */
@@ -1271,11 +1275,9 @@ static int netlink_autobind(struct socket *sock)
		if (rover > -4097)
		if (rover > -4097)
			rover = -4097;
			rover = -4097;
		rcu_read_unlock();
		rcu_read_unlock();
		netlink_table_ungrab();
		goto retry;
		goto retry;
	}
	}
	rcu_read_unlock();
	rcu_read_unlock();
	netlink_table_ungrab();


	err = netlink_insert(sk, net, portid);
	err = netlink_insert(sk, net, portid);
	if (err == -EADDRINUSE)
	if (err == -EADDRINUSE)
@@ -2910,9 +2912,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
}
}


static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(nl_table_lock) __acquires(RCU)
	__acquires(RCU)
{
{
	read_lock(&nl_table_lock);
	rcu_read_lock();
	rcu_read_lock();
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
}
@@ -2964,10 +2965,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
}


static void netlink_seq_stop(struct seq_file *seq, void *v)
static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU) __releases(nl_table_lock)
	__releases(RCU)
{
{
	rcu_read_unlock();
	rcu_read_unlock();
	read_unlock(&nl_table_lock);
}
}




+1 −0
Original line number Original line Diff line number Diff line
@@ -50,6 +50,7 @@ struct netlink_sock {
#endif /* CONFIG_NETLINK_MMAP */
#endif /* CONFIG_NETLINK_MMAP */


	struct rhash_head	node;
	struct rhash_head	node;
	struct rcu_head		rcu;
};
};


static inline struct netlink_sock *nlk_sk(struct sock *sk)
static inline struct netlink_sock *nlk_sk(struct sock *sk)