
Commit f3f05f70 authored by Arnaldo Carvalho de Melo, committed by David S. Miller

[INET]: Generalise the tcp_listen_ lock routines

parent 6e04e021
include/net/inet_hashtables.h +48 −0
@@ -19,10 +19,14 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/tcp.h>		/* only for TCP_LISTEN, damn :-( */
#include <linux/types.h>
#include <linux/wait.h>

#include <net/sock.h>

#include <asm/atomic.h>

/* This is for all connections with a full identity, no wildcards.
 * New scheme, half the table is for TIME_WAIT, the other half is
 * for the rest.  I'll experiment with dynamic table growth later.
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,

extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);

extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);

/*
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
 */
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
	/* taking the read lock briefly synchronizes us with candidate writers */
	read_lock(&hashinfo->lhash_lock);
	atomic_inc(&hashinfo->lhash_users);
	read_unlock(&hashinfo->lhash_lock);
}

static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
	if (atomic_dec_and_test(&hashinfo->lhash_users))
		wake_up(&hashinfo->lhash_wait);
}

static inline void __inet_hash(struct inet_hashinfo *hashinfo,
			       struct sock *sk, const int listen_possible)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));
	if (listen_possible && sk->sk_state == TCP_LISTEN) {
		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &hashinfo->lhash_lock;
		inet_listen_wlock(hashinfo);
	} else {
		sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
		list = &hashinfo->ehash[sk->sk_hashent].chain;
		lock = &hashinfo->ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}
	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
	if (listen_possible && sk->sk_state == TCP_LISTEN)
		wake_up(&hashinfo->lhash_wait);
}
#endif /* _INET_HASHTABLES_H */
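
Note (not part of the diff): the reader side of the new helpers is used just like the old tcp_listen_lock()/tcp_listen_unlock() pair, only with the hashinfo passed explicitly. A minimal sketch of a sleeping reader walking the listening hash, modelled on the tcpdiag_dump() and tcp_get_idx() call sites changed further down; the loop body is illustrative:

	struct sock *sk;
	struct hlist_node *node;
	int i;

	inet_listen_lock(&tcp_hashinfo);	/* pin the listening hash; sleeping is allowed while it is held */
	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
		sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) {
			/* inspect sk; writers wait in inet_listen_wlock() until we are done */
		}
	}
	inet_listen_unlock(&tcp_hashinfo);	/* last reader out wakes any sleeping writer */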
include/net/tcp.h +0 −21
@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,

extern void tcp_enter_memory_pressure(void);

extern void tcp_listen_wlock(void);

/* - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
 */

static inline void tcp_listen_lock(void)
{
	/* read_lock synchronizes to candidates to writers */
	read_lock(&tcp_hashinfo.lhash_lock);
	atomic_inc(&tcp_hashinfo.lhash_users);
	read_unlock(&tcp_hashinfo.lhash_lock);
}

static inline void tcp_listen_unlock(void)
{
	if (atomic_dec_and_test(&tcp_hashinfo.lhash_users))
		wake_up(&tcp_hashinfo.lhash_wait);
}

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
net/ipv4/inet_hashtables.c +32 −0
@@ -15,7 +15,9 @@

#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include <net/inet_hashtables.h>

@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
}

EXPORT_SYMBOL(inet_put_port);

/*
 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up each
 * exclusive lock release). It should be ifdefed really.
 */
void inet_listen_wlock(struct inet_hashinfo *hashinfo)
{
	write_lock(&hashinfo->lhash_lock);

	if (atomic_read(&hashinfo->lhash_users)) {
		DEFINE_WAIT(wait);

		for (;;) {
			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
						  &wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&hashinfo->lhash_users))
				break;
			write_unlock_bh(&hashinfo->lhash_lock);
			schedule();
			write_lock_bh(&hashinfo->lhash_lock);
		}

		finish_wait(&hashinfo->lhash_wait, &wait);
	}
}

EXPORT_SYMBOL(inet_listen_wlock);
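
Note (not part of the diff): inet_listen_wlock() returns with lhash_lock held for writing once all sleeping readers have drained, so the caller's job is to modify the listening hash, drop the lock, and wake the next exclusive waiter. A minimal sketch of that pattern, following the tcp_unhash() path changed below; sk is assumed to be a TCP_LISTEN socket:

	local_bh_disable();
	inet_listen_wlock(&tcp_hashinfo);	/* waits, sleeping, until lhash_users drops to zero */
	if (__sk_del_node_init(sk))		/* e.g. remove a listening socket from the hash */
		sock_prot_dec_use(sk->sk_prot);
	write_unlock_bh(&tcp_hashinfo.lhash_lock);	/* the _bh pairs with the local_bh_disable() above */
	wake_up(&tcp_hashinfo.lhash_wait);	/* wake the next writer queued in inet_listen_wlock(), if any */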
net/ipv4/tcp_diag.c +4 −4
@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
	if (cb->args[0] == 0) {
		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
			goto skip_listen_ht;
		tcp_listen_lock();
		inet_listen_lock(&tcp_hashinfo);
		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
			struct sock *sk;
			struct hlist_node *node;
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
					goto syn_recv;

				if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
					tcp_listen_unlock();
					inet_listen_unlock(&tcp_hashinfo);
					goto done;
				}

@@ -622,7 +622,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
					goto next_listen;

				if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
					tcp_listen_unlock();
					inet_listen_unlock(&tcp_hashinfo);
					goto done;
				}

@@ -636,7 +636,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
			cb->args[3] = 0;
			cb->args[4] = 0;
		}
		tcp_listen_unlock();
		inet_listen_unlock(&tcp_hashinfo);
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
net/ipv4/tcp_ipv4.c +9 −61
@@ -228,62 +228,11 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
	return ret;
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up each
 * exclusive lock release). It should be ifdefed really.
 */

void tcp_listen_wlock(void)
{
	write_lock(&tcp_hashinfo.lhash_lock);

	if (atomic_read(&tcp_hashinfo.lhash_users)) {
		DEFINE_WAIT(wait);

		for (;;) {
			prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
						&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&tcp_hashinfo.lhash_users))
				break;
			write_unlock_bh(&tcp_hashinfo.lhash_lock);
			schedule();
			write_lock_bh(&tcp_hashinfo.lhash_lock);
		}

		finish_wait(&tcp_hashinfo.lhash_wait, &wait);
	}
}

static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));
	if (listen_possible && sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}
	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
	if (listen_possible && sk->sk_state == TCP_LISTEN)
		wake_up(&tcp_hashinfo.lhash_wait);
}

static void tcp_v4_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		__tcp_v4_hash(sk, 1);
		__inet_hash(&tcp_hashinfo, sk, 1);
		local_bh_enable();
	}
}
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)

	if (sk->sk_state == TCP_LISTEN) {
		local_bh_disable();
		tcp_listen_wlock();
		inet_listen_wlock(&tcp_hashinfo);
		lock = &tcp_hashinfo.lhash_lock;
	} else {
		struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
@@ -624,7 +573,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
 		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(port);
 			__tcp_v4_hash(sk, 0);
 			__inet_hash(&tcp_hashinfo, sk, 0);
 		}
 		spin_unlock(&head->lock);

@@ -641,7 +590,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
 	tb  = inet_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v4_hash(sk, 0);
		__inet_hash(&tcp_hashinfo, sk, 0);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	__tcp_v4_hash(newsk, 0);
	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
	void *rc;
	struct tcp_iter_state* st = seq->private;

	tcp_listen_lock();
	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		tcp_listen_unlock();
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			tcp_listen_unlock();
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc	  = established_get_first(seq);
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			tcp_listen_unlock();
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(inet_bind_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
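
Note (not part of the diff): the point of hoisting these routines into inet_hashtables.h and inet_hashtables.c is that any protocol carrying its own struct inet_hashinfo can reuse them instead of keeping private copies of __tcp_v4_hash() and tcp_listen_wlock(). A hypothetical sketch of such a user; dccp_hashinfo and dccp_v4_hash are illustrative names, not part of this commit, and the body simply mirrors the tcp_v4_hash() shown above:

	static struct inet_hashinfo dccp_hashinfo;

	static void dccp_v4_hash(struct sock *sk)
	{
		if (sk->sk_state != TCP_CLOSE) {
			local_bh_disable();
			__inet_hash(&dccp_hashinfo, sk, 1);	/* listen_possible = 1: may touch the listening hash */
			local_bh_enable();
		}
	}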