Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 079096f1 authored by Eric Dumazet, committed by David S. Miller
Browse files

tcp/dccp: install syn_recv requests into ehash table



In this patch, we insert request sockets into TCP/DCCP
regular ehash table (where ESTABLISHED and TIMEWAIT sockets
are) instead of using the per listener hash table.

ACK packets find SYN_RECV pseudo sockets without having
to find and lock the listener.

In nominal conditions, this halves pressure on listener lock.

Note that this will allow for SO_REUSEPORT refinements,
so that we can select a listener using cpu/numa affinities instead
of the prior 'consistent hash', since only SYN packets will
apply this selection logic.

We will shrink listen_sock in the following patch to ease
code review.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ying Cai <ycai@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 2feda341
Loading
Loading
Loading
Loading
+0 −4
Original line number Original line Diff line number Diff line
@@ -258,10 +258,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,


struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);


struct request_sock *inet_csk_search_req(struct sock *sk,
					 const __be16 rport,
					 const __be32 raddr,
					 const __be32 laddr);
int inet_csk_bind_conflict(const struct sock *sk,
int inet_csk_bind_conflict(const struct sock *sk,
			   const struct inet_bind_bucket *tb, bool relax);
			   const struct inet_bind_bucket *tb, bool relax);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
+1 −0
Original line number Original line Diff line number Diff line
@@ -205,6 +205,7 @@ void inet_put_port(struct sock *sk);


void inet_hashinfo_init(struct inet_hashinfo *h);
void inet_hashinfo_init(struct inet_hashinfo *h);


int inet_ehash_insert(struct sock *sk, struct sock *osk);
void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
void __inet_hash(struct sock *sk, struct sock *osk);
void __inet_hash(struct sock *sk, struct sock *osk);
void inet_hash(struct sock *sk);
void inet_hash(struct sock *sk);
+0 −4
Original line number Original line Diff line number Diff line
@@ -266,8 +266,4 @@ static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
	return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log;
	return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log;
}
}


void reqsk_queue_hash_req(struct request_sock_queue *queue,
			  u32 hash, struct request_sock *req,
			  unsigned long timeout);

#endif /* _REQUEST_SOCK_H */
#endif /* _REQUEST_SOCK_H */
+0 −3
Original line number Original line Diff line number Diff line
@@ -1618,7 +1618,6 @@ static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
/* /proc */
/* /proc */
enum tcp_seq_states {
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_OPENREQ,
	TCP_SEQ_STATE_ESTABLISHED,
	TCP_SEQ_STATE_ESTABLISHED,
};
};


@@ -1717,8 +1716,6 @@ struct tcp_request_sock_ops {
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct flowi *fl, struct request_sock *req,
			   u16 queue_mapping, struct tcp_fastopen_cookie *foc);
			   u16 queue_mapping, struct tcp_fastopen_cookie *foc);
	void (*queue_hash_add)(struct sock *sk, struct request_sock *req,
			       const unsigned long timeout);
};
};


#ifdef CONFIG_SYN_COOKIES
#ifdef CONFIG_SYN_COOKIES
+1 −27
Original line number Original line Diff line number Diff line
@@ -99,35 +99,9 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(


void reqsk_queue_destroy(struct request_sock_queue *queue)
void reqsk_queue_destroy(struct request_sock_queue *queue)
{
{
	/* make all the listen_opt local to us */
	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);


	if (reqsk_queue_len(queue) != 0) {
	/* cleaning is done by req timers */
		unsigned int i;

		for (i = 0; i < lopt->nr_table_entries; i++) {
			struct request_sock *req;

			spin_lock_bh(&queue->syn_wait_lock);
			while ((req = lopt->syn_table[i]) != NULL) {
				lopt->syn_table[i] = req->dl_next;
				/* Because of following del_timer_sync(),
				 * we must release the spinlock here
				 * or risk a dead lock.
				 */
				spin_unlock_bh(&queue->syn_wait_lock);
				atomic_dec(&queue->qlen);
				if (del_timer_sync(&req->rsk_timer))
					reqsk_put(req);
				reqsk_put(req);
				spin_lock_bh(&queue->syn_wait_lock);
			}
			spin_unlock_bh(&queue->syn_wait_lock);
		}
	}

	if (WARN_ON(reqsk_queue_len(queue) != 0))
		pr_err("qlen %u\n", reqsk_queue_len(queue));
	kvfree(lopt);
	kvfree(lopt);
}
}


Loading