Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 750f2f91 authored by David S. Miller
Browse files

Merge branch 'listener_refactor_part_14'



Eric Dumazet says:

====================
inet: tcp listener refactoring part 14

OK, we have serious patches here.

We get rid of the central timer handling SYNACK rtx,
which is killing us under even medium SYN flood.

We still use the listener specific hash table.

This will be done in next round ;)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents a998f712 becb74f0
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -28,8 +28,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6,
				      const struct request_sock *req);

struct request_sock *inet6_csk_search_req(const struct sock *sk,
					  struct request_sock ***prevp,
struct request_sock *inet6_csk_search_req(struct sock *sk,
					  const __be16 rport,
					  const struct in6_addr *raddr,
					  const struct in6_addr *laddr,
+8 −18
Original line number Diff line number Diff line
@@ -256,8 +256,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,

struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);

struct request_sock *inet_csk_search_req(const struct sock *sk,
					 struct request_sock ***prevp,
struct request_sock *inet_csk_search_req(struct sock *sk,
					 const __be16 rport,
					 const __be32 raddr,
					 const __be32 laddr);
@@ -283,15 +282,13 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
						struct request_sock *req)
{
	if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
		inet_csk_delete_keepalive_timer(sk);
	reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
}

static inline void inet_csk_reqsk_queue_added(struct sock *sk,
					      const unsigned long timeout)
{
	if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
		inet_csk_reset_keepalive_timer(sk, timeout);
	reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue);
}

static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
@@ -310,26 +307,19 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
}

static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
					       struct request_sock *req,
					       struct request_sock **prev)
					       struct request_sock *req)
{
	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req);
}

static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
					     struct request_sock *req,
					     struct request_sock **prev)
					     struct request_sock *req)
{
	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_unlink(sk, req);
	inet_csk_reqsk_queue_removed(sk, req);
	reqsk_free(req);
	reqsk_put(req);
}

void inet_csk_reqsk_queue_prune(struct sock *parent,
				const unsigned long interval,
				const unsigned long timeout,
				const unsigned long max_rto);

void inet_csk_destroy_sock(struct sock *sk);
void inet_csk_prepare_forced_close(struct sock *sk);

+50 −48
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
struct request_sock {
	struct sock_common		__req_common;
#define rsk_refcnt			__req_common.skc_refcnt
#define rsk_hash			__req_common.skc_hash

	struct request_sock		*dl_next;
	struct sock			*rsk_listener;
@@ -61,7 +62,7 @@ struct request_sock {
	u32				window_clamp; /* window clamp at creation time */
	u32				rcv_wnd;	  /* rcv_wnd offered first time */
	u32				ts_recent;
	unsigned long			expires;
	struct timer_list		rsk_timer;
	const struct request_sock_ops	*rsk_ops;
	struct sock			*sk;
	u32				secid;
@@ -109,9 +110,6 @@ static inline void reqsk_free(struct request_sock *req)

static inline void reqsk_put(struct request_sock *req)
{
	/* temporary debugging, until req sock are put into ehash table */
	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);

	if (atomic_dec_and_test(&req->rsk_refcnt))
		reqsk_free(req);
}
@@ -123,12 +121,16 @@ extern int sysctl_max_syn_backlog;
 * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
 */
struct listen_sock {
	u8			max_qlen_log;
	int			qlen_inc; /* protected by listener lock */
	int			young_inc;/* protected by listener lock */

	/* following fields can be updated by timer */
	atomic_t		qlen_dec; /* qlen = qlen_inc - qlen_dec */
	atomic_t		young_dec;

	u8			max_qlen_log ____cacheline_aligned_in_smp;
	u8			synflood_warned;
	/* 2 bytes hole, try to use */
	int			qlen;
	int			qlen_young;
	int			clock_hand;
	u32			hash_rnd;
	u32			nr_table_entries;
	struct request_sock	*syn_table[0];
@@ -181,9 +183,7 @@ struct fastopen_queue {
struct request_sock_queue {
	struct request_sock	*rskq_accept_head;
	struct request_sock	*rskq_accept_tail;
	rwlock_t		syn_wait_lock;
	u8			rskq_defer_accept;
	/* 3 bytes hole, try to pack */
	struct listen_sock	*listen_opt;
	struct fastopen_queue	*fastopenq; /* This is non-NULL iff TFO has been
					     * enabled on this listener. Check
@@ -191,6 +191,9 @@ struct request_sock_queue {
					     * to determine if TFO is enabled
					     * right at this moment.
					     */

	/* temporary alignment, our goal is to get rid of this lock */
	rwlock_t		syn_wait_lock ____cacheline_aligned_in_smp;
};

int reqsk_queue_alloc(struct request_sock_queue *queue,
@@ -216,12 +219,21 @@ static inline int reqsk_queue_empty(struct request_sock_queue *queue)
}

static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
				      struct request_sock *req,
				      struct request_sock **prev_req)
				      struct request_sock *req)
{
	struct listen_sock *lopt = queue->listen_opt;
	struct request_sock **prev;

	write_lock(&queue->syn_wait_lock);
	*prev_req = req->dl_next;

	prev = &lopt->syn_table[req->rsk_hash];
	while (*prev != req)
		prev = &(*prev)->dl_next;
	*prev = req->dl_next;

	write_unlock(&queue->syn_wait_lock);
	if (del_timer(&req->rsk_timer))
		reqsk_put(req);
}

static inline void reqsk_queue_add(struct request_sock_queue *queue,
@@ -254,63 +266,53 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
	return req;
}

static inline int reqsk_queue_removed(struct request_sock_queue *queue,
				      struct request_sock *req)
static inline void reqsk_queue_removed(struct request_sock_queue *queue,
				       const struct request_sock *req)
{
	struct listen_sock *lopt = queue->listen_opt;

	if (req->num_timeout == 0)
		--lopt->qlen_young;

	return --lopt->qlen;
		atomic_inc(&lopt->young_dec);
	atomic_inc(&lopt->qlen_dec);
}

static inline int reqsk_queue_added(struct request_sock_queue *queue)
static inline void reqsk_queue_added(struct request_sock_queue *queue)
{
	struct listen_sock *lopt = queue->listen_opt;
	const int prev_qlen = lopt->qlen;

	lopt->qlen_young++;
	lopt->qlen++;
	return prev_qlen;
	lopt->young_inc++;
	lopt->qlen_inc++;
}

/* Current SYN backlog length: increments (qlen_inc, updated under the
 * listener lock) minus decrements (qlen_dec, an atomic_t so the per-request
 * timer path can update it without taking the listener lock).
 */
static inline int listen_sock_qlen(const struct listen_sock *lopt)
{
	return lopt->qlen_inc - atomic_read(&lopt->qlen_dec);
}

/* Number of "young" requests (no SYNACK retransmission yet): same
 * split-counter scheme as listen_sock_qlen() — young_inc is written under
 * the listener lock, young_dec atomically from the timer path.
 */
static inline int listen_sock_young(const struct listen_sock *lopt)
{
	return lopt->young_inc - atomic_read(&lopt->young_dec);
}

static inline int reqsk_queue_len(const struct request_sock_queue *queue)
{
	return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
	const struct listen_sock *lopt = queue->listen_opt;

	return lopt ? listen_sock_qlen(lopt) : 0;
}

static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
{
	return queue->listen_opt->qlen_young;
	return listen_sock_young(queue->listen_opt);
}

static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
	return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
	return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log;
}

static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
void reqsk_queue_hash_req(struct request_sock_queue *queue,
			  u32 hash, struct request_sock *req,
					unsigned long timeout)
{
	struct listen_sock *lopt = queue->listen_opt;

	req->expires = jiffies + timeout;
	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;
	req->dl_next = lopt->syn_table[hash];

	/* before letting lookups find us, make sure all req fields
	 * are committed to memory and refcnt initialized.
	 */
	smp_wmb();
	atomic_set(&req->rsk_refcnt, 1);

	write_lock(&queue->syn_wait_lock);
	lopt->syn_table[hash] = req;
	write_unlock(&queue->syn_wait_lock);
}
			  unsigned long timeout);

#endif /* _REQUEST_SOCK_H */
+2 −2
Original line number Diff line number Diff line
@@ -405,8 +405,8 @@ struct sock {
	rwlock_t		sk_callback_lock;
	int			sk_err,
				sk_err_soft;
	unsigned short		sk_ack_backlog;
	unsigned short		sk_max_ack_backlog;
	u32			sk_ack_backlog;
	u32			sk_max_ack_backlog;
	__u32			sk_priority;
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
	__u32			sk_cgrp_prioidx;
+1 −2
Original line number Diff line number Diff line
@@ -406,8 +406,7 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
					      struct sk_buff *skb,
					      const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req, struct request_sock **prev,
			   bool fastopen);
			   struct request_sock *req, bool fastopen);
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
Loading