Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 15f41e2b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp-udp-misc'



Eric Dumazet says:

====================
net: various udp/tcp changes

First round of patches for linux-4.7

Add a generic facility for sockets to be freed after an RCU grace
period, if they need to.

Then UDP stack is changed to no longer use SLAB_DESTROY_BY_RCU,
in order to speedup rx processing for traffic encapsulated in UDP.
It gives a 17 % speedup for normal UDP reception in stress conditions.

Then TCP listeners are changed to use SOCK_RCU_FREE as well
to avoid touching sk_refcnt in synflood case :
I got up to 30 % performance increase for a mono listener.

Then three patches add SK_MEMINFO_DROPS to sock_diag
and add per socket rx drops accounting to TCP.

Last patch adds rate limiting on ACK sent on behalf of SYN_RECV
to better resist to SYNFLOOD targeting one or few flows.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 43e2dfb2 4ce7e93c
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
	return udp_sk(sk)->no_check6_rx;
}

#define udp_portaddr_for_each_entry(__sk, node, list) \
	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry(__sk, list) \
	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)

#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
	hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
	hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)

#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)

+8 −4
Original line number Diff line number Diff line
@@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
					  const __be16 sport,
					  const struct in6_addr *daddr,
					  const u16 hnum,
					  const int dif)
					  const int dif,
					  bool *refcounted)
{
	struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
						sport, daddr, hnum, dif);
	*refcounted = true;
	if (sk)
		return sk;

	*refcounted = false;
	return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
				     daddr, hnum, dif);
}
@@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
					      struct sk_buff *skb, int doff,
					      const __be16 sport,
					      const __be16 dport,
					      int iif)
					      int iif,
					      bool *refcounted)
{
	struct sock *sk = skb_steal_sock(skb);

	*refcounted = true;
	if (sk)
		return sk;

	return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
			      doff, &ipv6_hdr(skb)->saddr, sport,
			      &ipv6_hdr(skb)->daddr, ntohs(dport),
			      iif);
			      iif, refcounted);
}

struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+25 −22
Original line number Diff line number Diff line
@@ -100,14 +100,10 @@ struct inet_bind_hashbucket {

/*
 * Sockets can be hashed in established or listening table
 * We must use different 'nulls' end-of-chain value for listening
 * hash table, or we might find a socket that was closed and
 * reallocated/inserted into established hash table
 */
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
	spinlock_t		lock;
	struct hlist_nulls_head	head;
	struct hlist_head	head;
};

/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
	 net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */

/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
struct sock *__inet_lookup_established(struct net *net,
				       struct inet_hashinfo *hashinfo,
@@ -307,13 +300,19 @@ static inline struct sock *__inet_lookup(struct net *net,
					 struct sk_buff *skb, int doff,
					 const __be32 saddr, const __be16 sport,
					 const __be32 daddr, const __be16 dport,
					 const int dif)
					 const int dif,
					 bool *refcounted)
{
	u16 hnum = ntohs(dport);
	struct sock *sk = __inet_lookup_established(net, hashinfo,
				saddr, sport, daddr, hnum, dif);
	struct sock *sk;

	return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
	sk = __inet_lookup_established(net, hashinfo, saddr, sport,
				       daddr, hnum, dif);
	*refcounted = true;
	if (sk)
		return sk;
	*refcounted = false;
	return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
				      sport, daddr, hnum, dif);
}

@@ -325,12 +324,13 @@ static inline struct sock *inet_lookup(struct net *net,
				       const int dif)
{
	struct sock *sk;
	bool refcounted;

	local_bh_disable();
	sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
			   dport, dif);
	local_bh_enable();
			   dport, dif, &refcounted);

	if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
	return sk;
}

@@ -338,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
					     struct sk_buff *skb,
					     int doff,
					     const __be16 sport,
					     const __be16 dport)
					     const __be16 dport,
					     bool *refcounted)
{
	struct sock *sk = skb_steal_sock(skb);
	const struct iphdr *iph = ip_hdr(skb);

	*refcounted = true;
	if (sk)
		return sk;
	else

	return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
			     doff, iph->saddr, sport,
				     iph->daddr, dport, inet_iif(skb));
			     iph->daddr, dport, inet_iif(skb),
			     refcounted);
}

u32 sk_ehashfn(const struct sock *sk);
+15 −16
Original line number Diff line number Diff line
@@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
	struct request_sock *req;

	req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);

	if (req) {
		req->rsk_ops = ops;
	if (!req)
		return NULL;
	req->rsk_listener = NULL;
	if (attach_listener) {
			sock_hold(sk_listener);
		if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
			kmem_cache_free(ops->slab, req);
			return NULL;
		}
		req->rsk_listener = sk_listener;
		} else {
			req->rsk_listener = NULL;
	}
	req->rsk_ops = ops;
	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
	sk_node_init(&req_to_sk(req)->sk_node);
	sk_tx_queue_clear(req_to_sk(req));
	req->saved_syn = NULL;
		/* Following is temporary. It is coupled with debugging
		 * helpers in reqsk_put() & reqsk_free()
		 */
	atomic_set(&req->rsk_refcnt, 0);
	}

	return req;
}

+15 −6
Original line number Diff line number Diff line
@@ -178,7 +178,7 @@ struct sock_common {
	int			skc_bound_dev_if;
	union {
		struct hlist_node	skc_bind_node;
		struct hlist_nulls_node skc_portaddr_node;
		struct hlist_node	skc_portaddr_node;
	};
	struct proto		*skc_prot;
	possible_net_t		skc_net;
@@ -438,6 +438,7 @@ struct sock {
						  struct sk_buff *skb);
	void                    (*sk_destruct)(struct sock *sk);
	struct sock_reuseport __rcu	*sk_reuseport_cb;
	struct rcu_head		sk_rcu;
};

#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -669,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk,
	hlist_for_each_entry(__sk, list, sk_bind_node)

/**
 * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
 * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct hlist_node to use as a loop cursor.
 * @head:	the head for your list.
 * @offset:	offset of hlist_node within the struct.
 *
 */
#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset)		       \
	for (pos = (head)->first;					       \
	     (!is_a_nulls(pos)) &&					       \
#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset)		       \
	for (pos = rcu_dereference((head)->first);			       \
	     pos != NULL &&						       \
		({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;});       \
	     pos = pos->next)
	     pos = rcu_dereference(pos->next))

static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -720,6 +721,7 @@ enum sock_flags {
		     */
	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
};

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -2010,6 +2012,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
	SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}

static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
{
	int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);

	atomic_add(segs, &sk->sk_drops);
}

void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
			   struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
Loading