Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 76973dd7 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'setsockopt_incoming_cpu'



Eric Dumazet says:

====================
tcp: better smp listener behavior

As promised in last patch series, we implement a better SO_REUSEPORT
strategy, based on cpu hints if given by the application.

We also moved sk_refcnt out of the cache line containing the lookup
keys, as it was considerably slowing down smp operations because
of false sharing. This was simpler than converting listen sockets
to conventional RCU (to avoid sk_refcnt dirtying)

Could process 6.0 Mpps SYN instead of 4.2 Mpps on my test server.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents c7d39e32 d475f090
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -356,8 +356,8 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)

struct tcp_timewait_sock {
	struct inet_timewait_sock tw_sk;
	u32			  tw_rcv_nxt;
	u32			  tw_snd_nxt;
#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt
	u32			  tw_rcv_wnd;
	u32			  tw_ts_offset;
	u32			  tw_ts_recent;
+1 −1
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ struct inet_timewait_sock {
#define tw_dport		__tw_common.skc_dport
#define tw_num			__tw_common.skc_num
#define tw_cookie		__tw_common.skc_cookie
#define tw_dr			__tw_common.skc_tw_dr

	int			tw_timeout;
	volatile unsigned char	tw_substate;
@@ -88,7 +89,6 @@ struct inet_timewait_sock {
	kmemcheck_bitfield_end(flags);
	struct timer_list	tw_timer;
	struct inet_bind_bucket	*tw_tb;
	struct inet_timewait_death_row *tw_dr;
};
#define tw_tclass tw_tos

+3 −4
Original line number Diff line number Diff line
@@ -50,16 +50,15 @@ struct request_sock {
	struct sock_common		__req_common;
#define rsk_refcnt			__req_common.skc_refcnt
#define rsk_hash			__req_common.skc_hash
#define rsk_listener			__req_common.skc_listener
#define rsk_window_clamp		__req_common.skc_window_clamp
#define rsk_rcv_wnd			__req_common.skc_rcv_wnd

	struct request_sock		*dl_next;
	struct sock			*rsk_listener;
	u16				mss;
	u8				num_retrans; /* number of retransmits */
	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
	u8				num_timeout:7; /* number of timeouts */
	/* The following two fields can be easily recomputed I think -AK */
	u32				window_clamp; /* window clamp at creation time */
	u32				rcv_wnd;	  /* rcv_wnd offered first time */
	u32				ts_recent;
	struct timer_list		rsk_timer;
	const struct request_sock_ops	*rsk_ops;
+28 −13
Original line number Diff line number Diff line
@@ -150,6 +150,10 @@ typedef __u64 __bitwise __addrpair;
 *	@skc_node: main hash linkage for various protocol lookup tables
 *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
 *	@skc_tx_queue_mapping: tx queue number for this connection
 *	@skc_flags: place holder for sk_flags
 *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
 *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
 *	@skc_incoming_cpu: record/match cpu processing incoming packets
 *	@skc_refcnt: reference count
 *
 *	This is the minimal network layer representation of sockets, the header
@@ -200,6 +204,16 @@ struct sock_common {

	atomic64_t		skc_cookie;

	/* following fields are padding to force
	 * offset(struct sock, sk_refcnt) == 128 on 64bit arches
	 * assuming IPV6 is enabled. We use this padding differently
	 * for different kind of 'sockets'
	 */
	union {
		unsigned long	skc_flags;
		struct sock	*skc_listener; /* request_sock */
		struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
	};
	/*
	 * fields between dontcopy_begin/dontcopy_end
	 * are not copied in sock_copy()
@@ -212,9 +226,20 @@ struct sock_common {
		struct hlist_nulls_node skc_nulls_node;
	};
	int			skc_tx_queue_mapping;
	union {
		int		skc_incoming_cpu;
		u32		skc_rcv_wnd;
		u32		skc_tw_rcv_nxt; /* struct tcp_timewait_sock  */
	};

	atomic_t		skc_refcnt;
	/* private: */
	int                     skc_dontcopy_end[0];
	union {
		u32		skc_rxhash;
		u32		skc_window_clamp;
		u32		skc_tw_snd_nxt; /* struct tcp_timewait_sock */
	};
	/* public: */
};

@@ -243,8 +268,6 @@ struct cg_proto;
  *	@sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
  *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
  *	@sk_sndbuf: size of send buffer in bytes
  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
  *	@sk_no_check_rx: allow zero checksum in RX packets
  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -273,8 +296,6 @@ struct cg_proto;
  *	@sk_rcvlowat: %SO_RCVLOWAT setting
  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
  *	@sk_sndtimeo: %SO_SNDTIMEO setting
  *	@sk_rxhash: flow hash received from netif layer
  *	@sk_incoming_cpu: record cpu processing incoming packets
  *	@sk_txhash: computed flow hash for use on transmit
  *	@sk_filter: socket filtering instructions
  *	@sk_timer: sock cleanup timer
@@ -331,6 +352,9 @@ struct sock {
#define sk_v6_daddr		__sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
#define sk_cookie		__sk_common.skc_cookie
#define sk_incoming_cpu		__sk_common.skc_incoming_cpu
#define sk_flags		__sk_common.skc_flags
#define sk_rxhash		__sk_common.skc_rxhash

	socket_lock_t		sk_lock;
	struct sk_buff_head	sk_receive_queue;
@@ -350,14 +374,6 @@ struct sock {
	} sk_backlog;
#define sk_rmem_alloc sk_backlog.rmem_alloc
	int			sk_forward_alloc;
#ifdef CONFIG_RPS
	__u32			sk_rxhash;
#endif
	u16			sk_incoming_cpu;
	/* 16bit hole
	 * Warned : sk_incoming_cpu can be set from softirq,
	 * Do not use this hole without fully understanding possible issues.
	 */

	__u32			sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -373,7 +389,6 @@ struct sock {
#ifdef CONFIG_XFRM
	struct xfrm_policy	*sk_policy[2];
#endif
	unsigned long 		sk_flags;
	struct dst_entry	*sk_rx_dst;
	struct dst_entry __rcu	*sk_dst_cache;
	spinlock_t		sk_dst_lock;
+5 −0
Original line number Diff line number Diff line
@@ -988,6 +988,10 @@ set_rcvbuf:
					 sk->sk_max_pacing_rate);
		break;

	case SO_INCOMING_CPU:
		sk->sk_incoming_cpu = val;
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
@@ -2379,6 +2383,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;
	sk->sk_incoming_cpu = -1;
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
Loading