Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bb5b7c11 authored by David S. Miller
Browse files

tcp: Revert per-route SACK/DSACK/TIMESTAMP changes.



It creates a regression, triggering badness for SYN_RECV
sockets, for example:

[19148.022102] Badness at net/ipv4/inet_connection_sock.c:293
[19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000
[19148.023035] REGS: eeecbd30 TRAP: 0700   Not tainted  (2.6.32)
[19148.023496] MSR: 00029032 <EE,ME,CE,IR,DR>  CR: 24002442  XER: 00000000
[19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000

This is likely caused by the change in the 'estab' parameter
passed to tcp_parse_options() when invoked by the functions
in net/ipv4/tcp_minisocks.c

But even if that is fixed, the ->conn_request() changes made in
this patch series are fundamentally wrong.  They try to use the
listening socket's 'dst' to probe the route settings.  The
listening socket doesn't even have a route, and you can't
get the right route (the child request one) until much later
after we set up all of the state, and it must be done by hand.

This stuff really isn't ready, so the best thing to do is a
full revert.  This reverts the following commits:

f55017a9
022c3f7d
1aba721e
cda42ebd
345cda2f
dc343475
05eaade2
6a2a2d6b

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 166a0fd4
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -368,11 +368,9 @@ enum {
#define RTAX_MAX (__RTAX_MAX - 1)

#define RTAX_FEATURE_ECN	0x00000001
#define RTAX_FEATURE_NO_SACK	0x00000002
#define RTAX_FEATURE_NO_TSTAMP	0x00000004
#define RTAX_FEATURE_SACK	0x00000002
#define RTAX_FEATURE_TIMESTAMP	0x00000004
#define RTAX_FEATURE_ALLFRAG	0x00000008
#define RTAX_FEATURE_NO_WSCALE	0x00000010
#define RTAX_FEATURE_NO_DSACK	0x00000020

struct rta_session {
	__u8	proto;
+1 −1
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric)
static inline u32
dst_feature(const struct dst_entry *dst, u32 feature)
{
	return (dst ? dst_metric(dst, RTAX_FEATURES) & feature : 0);
	return dst_metric(dst, RTAX_FEATURES) & feature;
}

static inline u32 dst_mtu(const struct dst_entry *dst)
+1 −2
Original line number Diff line number Diff line
@@ -408,8 +408,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
extern void			tcp_parse_options(struct sk_buff *skb,
						  struct tcp_options_received *opt_rx,
						  u8 **hvpp,
						  int estab,
						  struct dst_entry *dst);
						  int estab);

extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);

+13 −14
Original line number Diff line number Diff line
@@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);

	/* check for timestamp cookie support */
	memset(&tcp_opt, 0, sizeof(tcp_opt));
	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);

	if (tcp_opt.saw_tstamp)
		cookie_check_timestamp(&tcp_opt);

	ret = NULL;
	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
	if (!req)
@@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
	ireq->loc_addr		= ip_hdr(skb)->daddr;
	ireq->rmt_addr		= ip_hdr(skb)->saddr;
	ireq->ecn_ok		= 0;
	ireq->snd_wscale	= tcp_opt.snd_wscale;
	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
	ireq->sack_ok		= tcp_opt.sack_ok;
	ireq->wscale_ok		= tcp_opt.wscale_ok;
	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;

	/* We throwed the options of the initial SYN away, so we hope
	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
		}
	}

	/* check for timestamp cookie support */
	memset(&tcp_opt, 0, sizeof(tcp_opt));
	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst);

	if (tcp_opt.saw_tstamp)
		cookie_check_timestamp(&tcp_opt);

	ireq->snd_wscale        = tcp_opt.snd_wscale;
	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
	ireq->sack_ok           = tcp_opt.sack_ok;
	ireq->wscale_ok         = tcp_opt.wscale_ok;
	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;

	/* Try to redo what tcp_v4_send_synack did. */
	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);

+8 −16
Original line number Diff line number Diff line
@@ -3727,7 +3727,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 * the fast version below fails.
 */
void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
		       u8 **hvpp, int estab,  struct dst_entry *dst)
		       u8 **hvpp, int estab)
{
	unsigned char *ptr;
	struct tcphdr *th = tcp_hdr(skb);
@@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
				break;
			case TCPOPT_WINDOW:
				if (opsize == TCPOLEN_WINDOW && th->syn &&
				    !estab && sysctl_tcp_window_scaling &&
				    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
				    !estab && sysctl_tcp_window_scaling) {
					__u8 snd_wscale = *(__u8 *)ptr;
					opt_rx->wscale_ok = 1;
					if (snd_wscale > 14) {
@@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
			case TCPOPT_TIMESTAMP:
				if ((opsize == TCPOLEN_TIMESTAMP) &&
				    ((estab && opt_rx->tstamp_ok) ||
				     (!estab && sysctl_tcp_timestamps &&
				      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
				     (!estab && sysctl_tcp_timestamps))) {
					opt_rx->saw_tstamp = 1;
					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
				break;
			case TCPOPT_SACK_PERM:
				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
				    !estab && sysctl_tcp_sack &&
				    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
				    !estab && sysctl_tcp_sack) {
					opt_rx->sack_ok = 1;
					tcp_sack_reset(opt_rx);
				}
@@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
		if (tcp_parse_aligned_timestamp(tp, th))
			return 1;
	}
	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
	return 1;
}

@@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);

	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
		int mib_idx;

		if (before(seq, tp->rcv_nxt))
@@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);

	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_enter_quickack_mode(sk);

		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
			u32 end_seq = TCP_SKB_CB(skb)->end_seq;

			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
	u8 *hash_location;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_cookie_values *cvp = tp->cookie_values;
	int saved_clamp = tp->rx_opt.mss_clamp;

	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);

	if (th->ack) {
		/* rfc793:
Loading