Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 21445c91 authored by David S. Miller's avatar David S. Miller
Browse files

Merge tag 'rxrpc-rewrite-20160924' of...

Merge tag 'rxrpc-rewrite-20160924' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs



David Howells says:

====================
rxrpc: Implement slow-start and other bits

This set of patches implements the RxRPC slow-start feature for AF_RXRPC to
improve performance and handling of occasional packet loss.  This is more or
less the same as TCP slow start [RFC 5681].  Firstly, there are some ACK
generation improvements:

 (1) Send ACKs regularly to apprise the peer of our state so that they can do
     congestion management of their own.

 (2) Send an ACK when we fill in a hole in the buffer so that the peer can
     find out that we did this thus forestalling retransmission.

 (3) Note the final DATA packet's serial number in the final ACK for
     correlation purposes.

and a couple of bug fixes:

 (4) Reinitialise the ACK state and clear the ACK and resend timers upon
     entering the client reply reception phase to kill off any pending probe
     ACKs.

 (5) Delay the resend timer to allow for nsec->jiffies conversion errors.

and then there's the slow-start pieces:

 (6) Summarise an ACK.

 (7) Schedule a PING or IDLE ACK if the reply to a client call is overdue to
     try and find out what happened to it.

 (8) Implement the slow start feature.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents c2675de4 57494343
Loading
Loading
Loading
Loading
+45 −0
Original line number Diff line number Diff line
@@ -570,6 +570,51 @@ TRACE_EVENT(rxrpc_retransmit,
		      __entry->expiry)
	    );

TRACE_EVENT(rxrpc_congest,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
		     rxrpc_serial_t ack_serial, enum rxrpc_congest_change change),

	    TP_ARGS(call, summary, ack_serial, change),

	    TP_STRUCT__entry(
		    __field(struct rxrpc_call *,		call		)
		    __field(enum rxrpc_congest_change,		change		)
		    __field(rxrpc_seq_t,			hard_ack	)
		    __field(rxrpc_seq_t,			top		)
		    __field(rxrpc_seq_t,			lowest_nak	)
		    __field(rxrpc_serial_t,			ack_serial	)
		    __field_struct(struct rxrpc_ack_summary,	sum		)
			     ),

	    TP_fast_assign(
		    __entry->call	= call;
		    __entry->change	= change;
		    __entry->hard_ack	= call->tx_hard_ack;
		    __entry->top	= call->tx_top;
		    __entry->lowest_nak	= call->acks_lowest_nak;
		    __entry->ack_serial	= ack_serial;
		    memcpy(&__entry->sum, summary, sizeof(__entry->sum));
			   ),

	    TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
		      __entry->call,
		      __entry->ack_serial,
		      rxrpc_ack_names[__entry->sum.ack_reason],
		      __entry->hard_ack,
		      rxrpc_congest_modes[__entry->sum.mode],
		      __entry->sum.cwnd,
		      __entry->sum.ssthresh,
		      __entry->sum.nr_acks, __entry->sum.nr_nacks,
		      __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks,
		      __entry->sum.nr_rot_new_acks,
		      __entry->top - __entry->hard_ack,
		      __entry->sum.cumulative_acks,
		      __entry->sum.dup_acks,
		      __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "",
		      rxrpc_congest_changes[__entry->change],
		      __entry->sum.retrans_timeo ? " rTxTo" : "")
	    );

#endif /* _TRACE_RXRPC_H */

/* This part must be outside protection */
+71 −0
Original line number Diff line number Diff line
@@ -402,6 +402,7 @@ enum rxrpc_call_flag {
	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
	RXRPC_CALL_PINGING,		/* Ping in process */
	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
};

/*
@@ -446,6 +447,17 @@ enum rxrpc_call_completion {
	NR__RXRPC_CALL_COMPLETIONS
};

/*
 * Call Tx congestion management modes.
 */
enum rxrpc_congest_mode {
	RXRPC_CALL_SLOW_START,
	RXRPC_CALL_CONGEST_AVOIDANCE,
	RXRPC_CALL_PACKET_LOSS,
	RXRPC_CALL_FAST_RETRANSMIT,
	NR__RXRPC_CONGEST_MODES
};

/*
 * RxRPC call definition
 * - matched by { connection, call_id }
@@ -518,6 +530,20 @@ struct rxrpc_call {
						 * not hard-ACK'd packet follows this.
						 */
	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */

	/* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
	 * is fixed, we keep these numbers in terms of segments (ie. DATA
	 * packets) rather than bytes.
	 */
#define RXRPC_TX_SMSS		RXRPC_JUMBO_DATALEN
	u8			cong_cwnd;	/* Congestion window size */
	u8			cong_extra;	/* Extra to send for congestion management */
	u8			cong_ssthresh;	/* Slow-start threshold */
	enum rxrpc_congest_mode	cong_mode:8;	/* Congestion management mode */
	u8			cong_dup_acks;	/* Count of ACKs showing missing packets */
	u8			cong_cumul_acks; /* Cumulative ACK count */
	ktime_t			cong_tstamp;	/* Last time cwnd was changed */

	rxrpc_seq_t		rx_hard_ack;	/* Dead slot in buffer; the first received but not
						 * consumed packet follows this.
						 */
@@ -533,11 +559,36 @@ struct rxrpc_call {
	u16			ackr_skew;	/* skew on packet being ACK'd */
	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
	rxrpc_seq_t		ackr_prev_seq;	/* previous sequence number received */
	rxrpc_seq_t		ackr_consumed;	/* Highest packet shown consumed */
	rxrpc_seq_t		ackr_seen;	/* Highest packet shown seen */
	rxrpc_serial_t		ackr_ping;	/* Last ping sent */
	ktime_t			ackr_ping_time;	/* Time last ping sent */

	/* transmission-phase ACK management */
	ktime_t			acks_latest_ts;	/* Timestamp of latest ACK received */
	rxrpc_serial_t		acks_latest;	/* serial number of latest ACK received */
	rxrpc_seq_t		acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
};

/*
 * Summary of a new ACK and the changes it made to the Tx buffer packet states.
 */
struct rxrpc_ack_summary {
	u8			ack_reason;
	u8			nr_acks;		/* Number of ACKs in packet */
	u8			nr_nacks;		/* Number of NACKs in packet */
	u8			nr_new_acks;		/* Number of new ACKs in packet */
	u8			nr_new_nacks;		/* Number of new NACKs in packet */
	u8			nr_rot_new_acks;	/* Number of rotated new ACKs */
	bool			new_low_nack;		/* T if new low NACK found */
	bool			retrans_timeo;		/* T if reTx due to timeout happened */
	u8			flight_size;		/* Number of unreceived transmissions */
	/* Place to stash values for tracing */
	enum rxrpc_congest_mode	mode:8;
	u8			cwnd;
	u8			ssthresh;
	u8			dup_acks;
	u8			cumulative_acks;
};

enum rxrpc_skb_trace {
@@ -680,6 +731,7 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];

enum rxrpc_timer_trace {
	rxrpc_timer_begin,
	rxrpc_timer_init_for_reply,
	rxrpc_timer_expired,
	rxrpc_timer_set_for_ack,
	rxrpc_timer_set_for_resend,
@@ -690,11 +742,15 @@ enum rxrpc_timer_trace {
extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8];

enum rxrpc_propose_ack_trace {
	rxrpc_propose_ack_client_tx_end,
	rxrpc_propose_ack_input_data,
	rxrpc_propose_ack_ping_for_lost_ack,
	rxrpc_propose_ack_ping_for_lost_reply,
	rxrpc_propose_ack_ping_for_params,
	rxrpc_propose_ack_respond_to_ack,
	rxrpc_propose_ack_respond_to_ping,
	rxrpc_propose_ack_retry_tx,
	rxrpc_propose_ack_rotate_rx,
	rxrpc_propose_ack_terminal_ack,
	rxrpc_propose_ack__nr_trace
};
@@ -709,6 +765,21 @@ enum rxrpc_propose_ack_outcome {
extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8];
extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes];

enum rxrpc_congest_change {
	rxrpc_cong_begin_retransmission,
	rxrpc_cong_cleared_nacks,
	rxrpc_cong_new_low_nack,
	rxrpc_cong_no_change,
	rxrpc_cong_progress,
	rxrpc_cong_retransmit_again,
	rxrpc_cong_rtt_window_end,
	rxrpc_cong_saw_nack,
	rxrpc_congest__nr_change
};

extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];

extern const char *const rxrpc_pkts[];
extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];

+43 −4
Original line number Diff line number Diff line
@@ -100,6 +100,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
			expiry = rxrpc_soft_ack_delay;
		break;

	case RXRPC_ACK_PING:
	case RXRPC_ACK_IDLE:
		if (rxrpc_idle_ack_delay < expiry)
			expiry = rxrpc_idle_ack_delay;
@@ -145,6 +146,14 @@ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
	spin_unlock_bh(&call->lock);
}

/*
 * Handle congestion being detected by the retransmit timeout.
 */
static void rxrpc_congestion_timeout(struct rxrpc_call *call)
{
	set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
}

/*
 * Perform retransmission of NAK'd and unack'd packets.
 */
@@ -153,9 +162,9 @@ static void rxrpc_resend(struct rxrpc_call *call)
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	rxrpc_seq_t cursor, seq, top;
	ktime_t now = ktime_get_real(), max_age, oldest,  resend_at;
	ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts;
	int ix;
	u8 annotation, anno_type;
	u8 annotation, anno_type, retrans = 0, unacked = 0;

	_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);

@@ -192,16 +201,44 @@ static void rxrpc_resend(struct rxrpc_call *call)
					oldest = skb->tstamp;
				continue;
			}
			if (!(annotation & RXRPC_TX_ANNO_RESENT))
				unacked++;
		}

		/* Okay, we need to retransmit a packet. */
		call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
		retrans++;
		trace_rxrpc_retransmit(call, seq, annotation | anno_type,
				       ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
	}

	resend_at = ktime_sub(ktime_add_ms(oldest, rxrpc_resend_timeout), now);
	call->resend_at = jiffies + nsecs_to_jiffies(ktime_to_ns(resend_at));
	resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
	call->resend_at = jiffies +
		nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) +
		1; /* We have to make sure that the calculated jiffies value
		    * falls at or after the nsec value, or we shall loop
		    * ceaselessly because the timer times out, but we haven't
		    * reached the nsec timeout yet.
		    */

	if (unacked)
		rxrpc_congestion_timeout(call);

	/* If there was nothing that needed retransmission then it's likely
	 * that an ACK got lost somewhere.  Send a ping to find out instead of
	 * retransmitting data.
	 */
	if (!retrans) {
		rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
		spin_unlock_bh(&call->lock);
		ack_ts = ktime_sub(now, call->acks_latest_ts);
		if (ktime_to_ns(ack_ts) < call->peer->rtt)
			goto out;
		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
				  rxrpc_propose_ack_ping_for_lost_ack);
		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
		goto out;
	}

	/* Now go through the Tx window and perform the retransmissions.  We
	 * have to drop the lock for each send.  If an ACK comes in whilst the
@@ -253,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call)

out_unlock:
	spin_unlock_bh(&call->lock);
out:
	_leave("");
}

@@ -286,6 +324,7 @@ void rxrpc_process_call(struct work_struct *work)
	if (time_after_eq(now, call->expire_at)) {
		rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
		goto recheck_state;
	}

	if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
+13 −0
Original line number Diff line number Diff line
@@ -160,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
	call->rx_winsize = rxrpc_rx_window_size;
	call->tx_winsize = 16;
	call->rx_expect_next = 1;

	if (RXRPC_TX_SMSS > 2190)
		call->cong_cwnd = 2;
	else if (RXRPC_TX_SMSS > 1095)
		call->cong_cwnd = 3;
	else
		call->cong_cwnd = 4;
	call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
	return call;

nomem_2:
@@ -176,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
						  gfp_t gfp)
{
	struct rxrpc_call *call;
	ktime_t now;

	_enter("");

@@ -185,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
	call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
	call->service_id = srx->srx_service;
	call->tx_phase = true;
	now = ktime_get_real();
	call->acks_latest_ts = now;
	call->cong_tstamp = now;

	_leave(" = %p", call);
	return call;
@@ -325,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
	call->state		= RXRPC_CALL_SERVER_ACCEPTING;
	if (sp->hdr.securityIndex > 0)
		call->state	= RXRPC_CALL_SERVER_SECURING;
	call->cong_tstamp	= skb->tstamp;

	/* Set the channel for this call.  We don't get channel_lock as we're
	 * only defending against the data_ready handler (which we're called
+1 −0
Original line number Diff line number Diff line
@@ -97,6 +97,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
		pkt.info.maxMTU		= htonl(mtu);
		pkt.info.rwind		= htonl(rxrpc_rx_window_size);
		pkt.info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
		pkt.whdr.flags		|= RXRPC_SLOW_START_OK;
		len += sizeof(pkt.ack) + sizeof(pkt.info);
		break;
	}
Loading