Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5aa93bcf authored by Neil Horman, committed by David S. Miller
Browse files

sctp: Implement quick failover draft from tsvwg

I've seen several attempts recently made to do quick failover of sctp transports
by reducing various retransmit timers and counters.  While it's possible to
implement a faster failover on multihomed sctp associations, it's not
particularly robust, in that it can lead to unneeded retransmits, as well as
false connection failures due to intermittent latency on a network.

Instead, let's implement the new ietf quick failover draft found here:
http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05



This will let the sctp stack identify transports that have had a small number of
errors, and quickly avoid using them until their reliability can be
re-established.  I've tested this out on two virt guests connected via multiple
isolated virt networks and believe it's in compliance with the above draft and
works well.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Vlad Yasevich <vyasevich@gmail.com>
CC: Sridhar Samudrala <sri@us.ibm.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: linux-sctp@vger.kernel.org
CC: joe@perches.com
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e3906486
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -1440,6 +1440,20 @@ path_max_retrans - INTEGER

	Default: 5

pf_retrans - INTEGER
	The number of retransmissions that will be attempted on a given path
	before traffic is redirected to an alternate transport (should one
	exist).  Note this is distinct from path_max_retrans, as a path that
	passes the pf_retrans threshold can still be used.  It's only
	deprioritized when a transmission path is selected by the stack.  This
	setting is primarily used to enable fast failover mechanisms without
	having to reduce path_max_retrans to a very low value.  See:
	http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
	for details.  Note also that a value of pf_retrans > path_max_retrans
	disables this feature.

	Default: 0

rto_initial - INTEGER
	The initial round trip timeout value in milliseconds that will be used
	in calculating round trip times.  This is the initial time interval
+1 −0
Original line number Diff line number Diff line
@@ -334,6 +334,7 @@ typedef enum {
typedef enum {
	/* NOTE(review): presumably requests that the transport be marked
	 * reachable/active again -- confirm against the matching case in
	 * sctp_assoc_control_transport().
	 */
	SCTP_TRANSPORT_UP,
	/* NOTE(review): presumably requests that the transport be marked
	 * unreachable -- confirm against the matching case in
	 * sctp_assoc_control_transport().
	 */
	SCTP_TRANSPORT_DOWN,
	/* Handled by sctp_assoc_control_transport(): moves the transport
	 * to the SCTP_PF state and suppresses the peer address change
	 * notification to the ULP.
	 */
	SCTP_TRANSPORT_PF,
} sctp_transport_cmd_t;

/* These are the address scopes defined mainly for IPv4 addresses
+19 −1
Original line number Diff line number Diff line
@@ -161,6 +161,12 @@ extern struct sctp_globals {
	int max_retrans_path;
	int max_retrans_init;

	/* Potentially-Failed.Max.Retrans sysctl value
	 * taken from:
	 * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
	 */
	int pf_retrans;

	/*
	 * Policy for performing sctp/socket accounting
	 * 0   - do socket level accounting, all assocs share sk_sndbuf
@@ -258,6 +264,7 @@ extern struct sctp_globals {
#define sctp_sndbuf_policy	 	(sctp_globals.sndbuf_policy)
#define sctp_rcvbuf_policy	 	(sctp_globals.rcvbuf_policy)
#define sctp_max_retrans_path		(sctp_globals.max_retrans_path)
#define sctp_pf_retrans			(sctp_globals.pf_retrans)
#define sctp_max_retrans_init		(sctp_globals.max_retrans_init)
#define sctp_sack_timeout		(sctp_globals.sack_timeout)
#define sctp_hb_interval		(sctp_globals.hb_interval)
@@ -990,10 +997,15 @@ struct sctp_transport {

	/* This is the max_retrans value for the transport and will
	 * be initialized from the assocs value.  This can be changed
	 * using SCTP_SET_PEER_ADDR_PARAMS socket option.
	 * using the SCTP_SET_PEER_ADDR_PARAMS socket option.
	 */
	__u16 pathmaxrxt;

	/* This is the potentially failed retrans value for the transport
	 * and will be initialized from the assocs value.  This can be changed
	 * using the SCTP_PEER_ADDR_THLDS socket option.
	 */
	int pf_retrans;
	/* PMTU	      : The current known path MTU.  */
	__u32 pathmtu;

@@ -1664,6 +1676,12 @@ struct sctp_association {
	 */
	int max_retrans;

	/* This is the potentially failed retrans value for the association.
	 * It is initialized from the pf_retrans sysctl and is used to seed
	 * the pf_retrans value of each peer transport.  This can be
	 * changed using the SCTP_PEER_ADDR_THLDS socket option.
	 */
	int pf_retrans;

	/* Maximum number of times the endpoint will retransmit INIT  */
	__u16 max_init_attempts;

+11 −0
Original line number Diff line number Diff line
@@ -93,6 +93,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_GET_ASSOC_NUMBER	28	/* Read only */
#define SCTP_GET_ASSOC_ID_LIST	29	/* Read only */
#define SCTP_AUTO_ASCONF       30
#define SCTP_PEER_ADDR_THLDS	31

/* Internal Socket Options. Some of the sctp library functions are
 * implemented using these socket options.
@@ -649,6 +650,7 @@ struct sctp_paddrinfo {
 */
enum sctp_spinfo_state {
	SCTP_INACTIVE,
	SCTP_PF,
	SCTP_ACTIVE,
	SCTP_UNCONFIRMED,
	SCTP_UNKNOWN = 0xffff  /* Value used for transport state unknown */
@@ -741,4 +743,13 @@ typedef struct {
	int sd;
} sctp_peeloff_arg_t;

/*
 *  Peer Address Thresholds socket option (SCTP_PEER_ADDR_THLDS)
 *
 *  Argument structure carrying per-peer-address retransmission
 *  thresholds.  This structure is visible to user space, so its field
 *  order and types must not change.
 *
 *  NOTE(review): the field descriptions below are inferred from the
 *  field names and the related pathmaxrxt / pf_retrans members; the
 *  option handler is not visible here -- confirm against it.
 */
struct sctp_paddrthlds {
	/* Presumably the association the thresholds apply to. */
	sctp_assoc_t spt_assoc_id;
	/* Presumably the peer address (transport) being queried or set. */
	struct sockaddr_storage spt_address;
	/* Presumably the path max retransmission threshold (pathmaxrxt). */
	__u16 spt_pathmaxrxt;
	/* Presumably the potentially-failed threshold (pf_retrans).  Note
	 * this is a __u16 while the kernel-side pf_retrans fields are int.
	 */
	__u16 spt_pathpfthld;
};
#endif /* __net_sctp_user_h__ */
+30 −7
Original line number Diff line number Diff line
@@ -124,6 +124,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
	 * socket values.
	 */
	asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
	asoc->pf_retrans  = sctp_pf_retrans;

	asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
	asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
	asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min);
@@ -686,6 +688,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
	/* Set the path max_retrans.  */
	peer->pathmaxrxt = asoc->pathmaxrxt;

	/* And the potentially failed retransmission threshold */
	peer->pf_retrans = asoc->pf_retrans;

	/* Initialize the peer's SACK delay timeout based on the
	 * association configured value.
	 */
@@ -841,6 +846,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
	struct sctp_ulpevent *event;
	struct sockaddr_storage addr;
	int spc_state = 0;
	bool ulp_notify = true;

	/* Record the transition on the transport.  */
	switch (command) {
@@ -854,6 +860,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
			spc_state = SCTP_ADDR_CONFIRMED;
		else
			spc_state = SCTP_ADDR_AVAILABLE;
		/* Don't inform ULP about transition from PF to
		 * active state and set cwnd to 1, see SCTP
		 * Quick failover draft section 5.1, point 5
		 */
		if (transport->state == SCTP_PF) {
			ulp_notify = false;
			transport->cwnd = 1;
		}
		transport->state = SCTP_ACTIVE;
		break;

@@ -872,6 +886,11 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
		spc_state = SCTP_ADDR_UNREACHABLE;
		break;

	case SCTP_TRANSPORT_PF:
		transport->state = SCTP_PF;
		ulp_notify = false;
		break;

	default:
		return;
	}
@@ -879,12 +898,15 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
	 * user.
	 */
	if (ulp_notify) {
		memset(&addr, 0, sizeof(struct sockaddr_storage));
	memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
		memcpy(&addr, &transport->ipaddr,
		       transport->af_specific->sockaddr_len);
		event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
					0, spc_state, error, GFP_ATOMIC);
		if (event)
			sctp_ulpq_tail_event(&asoc->ulpq, event);
	}

	/* Select new active and retran paths. */

@@ -900,7 +922,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
			transports) {

		if ((t->state == SCTP_INACTIVE) ||
		    (t->state == SCTP_UNCONFIRMED))
		    (t->state == SCTP_UNCONFIRMED) ||
		    (t->state == SCTP_PF))
			continue;
		if (!first || t->last_time_heard > first->last_time_heard) {
			second = first;
Loading