Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fcd2b0da authored by David S. Miller
Browse files

Merge branch 'rds-ha-failover-fixes'



Sowmini Varadhan says:

====================
RDS: TCP: HA/Failover fixes

This series contains a set of fixes for bugs exposed when
we ran the following in a loop between a test machine pair:

 while (1); do
   # modprobe rds-tcp on test nodes
   # run rds-stress in bi-dir mode between test machine pair
   # modprobe -r rds-tcp on test nodes
 done

rds-stress in bi-dir mode will cause both nodes to initiate
RDS-TCP connections at almost the same instant, exposing the
bugs fixed in this series.

Without the fixes, rds-stress reports sporadic packet drops,
and packets arriving out of sequence. After the fixes, we have
been able to run the test overnight, without any issues.

Each patch has a detailed description of the root-cause fixed
by the patch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b3e51069 1a0e100f
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -605,10 +605,14 @@ static void rds_exit(void)
}
module_exit(rds_exit);

u32 rds_gen_num;

static int rds_init(void)
{
	int ret;

	net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));

	ret = rds_bind_lock_init();
	if (ret)
		goto out;
+3 −0
Original line number Diff line number Diff line
@@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
			kmem_cache_free(rds_conn_slab, conn);
			conn = found;
		} else {
			conn->c_my_gen_num = rds_gen_num;
			conn->c_peer_gen_num = 0;
			hlist_add_head_rcu(&conn->c_hash_node, head);
			rds_cong_add_conn(conn);
			rds_conn_count++;
@@ -681,6 +683,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
	    !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
}
EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);

void rds_conn_connect_if_down(struct rds_connection *conn)
{
+1 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
[RDS_EXTHDR_RDMA]	= sizeof(struct rds_ext_header_rdma),
[RDS_EXTHDR_RDMA_DEST]	= sizeof(struct rds_ext_header_rdma_dest),
[RDS_EXTHDR_NPATHS]	= sizeof(u16),
[RDS_EXTHDR_GEN_NUM]	= sizeof(u32),
};


+7 −1
Original line number Diff line number Diff line
@@ -151,6 +151,9 @@ struct rds_connection {

	struct rds_conn_path	c_path[RDS_MPATH_WORKERS];
	wait_queue_head_t	c_hs_waitq; /* handshake waitq */

	u32			c_my_gen_num;
	u32			c_peer_gen_num;
};

static inline
@@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest {
/* Extension header announcing number of paths.
 * Implicit length = 2 bytes.
 */
#define RDS_EXTHDR_NPATHS	4
#define RDS_EXTHDR_NPATHS	5
#define RDS_EXTHDR_GEN_NUM	6

#define __RDS_EXTHDR_MAX	16 /* for now */

@@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_RETRANSMITTED	5
#define RDS_MSG_MAPPED		6
#define RDS_MSG_PAGEVEC		7
#define RDS_MSG_FLUSH		8

struct rds_message {
	atomic_t		m_refcount;
@@ -664,6 +669,7 @@ void rds_cong_exit(void);
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);

/* conn.c */
extern u32 rds_gen_num;
int rds_conn_init(void);
void rds_conn_exit(void);
struct rds_connection *rds_conn_create(struct net *net,
+36 −0
Original line number Diff line number Diff line
@@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
	/* do nothing if no change in cong state */
}

/*
 * Record the generation number most recently announced by the peer.
 *
 * A peer_gen_num of zero is ignored and leaves the connection untouched.
 *
 * If a non-zero generation number was already recorded and the new one
 * differs from it, every path of the connection is reset under its
 * cp_lock: the next transmit sequence goes back to 1, the next expected
 * receive sequence goes back to 0, and each message on the path's
 * retransmit list is tagged with RDS_MSG_FLUSH.
 *
 * Only expected to be used with the TCP transport; any other transport
 * type triggers a WARN_ON but processing continues.
 */
static void rds_conn_peer_gen_update(struct rds_connection *conn,
				     u32 peer_gen_num)
{
	struct rds_conn_path *path;
	struct rds_message *msg, *next;
	unsigned long irq_flags;

	WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP);

	/* zero means "no generation number supplied": nothing to do */
	if (!peer_gen_num)
		return;

	/* a previously known generation that differs => reset all paths */
	if (conn->c_peer_gen_num && conn->c_peer_gen_num != peer_gen_num) {
		for (path = conn->c_path;
		     path < conn->c_path + RDS_MPATH_WORKERS;
		     path++) {
			spin_lock_irqsave(&path->cp_lock, irq_flags);

			/* restart sequence numbering in both directions */
			path->cp_next_tx_seq = 1;
			path->cp_next_rx_seq = 0;

			/* tag everything awaiting retransmission for flush */
			list_for_each_entry_safe(msg, next,
						 &path->cp_retrans,
						 m_conn_item)
				set_bit(RDS_MSG_FLUSH, &msg->m_flags);

			spin_unlock_irqrestore(&path->cp_lock, irq_flags);
		}
	}

	conn->c_peer_gen_num = peer_gen_num;
}

/*
 * Process all extension headers that come with this message.
 */
@@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
	union {
		struct rds_ext_header_version version;
		u16 rds_npaths;
		u32 rds_gen_num;
	} buffer;
	u32 new_peer_gen_num = 0;

	while (1) {
		len = sizeof(buffer);
@@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
			conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
					       buffer.rds_npaths);
			break;
		case RDS_EXTHDR_GEN_NUM:
			new_peer_gen_num = buffer.rds_gen_num;
			break;
		default:
			pr_warn_ratelimited("ignoring unknown exthdr type "
					     "0x%x\n", type);
@@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
	}
	/* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
	conn->c_npaths = max_t(int, conn->c_npaths, 1);
	rds_conn_peer_gen_update(conn, new_peer_gen_num);
}

/* rds_start_mprds() will synchronously start multiple paths when appropriate.
Loading