Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 161cd45f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'rds-mprds-foundations'



Sowmini Varadhan says:

====================
RDS: multiple connection paths for scaling

Today RDS-over-TCP is implemented by demux-ing multiple PF_RDS sockets
between any 2 endpoints (where endpoint == [IP address, port]) over a
single TCP socket between the 2 IP addresses involved. This has the
limitation that it ends up funneling multiple RDS flows over a single
TCP flow, thus the rds/tcp connection is
   (a) upper-bounded to the single-flow bandwidth,
   (b) suffers from head-of-line blocking for the RDS sockets.

Better throughput (for a fixed small packet size, MTU) can be achieved
by having multiple TCP/IP flows per rds/tcp connection, i.e., multipathed
RDS (mprds).  Each such TCP/IP flow constitutes a path for the rds/tcp
connection. RDS sockets will be attached to a path based on some hash
(e.g., of local address and RDS port number) and packets for that RDS
socket will be sent over the attached path using TCP to segment/reassemble
RDS datagrams on that path.

The table below, generated using a prototype that implements mprds,
shows that this is significant for scaling to 40G.  Packet sizes
used were: 8K byte req, 256 byte resp. MTU: 1500.  The parameters for
RDS-concurrency used below are described in the rds-stress(1) man page;
the number listed is proportional to the number of threads at which max
throughput was attained.

  -------------------------------------------------------------------
     RDS-concurrency   Num of       tx+rx K/s (iops)       throughput
     (-t N -d N)       TCP paths
  -------------------------------------------------------------------
        16             1             600K -  700K            4 Gbps
        28             8            5000K - 6000K           32 Gbps
  -------------------------------------------------------------------

FAQ: what is the relation between mprds and mptcp?
  mprds is orthogonal to mptcp. Whereas mptcp creates
  sub-flows for a single TCP connection, mprds parallelizes tx/rx
  at the RDS layer. MPRDS with N paths will allow N datagrams to
  be sent in parallel; each path will continue to send one
  datagram at a time, with sender and receiver keeping track of
  the retransmit and dgram-assembly state based on the RDS header.
  If desired, mptcp can additionally be used to speed up each TCP
  path. That acceleration is orthogonal to the parallelization benefits
  of mprds.

This patch series lays down the foundational data-structures to support
mprds in the kernel. It implements the changes to split up the
rds_connection structure into a common (to all paths) part,
and a per-path rds_conn_path. All I/O workqs are driven from
the rds_conn_path.

Note that this patchset does not (yet) actually enable multipathing
for any of the transports; all transports will continue to use a
single path with the refactored data-structures. A subsequent patchset
will add the changes to the rds-tcp module to actually use mprds
in rds-tcp.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents dcf1158b 3ecc5693
Loading
Loading
Loading
Loading
+2 −1
Original line number Original line Diff line number Diff line
@@ -235,7 +235,8 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
			 *    therefore trigger warnings.
			 *    therefore trigger warnings.
			 * Defer the xmit to rds_send_worker() instead.
			 * Defer the xmit to rds_send_worker() instead.
			 */
			 */
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
			queue_delayed_work(rds_wq,
					   &conn->c_path[0].cp_send_w, 0);
		}
		}
	}
	}


+232 −97
Original line number Original line Diff line number Diff line
@@ -95,14 +95,16 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
 * and receiving over this connection again in the future.  It is up to
 * and receiving over this connection again in the future.  It is up to
 * the transport to have serialized this call with its send and recv.
 * the transport to have serialized this call with its send and recv.
 */
 */
static void rds_conn_reset(struct rds_connection *conn)
static void rds_conn_path_reset(struct rds_conn_path *cp)
{
{
	struct rds_connection *conn = cp->cp_conn;

	rdsdebug("connection %pI4 to %pI4 reset\n",
	rdsdebug("connection %pI4 to %pI4 reset\n",
	  &conn->c_laddr, &conn->c_faddr);
	  &conn->c_laddr, &conn->c_faddr);


	rds_stats_inc(s_conn_reset);
	rds_stats_inc(s_conn_reset);
	rds_send_reset(conn);
	rds_send_path_reset(cp);
	conn->c_flags = 0;
	cp->cp_flags = 0;


	/* Do not clear next_rx_seq here, else we cannot distinguish
	/* Do not clear next_rx_seq here, else we cannot distinguish
	 * retransmitted packets from new packets, and will hand all
	 * retransmitted packets from new packets, and will hand all
@@ -110,6 +112,32 @@ static void rds_conn_reset(struct rds_connection *conn)
	 * reliability guarantees of RDS. */
	 * reliability guarantees of RDS. */
}
}


/* Initialize a single rds_conn_path of @conn: its lock, sequence
 * counters, wait queue, send/retransmit lists, and the per-path work
 * items (connect, send, receive, shutdown) that drive all I/O for this
 * path.  The path starts out in state RDS_CONN_DOWN.
 */
static void __rds_conn_path_init(struct rds_connection *conn,
				 struct rds_conn_path *cp, bool is_outgoing)
{
	spin_lock_init(&cp->cp_lock);
	cp->cp_next_tx_seq = 1;
	init_waitqueue_head(&cp->cp_waitq);
	INIT_LIST_HEAD(&cp->cp_send_queue);
	INIT_LIST_HEAD(&cp->cp_retrans);

	/* back-pointer so path-level code can reach the shared conn state */
	cp->cp_conn = conn;
	atomic_set(&cp->cp_state, RDS_CONN_DOWN);
	cp->cp_send_gen = 0;
	/* cp_outgoing is per-path. So we can only set it here
	 * for the single-path transports.
	 */
	if (!conn->c_trans->t_mp_capable)
		cp->cp_outgoing = (is_outgoing ? 1 : 0);
	cp->cp_reconnect_jiffies = 0;
	INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
	INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
	INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
	INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
	mutex_init(&cp->cp_cm_lock);
	cp->cp_flags = 0;
}

/*
/*
 * There is only every one 'conn' for a given pair of addresses in the
 * There is only every one 'conn' for a given pair of addresses in the
 * system at a time.  They contain messages to be retransmitted and so
 * system at a time.  They contain messages to be retransmitted and so
@@ -153,13 +181,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	INIT_HLIST_NODE(&conn->c_hash_node);
	INIT_HLIST_NODE(&conn->c_hash_node);
	conn->c_laddr = laddr;
	conn->c_laddr = laddr;
	conn->c_faddr = faddr;
	conn->c_faddr = faddr;
	spin_lock_init(&conn->c_lock);
	conn->c_next_tx_seq = 1;
	rds_conn_net_set(conn, net);


	init_waitqueue_head(&conn->c_waitq);
	rds_conn_net_set(conn, net);
	INIT_LIST_HEAD(&conn->c_send_queue);
	INIT_LIST_HEAD(&conn->c_retrans);


	ret = rds_cong_get_maps(conn);
	ret = rds_cong_get_maps(conn);
	if (ret) {
	if (ret) {
@@ -195,17 +218,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
		goto out;
		goto out;
	}
	}


	atomic_set(&conn->c_state, RDS_CONN_DOWN);
	conn->c_send_gen = 0;
	conn->c_outgoing = (is_outgoing ? 1 : 0);
	conn->c_reconnect_jiffies = 0;
	INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
	INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
	INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
	INIT_WORK(&conn->c_down_w, rds_shutdown_worker);
	mutex_init(&conn->c_cm_lock);
	conn->c_flags = 0;

	rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
	rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
	  conn, &laddr, &faddr,
	  conn, &laddr, &faddr,
	  trans->t_name ? trans->t_name : "[unknown]",
	  trans->t_name ? trans->t_name : "[unknown]",
@@ -222,7 +234,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
	if (parent) {
	if (parent) {
		/* Creating passive conn */
		/* Creating passive conn */
		if (parent->c_passive) {
		if (parent->c_passive) {
			trans->conn_free(conn->c_transport_data);
			trans->conn_free(conn->c_path[0].cp_transport_data);
			kmem_cache_free(rds_conn_slab, conn);
			kmem_cache_free(rds_conn_slab, conn);
			conn = parent->c_passive;
			conn = parent->c_passive;
		} else {
		} else {
@@ -236,10 +248,26 @@ static struct rds_connection *__rds_conn_create(struct net *net,


		found = rds_conn_lookup(net, head, laddr, faddr, trans);
		found = rds_conn_lookup(net, head, laddr, faddr, trans);
		if (found) {
		if (found) {
			trans->conn_free(conn->c_transport_data);
			struct rds_conn_path *cp;
			int i;

			for (i = 0; i < RDS_MPATH_WORKERS; i++) {
				cp = &conn->c_path[i];
				trans->conn_free(cp->cp_transport_data);
				if (!trans->t_mp_capable)
					break;
			}
			kmem_cache_free(rds_conn_slab, conn);
			kmem_cache_free(rds_conn_slab, conn);
			conn = found;
			conn = found;
		} else {
		} else {
			int i;

			for (i = 0; i < RDS_MPATH_WORKERS; i++) {
				__rds_conn_path_init(conn, &conn->c_path[i],
						     is_outgoing);
				conn->c_path[i].cp_index = i;
			}

			hlist_add_head_rcu(&conn->c_hash_node, head);
			hlist_add_head_rcu(&conn->c_hash_node, head);
			rds_cong_add_conn(conn);
			rds_cong_add_conn(conn);
			rds_conn_count++;
			rds_conn_count++;
@@ -267,10 +295,12 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
}
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);


void rds_conn_shutdown(struct rds_connection *conn)
void rds_conn_shutdown(struct rds_conn_path *cp)
{
{
	struct rds_connection *conn = cp->cp_conn;

	/* shut it down unless it's down already */
	/* shut it down unless it's down already */
	if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
	if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
		/*
		/*
		 * Quiesce the connection mgmt handlers before we start tearing
		 * Quiesce the connection mgmt handlers before we start tearing
		 * things down. We don't hold the mutex for the entire
		 * things down. We don't hold the mutex for the entire
@@ -278,35 +308,41 @@ void rds_conn_shutdown(struct rds_connection *conn)
		 * deadlocking with the CM handler. Instead, the CM event
		 * deadlocking with the CM handler. Instead, the CM event
		 * handler is supposed to check for state DISCONNECTING
		 * handler is supposed to check for state DISCONNECTING
		 */
		 */
		mutex_lock(&conn->c_cm_lock);
		mutex_lock(&cp->cp_cm_lock);
		if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
		if (!rds_conn_path_transition(cp, RDS_CONN_UP,
		 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
					      RDS_CONN_DISCONNECTING) &&
			rds_conn_error(conn, "shutdown called in state %d\n",
		    !rds_conn_path_transition(cp, RDS_CONN_ERROR,
					atomic_read(&conn->c_state));
					      RDS_CONN_DISCONNECTING)) {
			mutex_unlock(&conn->c_cm_lock);
			rds_conn_path_error(cp,
					    "shutdown called in state %d\n",
					    atomic_read(&cp->cp_state));
			mutex_unlock(&cp->cp_cm_lock);
			return;
			return;
		}
		}
		mutex_unlock(&conn->c_cm_lock);
		mutex_unlock(&cp->cp_cm_lock);


		wait_event(conn->c_waitq,
		wait_event(cp->cp_waitq,
			   !test_bit(RDS_IN_XMIT, &conn->c_flags));
			   !test_bit(RDS_IN_XMIT, &cp->cp_flags));
		wait_event(conn->c_waitq,
		wait_event(cp->cp_waitq,
			   !test_bit(RDS_RECV_REFILL, &conn->c_flags));
			   !test_bit(RDS_RECV_REFILL, &cp->cp_flags));


		if (!conn->c_trans->t_mp_capable)
			conn->c_trans->conn_shutdown(conn);
			conn->c_trans->conn_shutdown(conn);
		rds_conn_reset(conn);
		else
			conn->c_trans->conn_path_shutdown(cp);
		rds_conn_path_reset(cp);


		if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
		if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
					      RDS_CONN_DOWN)) {
			/* This can happen - eg when we're in the middle of tearing
			/* This can happen - eg when we're in the middle of tearing
			 * down the connection, and someone unloads the rds module.
			 * down the connection, and someone unloads the rds module.
			 * Quite reproduceable with loopback connections.
			 * Quite reproduceable with loopback connections.
			 * Mostly harmless.
			 * Mostly harmless.
			 */
			 */
			rds_conn_error(conn,
			rds_conn_path_error(cp, "%s: failed to transition "
				"%s: failed to transition to state DOWN, "
					    "to state DOWN, current state "
				"current state is %d\n",
					    "is %d\n", __func__,
				__func__,
					    atomic_read(&cp->cp_state));
				atomic_read(&conn->c_state));
			return;
			return;
		}
		}
	}
	}
@@ -315,18 +351,46 @@ void rds_conn_shutdown(struct rds_connection *conn)
	 * The passive side of an IB loopback connection is never added
	 * The passive side of an IB loopback connection is never added
	 * to the conn hash, so we never trigger a reconnect on this
	 * to the conn hash, so we never trigger a reconnect on this
	 * conn - the reconnect is always triggered by the active peer. */
	 * conn - the reconnect is always triggered by the active peer. */
	cancel_delayed_work_sync(&conn->c_conn_w);
	cancel_delayed_work_sync(&cp->cp_conn_w);
	rcu_read_lock();
	rcu_read_lock();
	if (!hlist_unhashed(&conn->c_hash_node)) {
	if (!hlist_unhashed(&conn->c_hash_node)) {
		rcu_read_unlock();
		rcu_read_unlock();
		if (conn->c_trans->t_type != RDS_TRANS_TCP ||
		if (conn->c_trans->t_type != RDS_TRANS_TCP ||
		    conn->c_outgoing == 1)
		    cp->cp_outgoing == 1)
			rds_queue_reconnect(conn);
			rds_queue_reconnect(cp);
	} else {
	} else {
		rcu_read_unlock();
		rcu_read_unlock();
	}
	}
}
}


/* Destroy a single rds_conn_path and free its transport data.
 * rds_conn_destroy() iterates over all paths using
 * rds_conn_path_destroy().
 */
static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
	struct rds_message *rm, *rtmp;

	/* force the path into RDS_CONN_ERROR, then run its shutdown
	 * worker to completion before tearing anything down
	 */
	rds_conn_path_drop(cp);
	flush_work(&cp->cp_down_w);

	/* make sure lingering queued work won't try to ref the conn */
	cancel_delayed_work_sync(&cp->cp_send_w);
	cancel_delayed_work_sync(&cp->cp_recv_w);

	/* tear down queued messages */
	list_for_each_entry_safe(rm, rtmp,
				 &cp->cp_send_queue,
				 m_conn_item) {
		list_del_init(&rm->m_conn_item);
		BUG_ON(!list_empty(&rm->m_sock_item));
		rds_message_put(rm);
	}
	/* cp_xmit_rm presumably holds the message currently being
	 * transmitted on this path; drop our ref if one is set
	 */
	if (cp->cp_xmit_rm)
		rds_message_put(cp->cp_xmit_rm);

	cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
}

/*
/*
 * Stop and free a connection.
 * Stop and free a connection.
 *
 *
@@ -336,7 +400,6 @@ void rds_conn_shutdown(struct rds_connection *conn)
 */
 */
void rds_conn_destroy(struct rds_connection *conn)
void rds_conn_destroy(struct rds_connection *conn)
{
{
	struct rds_message *rm, *rtmp;
	unsigned long flags;
	unsigned long flags;


	rdsdebug("freeing conn %p for %pI4 -> "
	rdsdebug("freeing conn %p for %pI4 -> "
@@ -350,25 +413,19 @@ void rds_conn_destroy(struct rds_connection *conn)
	synchronize_rcu();
	synchronize_rcu();


	/* shut the connection down */
	/* shut the connection down */
	rds_conn_drop(conn);
	if (!conn->c_trans->t_mp_capable) {
	flush_work(&conn->c_down_w);
		rds_conn_path_destroy(&conn->c_path[0]);

		BUG_ON(!list_empty(&conn->c_path[0].cp_retrans));
	/* make sure lingering queued work won't try to ref the conn */
	} else {
	cancel_delayed_work_sync(&conn->c_send_w);
		int i;
	cancel_delayed_work_sync(&conn->c_recv_w);
		struct rds_conn_path *cp;


	/* tear down queued messages */
		for (i = 0; i < RDS_MPATH_WORKERS; i++) {
	list_for_each_entry_safe(rm, rtmp,
			cp = &conn->c_path[i];
				 &conn->c_send_queue,
			rds_conn_path_destroy(cp);
				 m_conn_item) {
			BUG_ON(!list_empty(&cp->cp_retrans));
		list_del_init(&rm->m_conn_item);
		}
		BUG_ON(!list_empty(&rm->m_sock_item));
		rds_message_put(rm);
	}
	}
	if (conn->c_xmit_rm)
		rds_message_put(conn->c_xmit_rm);

	conn->c_trans->conn_free(conn->c_transport_data);


	/*
	/*
	 * The congestion maps aren't freed up here.  They're
	 * The congestion maps aren't freed up here.  They're
@@ -377,7 +434,6 @@ void rds_conn_destroy(struct rds_connection *conn)
	 */
	 */
	rds_cong_remove_conn(conn);
	rds_cong_remove_conn(conn);


	BUG_ON(!list_empty(&conn->c_retrans));
	kmem_cache_free(rds_conn_slab, conn);
	kmem_cache_free(rds_conn_slab, conn);


	spin_lock_irqsave(&rds_conn_lock, flags);
	spin_lock_irqsave(&rds_conn_lock, flags);
@@ -398,6 +454,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
	unsigned int total = 0;
	unsigned int total = 0;
	unsigned long flags;
	unsigned long flags;
	size_t i;
	size_t i;
	int j;


	len /= sizeof(struct rds_info_message);
	len /= sizeof(struct rds_info_message);


@@ -406,23 +463,32 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
	for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
	for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
	     i++, head++) {
	     i++, head++) {
		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
			struct rds_conn_path *cp;

			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
				cp = &conn->c_path[j];
				if (want_send)
				if (want_send)
				list = &conn->c_send_queue;
					list = &cp->cp_send_queue;
				else
				else
				list = &conn->c_retrans;
					list = &cp->cp_retrans;


			spin_lock_irqsave(&conn->c_lock, flags);
				spin_lock_irqsave(&cp->cp_lock, flags);


				/* XXX too lazy to maintain counts.. */
				/* XXX too lazy to maintain counts.. */
				list_for_each_entry(rm, list, m_conn_item) {
				list_for_each_entry(rm, list, m_conn_item) {
					total++;
					total++;
					if (total <= len)
					if (total <= len)
					rds_inc_info_copy(&rm->m_inc, iter,
						rds_inc_info_copy(&rm->m_inc,
								  iter,
								  conn->c_laddr,
								  conn->c_laddr,
							  conn->c_faddr, 0);
								  conn->c_faddr,
								  0);
				}
				}


			spin_unlock_irqrestore(&conn->c_lock, flags);
				spin_unlock_irqrestore(&cp->cp_lock, flags);
				if (!conn->c_trans->t_mp_capable)
					break;
			}
		}
		}
	}
	}
	rcu_read_unlock();
	rcu_read_unlock();
@@ -484,27 +550,72 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
}
}
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);


static int rds_conn_info_visitor(struct rds_connection *conn,
void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
				  void *buffer)
			     struct rds_info_iterator *iter,
			     struct rds_info_lengths *lens,
			     int (*visitor)(struct rds_conn_path *, void *),
			     size_t item_len)
{
	u64  buffer[(item_len + 7) / 8];
	struct hlist_head *head;
	struct rds_connection *conn;
	size_t i;
	int j;

	rcu_read_lock();

	lens->nr = 0;
	lens->each = item_len;

	for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
	     i++, head++) {
		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
			struct rds_conn_path *cp;

			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
				cp = &conn->c_path[j];

				/* XXX no cp_lock usage.. */
				if (!visitor(cp, buffer))
					continue;
				if (!conn->c_trans->t_mp_capable)
					break;
			}

			/* We copy as much as we can fit in the buffer,
			 * but we count all items so that the caller
			 * can resize the buffer.
			 */
			if (len >= item_len) {
				rds_info_copy(iter, buffer, item_len);
				len -= item_len;
			}
			lens->nr++;
		}
	}
	rcu_read_unlock();
}

static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
{
{
	struct rds_info_connection *cinfo = buffer;
	struct rds_info_connection *cinfo = buffer;


	cinfo->next_tx_seq = conn->c_next_tx_seq;
	cinfo->next_tx_seq = cp->cp_next_tx_seq;
	cinfo->next_rx_seq = conn->c_next_rx_seq;
	cinfo->next_rx_seq = cp->cp_next_rx_seq;
	cinfo->laddr = conn->c_laddr;
	cinfo->laddr = cp->cp_conn->c_laddr;
	cinfo->faddr = conn->c_faddr;
	cinfo->faddr = cp->cp_conn->c_faddr;
	strncpy(cinfo->transport, conn->c_trans->t_name,
	strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
		sizeof(cinfo->transport));
		sizeof(cinfo->transport));
	cinfo->flags = 0;
	cinfo->flags = 0;


	rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
	rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
			  SENDING);
			  SENDING);
	/* XXX Future: return the state rather than these funky bits */
	/* XXX Future: return the state rather than these funky bits */
	rds_conn_info_set(cinfo->flags,
	rds_conn_info_set(cinfo->flags,
			  atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
			  atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING,
			  CONNECTING);
			  CONNECTING);
	rds_conn_info_set(cinfo->flags,
	rds_conn_info_set(cinfo->flags,
			  atomic_read(&conn->c_state) == RDS_CONN_UP,
			  atomic_read(&cp->cp_state) == RDS_CONN_UP,
			  CONNECTED);
			  CONNECTED);
	return 1;
	return 1;
}
}
@@ -513,7 +624,7 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
			  struct rds_info_iterator *iter,
			  struct rds_info_iterator *iter,
			  struct rds_info_lengths *lens)
			  struct rds_info_lengths *lens)
{
{
	rds_for_each_conn_info(sock, len, iter, lens,
	rds_walk_conn_path_info(sock, len, iter, lens,
				rds_conn_info_visitor,
				rds_conn_info_visitor,
				sizeof(struct rds_info_connection));
				sizeof(struct rds_info_connection));
}
}
@@ -553,10 +664,16 @@ void rds_conn_exit(void)
/*
/*
 * Force a disconnect
 * Force a disconnect
 */
 */
/* Force a disconnect of one connection path: mark it RDS_CONN_ERROR
 * and defer the actual teardown to the shutdown worker (cp_down_w)
 * running on rds_wq.
 */
void rds_conn_path_drop(struct rds_conn_path *cp)
{
	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
	queue_work(rds_wq, &cp->cp_down_w);
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);

void rds_conn_drop(struct rds_connection *conn)
void rds_conn_drop(struct rds_connection *conn)
{
{
	atomic_set(&conn->c_state, RDS_CONN_ERROR);
	rds_conn_path_drop(&conn->c_path[0]);
	queue_work(rds_wq, &conn->c_down_w);
}
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
EXPORT_SYMBOL_GPL(rds_conn_drop);


@@ -564,11 +681,17 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
 * If the connection is down, trigger a connect. We may have scheduled a
 * If the connection is down, trigger a connect. We may have scheduled a
 * delayed reconnect however - in this case we should not interfere.
 * delayed reconnect however - in this case we should not interfere.
 */
 */
/* Kick off a connect on @cp if the path is down.  A delayed reconnect
 * may already be scheduled - it owns the RDS_RECONNECT_PENDING bit,
 * and we must not interfere with it, so only queue the connect worker
 * when we are the ones who set the bit.
 */
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
{
	if (rds_conn_path_state(cp) != RDS_CONN_DOWN)
		return;
	if (test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
		return;
	queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
}

void rds_conn_connect_if_down(struct rds_connection *conn)
void rds_conn_connect_if_down(struct rds_connection *conn)
{
{
	if (rds_conn_state(conn) == RDS_CONN_DOWN &&
	WARN_ON(conn->c_trans->t_mp_capable);
	    !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
	rds_conn_path_connect_if_down(&conn->c_path[0]);
		queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
}
}
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);


@@ -586,3 +709,15 @@ __rds_conn_error(struct rds_connection *conn, const char *fmt, ...)


	rds_conn_drop(conn);
	rds_conn_drop(conn);
}
}

/* Log a printf-style error message for @cp and then drop the path
 * (see rds_conn_path_drop()).  Path-level counterpart of
 * __rds_conn_error().
 */
void
__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vprintk(fmt, ap);
	va_end(ap);

	rds_conn_path_drop(cp);
}
+1 −0
Original line number Original line Diff line number Diff line
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/module.h>


#include "rds_single_path.h"
#include "rds.h"
#include "rds.h"
#include "ib.h"
#include "ib.h"
#include "ib_mr.h"
#include "ib_mr.h"
+2 −1
Original line number Original line Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/vmalloc.h>
#include <linux/vmalloc.h>
#include <linux/ratelimit.h>
#include <linux/ratelimit.h>


#include "rds_single_path.h"
#include "rds.h"
#include "rds.h"
#include "ib.h"
#include "ib.h"


@@ -273,7 +274,7 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
	if (rds_conn_up(conn) &&
	if (rds_conn_up(conn) &&
	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued)))
	    test_bit(0, &conn->c_map_queued)))
		rds_send_xmit(ic->conn);
		rds_send_xmit(&ic->conn->c_path[0]);
}
}


static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
+1 −0
Original line number Original line Diff line number Diff line
@@ -35,6 +35,7 @@
#include <linux/rculist.h>
#include <linux/rculist.h>
#include <linux/llist.h>
#include <linux/llist.h>


#include "rds_single_path.h"
#include "ib_mr.h"
#include "ib_mr.h"


struct workqueue_struct *rds_ib_mr_wq;
struct workqueue_struct *rds_ib_mr_wq;
Loading