Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 303b0d4b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'for_4.11/net-next/rds_v3' of...

Merge branch 'for_4.11/net-next/rds_v3' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux



Santosh Shilimkar says:

====================
net: RDS updates

v2->v3:
- Re-based against latest net-next head.
- Dropped a user visible change after discussing with David Miller.
  It needs some more work to fully support old/new tools matrix.
- Addressed Dave's comment about bool usage in patch
  "RDS: IB: track and log active side..."

v1->v2:
Re-aligned indentation in patch 'RDS: mark few internal functions..."

Series consist of:
 - RDMA transport fixes for map failure, listen sequence, handler panic and
   composite message notification.
 - Couple of sparse fixes.
 - Message logging improvements for bind failure, use once mr semantics
   and connection remote address, active end point.
 - Performance improvement for RDMA transport by reducing the post send
   pressure on the queue and spreading the CQ vectors.
 - Useful statistics for socket send/recv usage and receive cache usage.
 - Additional RDS CMSG used by application to track the RDS message
   stages for certain type of traffic to find out latency spots.
   Can be enabled/disabled per socket.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 0a0a8d6b 3289025a
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -52,6 +52,13 @@
#define RDS_GET_MR_FOR_DEST		7
#define SO_RDS_TRANSPORT		8

/* Socket option to tap receive path latency
 *	SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
 *	Format used struct rds_rx_trace_so
 */
#define SO_RDS_MSG_RXPATH_LATENCY	10


/* supported values for SO_RDS_TRANSPORT */
#define	RDS_TRANS_IB	0
#define	RDS_TRANS_IWARP	1
@@ -77,6 +84,12 @@
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_RXPATH_LATENCY(recvmsg)
 *	Returns rds message latencies in various stages of receive
 *	path in nS. Its set per socket using SO_RDS_MSG_RXPATH_LATENCY
 *	socket option. Legitimate points are defined in
 *	enum rds_message_rxpath_latency. More points can be added in
 *	future. CSMG format is struct rds_cmsg_rx_trace.
 */
#define RDS_CMSG_RDMA_ARGS		1
#define RDS_CMSG_RDMA_DEST		2
@@ -87,6 +100,7 @@
#define RDS_CMSG_ATOMIC_CSWP		7
#define RDS_CMSG_MASKED_ATOMIC_FADD	8
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11

#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
@@ -171,6 +185,25 @@ struct rds_info_rdma_connection {
	uint32_t	rdma_mr_size;
};

/* RDS message Receive Path Latency points */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
};

struct rds_rx_trace_so {
	u8 rx_traces;
	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

struct rds_cmsg_rx_trace {
	u8 rx_traces;
	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
+28 −0
Original line number Diff line number Diff line
@@ -298,6 +298,30 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
	return 0;
}

static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
				  int optlen)
{
	struct rds_rx_trace_so trace;
	int i;

	if (optlen != sizeof(struct rds_rx_trace_so))
		return -EFAULT;

	if (copy_from_user(&trace, optval, sizeof(trace)))
		return -EFAULT;

	rs->rs_rx_traces = trace.rx_traces;
	for (i = 0; i < rs->rs_rx_traces; i++) {
		if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
			rs->rs_rx_traces = 0;
			return -EFAULT;
		}
		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
	}

	return 0;
}

static int rds_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
@@ -338,6 +362,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
		release_sock(sock->sk);
		break;
	case SO_RDS_MSG_RXPATH_LATENCY:
		ret = rds_recv_track_latency(rs, optval, optlen);
		break;
	default:
		ret = -ENOPROTOOPT;
	}
@@ -484,6 +511,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
	INIT_LIST_HEAD(&rs->rs_cong_list);
	spin_lock_init(&rs->rs_rdma_lock);
	rs->rs_rdma_keys = RB_ROOT;
	rs->rs_rx_traces = 0;

	spin_lock_bh(&rds_sock_lock);
	list_add_tail(&rs->rs_item, &rds_sock_list);
+2 −2
Original line number Diff line number Diff line
@@ -176,8 +176,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
	if (!trans) {
		ret = -EADDRNOTAVAIL;
		rds_remove_bound(rs);
		printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, "
				"load rds_tcp or rds_rdma?\n");
		pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n",
				    __func__, &sin->sin_addr.s_addr);
		goto out;
	}

+5 −5
Original line number Diff line number Diff line
@@ -545,7 +545,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
}
EXPORT_SYMBOL_GPL(rds_for_each_conn_info);

void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
				    struct rds_info_iterator *iter,
				    struct rds_info_lengths *lens,
				    int (*visitor)(struct rds_conn_path *, void *),
+11 −0
Original line number Diff line number Diff line
@@ -111,6 +111,9 @@ static void rds_ib_dev_free(struct work_struct *work)
		kfree(i_ipaddr);
	}

	if (rds_ibdev->vector_load)
		kfree(rds_ibdev->vector_load);

	kfree(rds_ibdev);
}

@@ -159,6 +162,14 @@ static void rds_ib_add_one(struct ib_device *device)
	rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
	rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;

	rds_ibdev->vector_load = kzalloc(sizeof(int) * device->num_comp_vectors,
					 GFP_KERNEL);
	if (!rds_ibdev->vector_load) {
		pr_err("RDS/IB: %s failed to allocate vector memory\n",
			__func__);
		goto put_dev;
	}

	rds_ibdev->dev = device;
	rds_ibdev->pd = ib_alloc_pd(device, 0);
	if (IS_ERR(rds_ibdev->pd)) {
Loading