Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e779137a authored by Andy Grover
Browse files

RDS: break out rdma and data ops into nested structs in rds_message



Clearly separate rdma-related variables in rm from data-related ones.
This is in anticipation of adding atomic support.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
parent 8690bfa1
Loading
Loading
Loading
Loading
+23 −21
Original line number Diff line number Diff line
@@ -83,11 +83,11 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
		     rm->m_sg, rm->m_nents,
			rm->data.m_sg, rm->data.m_nents,
			DMA_TO_DEVICE);

	if (rm->m_rdma_op) {
		rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
	if (rm->rdma.m_rdma_op) {
		rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);

		/* If the user asked for a completion notification on this
		 * message, we can implement three different semantics:
@@ -111,10 +111,10 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
		 */
		rds_ib_send_rdma_complete(rm, wc_status);

		if (rm->m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
		if (rm->rdma.m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
	}

	/* If anyone waited for this message to get flushed out, wake
@@ -244,8 +244,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)

				rm = rds_send_get_message(conn, send->s_op);
				if (rm) {
					if (rm->m_rdma_op)
						rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
					if (rm->rdma.m_rdma_op)
						rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
					rds_ib_send_rdma_complete(rm, wc.status);
					rds_message_put(rm);
				}
@@ -532,18 +532,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
				rm->m_inc.i_hdr.h_flags,
				be32_to_cpu(rm->m_inc.i_hdr.h_len));
		   */
		if (rm->m_nents) {
			rm->m_count = ib_dma_map_sg(dev,
					 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
			if (rm->m_count == 0) {
		if (rm->data.m_nents) {
			rm->data.m_count = ib_dma_map_sg(dev,
							    rm->data.m_sg,
							    rm->data.m_nents,
							    DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
			if (rm->data.m_count == 0) {
				rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
				rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM; /* XXX ? */
				goto out;
			}
		} else {
			rm->m_count = 0;
			rm->data.m_count = 0;
		}

		ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
@@ -559,10 +561,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,

		/* If it has a RDMA op, tell the peer we did it. This is
		 * used by the peer to release use-once RDMA MRs. */
		if (rm->m_rdma_op) {
		if (rm->rdma.m_rdma_op) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
			rds_message_add_extension(&rm->m_inc.i_hdr,
					RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
@@ -590,7 +592,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &rm->m_sg[sg];
	scat = &rm->data.m_sg[sg];
	sent = 0;
	i = 0;

@@ -600,7 +602,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
	 * or when requested by the user. Right now, we let
	 * the application choose.
	 */
	if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
		send_flags = IB_SEND_FENCE;

	/*
@@ -619,7 +621,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
	}

	/* if there's data reference it with a chain of work reqs */
	for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
	for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
		unsigned int len;

		send = &ic->i_sends[pos];
@@ -697,7 +699,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
		sent += sizeof(struct rds_header);

	/* if we finished the message then send completion owns it */
	if (scat == &rm->m_sg[rm->m_count]) {
	if (scat == &rm->data.m_sg[rm->data.m_count]) {
		prev->s_rm = ic->i_rm;
		prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		ic->i_rm = NULL;
+20 −18
Original line number Diff line number Diff line
@@ -83,11 +83,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
		     rm->m_sg, rm->m_nents,
		     rm->data.m_sg, rm->data.m_nents,
		     DMA_TO_DEVICE);

	if (rm->m_rdma_op) {
		rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
	if (rm->rdma.m_rdma_op) {
		rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op);

		/* If the user asked for a completion notification on this
		 * message, we can implement three different semantics:
@@ -111,10 +111,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
		 */
		rds_iw_send_rdma_complete(rm, wc_status);

		if (rm->m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
		if (rm->rdma.m_rdma_op->r_write)
			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
	}

	/* If anyone waited for this message to get flushed out, wake
@@ -563,18 +563,20 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
				rm->m_inc.i_hdr.h_flags,
				be32_to_cpu(rm->m_inc.i_hdr.h_len));
		   */
		if (rm->m_nents) {
			rm->m_count = ib_dma_map_sg(dev,
					 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
			if (rm->m_count == 0) {
		if (rm->data.m_nents) {
			rm->data.m_count = ib_dma_map_sg(dev,
						    rm->data.m_sg,
						    rm->data.m_nents,
						    DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
			if (rm->data.m_count == 0) {
				rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
				rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM; /* XXX ? */
				goto out;
			}
		} else {
			rm->m_count = 0;
			rm->data.m_count = 0;
		}

		ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +592,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,

		/* If it has a RDMA op, tell the peer we did it. This is
		 * used by the peer to release use-once RDMA MRs. */
		if (rm->m_rdma_op) {
		if (rm->rdma.m_rdma_op) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
			rds_message_add_extension(&rm->m_inc.i_hdr,
					RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
@@ -621,7 +623,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &rm->m_sg[sg];
	scat = &rm->data.m_sg[sg];
	sent = 0;
	i = 0;

@@ -631,7 +633,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
	 * or when requested by the user. Right now, we let
	 * the application choose.
	 */
	if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
		send_flags = IB_SEND_FENCE;

	/*
@@ -650,7 +652,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
	}

	/* if there's data reference it with a chain of work reqs */
	for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
	for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
		unsigned int len;

		send = &ic->i_sends[pos];
@@ -728,7 +730,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
		sent += sizeof(struct rds_header);

	/* if we finished the message then send completion owns it */
	if (scat == &rm->m_sg[rm->m_count]) {
	if (scat == &rm->data.m_sg[rm->data.m_count]) {
		prev->s_rm = ic->i_rm;
		prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		ic->i_rm = NULL;
+15 −15
Original line number Diff line number Diff line
@@ -63,17 +63,17 @@ static void rds_message_purge(struct rds_message *rm)
	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
		return;

	for (i = 0; i < rm->m_nents; i++) {
		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
	for (i = 0; i < rm->data.m_nents; i++) {
		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.m_sg[i]));
		/* XXX will have to put_page for page refs */
		__free_page(sg_page(&rm->m_sg[i]));
		__free_page(sg_page(&rm->data.m_sg[i]));
	}
	rm->m_nents = 0;
	rm->data.m_nents = 0;

	if (rm->m_rdma_op)
		rds_rdma_free_op(rm->m_rdma_op);
	if (rm->m_rdma_mr)
		rds_mr_put(rm->m_rdma_mr);
	if (rm->rdma.m_rdma_op)
		rds_rdma_free_op(rm->rdma.m_rdma_op);
	if (rm->rdma.m_rdma_mr)
		rds_mr_put(rm->rdma.m_rdma_mr);
}

void rds_message_inc_purge(struct rds_incoming *inc)
@@ -224,7 +224,7 @@ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp)
		goto out;

	if (nents)
		sg_init_table(rm->m_sg, nents);
		sg_init_table(rm->data.m_sg, nents);
	atomic_set(&rm->m_refcount, 1);
	INIT_LIST_HEAD(&rm->m_sock_item);
	INIT_LIST_HEAD(&rm->m_conn_item);
@@ -245,10 +245,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in

	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
	rm->m_nents = ceil(total_len, PAGE_SIZE);
	rm->data.m_nents = ceil(total_len, PAGE_SIZE);

	for (i = 0; i < rm->m_nents; ++i) {
		sg_set_page(&rm->m_sg[i],
	for (i = 0; i < rm->data.m_nents; ++i) {
		sg_set_page(&rm->data.m_sg[i],
				virt_to_page(page_addrs[i]),
				PAGE_SIZE, 0);
	}
@@ -278,7 +278,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
	/*
	 * now allocate and copy in the data payload.
	 */
	sg = rm->m_sg;
	sg = rm->data.m_sg;
	iov = first_iov;
	iov_off = 0;
	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
@@ -289,7 +289,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
						       GFP_HIGHUSER);
			if (ret)
				goto out;
			rm->m_nents++;
			rm->data.m_nents++;
			sg_off = 0;
		}

@@ -348,7 +348,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,

	iov = first_iov;
	iov_off = 0;
	sg = rm->m_sg;
	sg = rm->data.m_sg;
	vec_off = 0;
	copied = 0;

+5 −4
Original line number Diff line number Diff line
@@ -643,14 +643,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
	struct rds_rdma_op *op;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
	    rm->m_rdma_op)
	    rm->rdma.m_rdma_op)
		return -EINVAL;

	op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
	if (IS_ERR(op))
		return PTR_ERR(op);
	rds_stats_inc(s_send_rdma);
	rm->m_rdma_op = op;
	rm->rdma.m_rdma_op = op;
	return 0;
}

@@ -679,6 +679,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
	 */
	r_key = rds_rdma_cookie_key(rm->m_rdma_cookie);


	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
	if (!mr)
@@ -689,7 +690,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,

	if (mr) {
		mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
		rm->m_rdma_mr = mr;
		rm->rdma.m_rdma_mr = mr;
	}
	return err;
}
@@ -707,5 +708,5 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
	    rm->m_rdma_cookie != 0)
		return -EINVAL;

	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr);
}
+11 −5
Original line number Diff line number Diff line
@@ -259,12 +259,18 @@ struct rds_message {
	 */
	spinlock_t		m_rs_lock;
	struct rds_sock		*m_rs;
	struct rds_rdma_op	*m_rdma_op;
	rds_rdma_cookie_t	m_rdma_cookie;
	struct {
		struct {
			struct rds_rdma_op	*m_rdma_op;
			struct rds_mr		*m_rdma_mr;
		} rdma;
		struct {
			unsigned int		m_nents;
			unsigned int		m_count;
			struct scatterlist	m_sg[0];
		} data;
	};
};

/*
Loading