Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f4155eff authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'RDS-optimized-notification-for-zerocopy-completion'



Sowmini Varadhan says:

====================
RDS: optimized notification for zerocopy completion

Resending with acked-by additions: previous attempt does not show
up in Patchwork. This time with a new mail Message-Id.

RDS applications use predominantly request-response, transaction
based IPC, so that ingress and egress traffic are well-balanced,
and it is possible/desirable to reduce system-call overhead by
piggybacking the notifications for zerocopy completion response
with data.

Moreover, it has been pointed out that socket functions block
if sk_err is non-zero, thus if the RDS code does not plan/need
to use sk_error_queue path for completion notification, it
is preferable to remove the sk_error_queue related paths in
RDS.

Both of these goals are implemented in this series.

v2: removed sk_error_queue support
v3: incorporated additional code review comments (details in each patch)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents c1de13bb 6f3899e6
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -20,13 +20,11 @@ struct sock_extended_err {
#define SO_EE_ORIGIN_ICMP6	3
#define SO_EE_ORIGIN_TXSTATUS	4
#define SO_EE_ORIGIN_ZEROCOPY	5
#define SO_EE_ORIGIN_ZCOOKIE	6
#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS

#define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))

#define SO_EE_CODE_ZEROCOPY_COPIED	1
#define	SO_EE_ORIGIN_MAX_ZCOOKIES	8

/**
 *	struct scm_timestamping - timestamps exposed through cmsg
+7 −0
Original line number Diff line number Diff line
@@ -104,6 +104,7 @@
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11
#define	RDS_CMSG_ZCOPY_COOKIE		12
#define	RDS_CMSG_ZCOPY_COMPLETION	13

#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
#define RDS_RDMA_DROPPED	3
#define RDS_RDMA_OTHER_ERROR	4

/* Capacity of the cookies[] array in struct rds_zcopy_cookies. */
#define	RDS_MAX_ZCOOKIES	8
/*
 * A batch of MSG_ZEROCOPY completion cookies.
 *
 * The kernel side stores this structure in the control buffer of an skb
 * and appends one cookie per completed zerocopy send until the array is
 * full.
 * NOTE(review): presumably this is the payload layout of the
 * RDS_CMSG_ZCOPY_COMPLETION control message -- confirm against the
 * receive path, which is not visible here.
 */
struct rds_zcopy_cookies {
	__u32 num;	/* number of valid entries in cookies[], at most RDS_MAX_ZCOOKIES */
	__u32 cookies[RDS_MAX_ZCOOKIES];	/* cookie values; entries [0, num) are filled in order */
};

/*
 * Common set of flags for all RDMA related structs
 */
+5 −2
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
	rds_send_drop_to(rs, NULL);
	rds_rdma_drop_keys(rs);
	rds_notify_queue_get(rs, NULL);
	__skb_queue_purge(&rs->rs_zcookie_queue);

	spin_lock_bh(&rds_sock_lock);
	list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 *  -	to signal that a previously congested destination may have become
 *	uncongested
 *  -	A notification has been queued to the socket (this can be a congestion
 *	update, or a RDMA completion).
 *	update, or a RDMA completion, or a MSG_ZEROCOPY completion).
 *
 * EPOLLOUT is asserted if there is room on the send queue. This does not mean
 * however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
		spin_unlock(&rs->rs_lock);
	}
	if (!list_empty(&rs->rs_recv_queue) ||
	    !list_empty(&rs->rs_notify_queue))
	    !list_empty(&rs->rs_notify_queue) ||
	    !skb_queue_empty(&rs->rs_zcookie_queue))
		mask |= (EPOLLIN | EPOLLRDNORM);
	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
		mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
	INIT_LIST_HEAD(&rs->rs_recv_queue);
	INIT_LIST_HEAD(&rs->rs_notify_queue);
	INIT_LIST_HEAD(&rs->rs_cong_list);
	skb_queue_head_init(&rs->rs_zcookie_queue);
	spin_lock_init(&rs->rs_rdma_lock);
	rs->rs_rdma_keys = RB_ROOT;
	rs->rs_rx_traces = 0;
+16 −22
Original line number Diff line number Diff line
@@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref);

static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
	int ncookies;
	u32 *ptr;
	struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
	int ncookies = ck->num;

	if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
	if (ncookies == RDS_MAX_ZCOOKIES)
		return false;
	ncookies = serr->ee.ee_data;
	if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
		return false;
	ptr = skb_put(skb, sizeof(u32));
	*ptr = cookie;
	serr->ee.ee_data = ++ncookies;
	ck->cookies[ncookies] = cookie;
	ck->num =  ++ncookies;
	return true;
}

static void rds_rm_zerocopy_callback(struct rds_sock *rs,
				     struct rds_znotifier *znotif)
{
	struct sock *sk = rds_rs_to_sk(rs);
	struct sk_buff *skb, *tail;
	struct sock_exterr_skb *serr;
	unsigned long flags;
	struct sk_buff_head *q;
	u32 cookie = znotif->z_cookie;
	struct rds_zcopy_cookies *ck;

	q = &sk->sk_error_queue;
	q = &rs->rs_zcookie_queue;
	spin_lock_irqsave(&q->lock, flags);
	tail = skb_peek_tail(q);

@@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
		spin_unlock_irqrestore(&q->lock, flags);
		mm_unaccount_pinned_pages(&znotif->z_mmp);
		consume_skb(rds_skb_from_znotifier(znotif));
		sk->sk_error_report(sk);
		/* caller invokes rds_wake_sk_sleep() */
		return;
	}

	skb = rds_skb_from_znotifier(znotif);
	serr = SKB_EXT_ERR(skb);
	memset(&serr->ee, 0, sizeof(serr->ee));
	serr->ee.ee_errno = 0;
	serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
	serr->ee.ee_info = 0;
	ck = (struct rds_zcopy_cookies *)skb->cb;
	memset(ck, 0, sizeof(*ck));
	WARN_ON(!skb_zcookie_add(skb, cookie));

	__skb_queue_tail(q, skb);

	spin_unlock_irqrestore(&q->lock, flags);
	sk->sk_error_report(sk);
	/* caller invokes rds_wake_sk_sleep() */

	mm_unaccount_pinned_pages(&znotif->z_mmp);
}
@@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm)
		if (rm->data.op_mmp_znotifier) {
			zcopy = true;
			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
			rds_wake_sk_sleep(rs);
			rm->data.op_mmp_znotifier = NULL;
		}
		sock_put(rds_rs_to_sk(rs));
@@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
		int total_copied = 0;
		struct sk_buff *skb;

		skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
				GFP_KERNEL);
		skb = alloc_skb(0, GFP_KERNEL);
		if (!skb)
			return -ENOMEM;
		BUILD_BUG_ON(sizeof(skb->cb) <
			     max_t(int, sizeof(struct rds_znotifier),
				   sizeof(struct rds_zcopy_cookies)));
		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
					    length)) {
+2 −0
Original line number Diff line number Diff line
@@ -603,6 +603,8 @@ struct rds_sock {
	/* Socket receive path trace points*/
	u8			rs_rx_traces;
	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];

	struct sk_buff_head	rs_zcookie_queue;
};

static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
Loading