include/linux/sunrpc/svc_rdma.h +7 −5

@@ -71,7 +71,8 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
-    struct svc_rdma_op_ctxt *next;
+    struct svc_rdma_op_ctxt *read_hdr;
+    struct list_head free_list;
     struct xdr_buf arg;
     struct list_head dto_q;
     enum ib_wr_opcode wr_op;
@@ -85,7 +86,6 @@ struct svc_rdma_op_ctxt {
     struct page *pages[RPCSVC_MAXPAGES];
 };
 
-#define RDMACTXT_F_READ_DONE	1
 #define RDMACTXT_F_LAST_CTXT	2
 
 struct svcxprt_rdma {
@@ -104,7 +104,8 @@ struct svcxprt_rdma {
     struct ib_pd *sc_pd;
 
-    struct svc_rdma_op_ctxt *sc_ctxt_head;
+    atomic_t	     sc_ctxt_used;
+    struct list_head sc_ctxt_free;
     int		     sc_ctxt_cnt;
     int		     sc_ctxt_bump;
     int		     sc_ctxt_max;
@@ -123,6 +124,7 @@ struct svcxprt_rdma {
     struct list_head sc_dto_q;		/* DTO tasklet I/O pending Q */
     struct list_head sc_read_complete_q;
     spinlock_t	     sc_read_complete_lock;
+    struct work_struct sc_work;
 };
 /* sc_flags */
 #define RDMAXPRT_RQ_PENDING	1
@@ -164,7 +166,7 @@ extern int svc_rdma_sendto(struct svc_rqst *);
 /* svc_rdma_transport.c */
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
-extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
+extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
 			       enum rpcrdma_errcode);
 struct page *svc_rdma_get_page(void);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *);
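The header change replaces the hand-rolled *next pointer chain with a struct list_head embedded in each context, plus an atomic count of contexts currently handed out. A minimal sketch of why the embedded list_head is enough (hypothetical op_ctxt/cache_get/cache_put names, not the kernel code): list_entry() recovers the containing structure from its list member, so the standard list helpers can manage the free list without a bespoke pointer field.

#include <linux/list.h>
#include <linux/spinlock.h>

struct op_ctxt {                       /* hypothetical stand-in for svc_rdma_op_ctxt */
    struct list_head free_list;        /* links this ctxt into the owner's free list */
    /* ... payload fields ... */
};

static struct op_ctxt *cache_get(struct list_head *free, spinlock_t *lock)
{
    struct op_ctxt *ctxt = NULL;

    spin_lock_bh(lock);
    if (!list_empty(free)) {
        /* list_entry() maps the list_head back to the containing ctxt */
        ctxt = list_entry(free->next, struct op_ctxt, free_list);
        list_del_init(&ctxt->free_list);
    }
    spin_unlock_bh(lock);
    return ctxt;
}

static void cache_put(struct op_ctxt *ctxt, struct list_head *free, spinlock_t *lock)
{
    spin_lock_bh(lock);
    list_add(&ctxt->free_list, free);
    spin_unlock_bh(lock);
}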
net/sunrpc/svc_xprt.c +0 −23

@@ -6,30 +6,9 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
     if (!(xprt->xpt_flags &
           ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
         return;
-    if (test_bit(XPT_DEAD, &xprt->xpt_flags))
-        return;
 
     cpu = get_cpu();
     pool = svc_pool_for_cpu(xprt->xpt_server, cpu);

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +31 −71

@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
  * On our side, we need to read into a pagelist. The first page immediately
  * follows the RPC header.
  *
- * This function returns 1 to indicate success. The data is not yet in
+ * This function returns:
+ * 0 - No error and no read-list found.
+ *
+ * 1 - Successful read-list processing. The data is not yet in
  * the pagelist and therefore the RPC request must be deferred. The
  * I/O completion will enqueue the transport again and
  * svc_rdma_recvfrom will complete the request.
  *
+ * <0 - Error processing/posting read-list.
+ *
  * NOTE: The ctxt must not be touched after the last WR has been posted
  * because the I/O completion processing may occur on another
  * processor and free / modify the context. Ne touche pas!
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
     u64 sgl_offset;
     struct rpcrdma_read_chunk *ch;
     struct svc_rdma_op_ctxt *ctxt = NULL;
-    struct svc_rdma_op_ctxt *head;
     struct svc_rdma_op_ctxt *tmp_sge_ctxt;
     struct svc_rdma_op_ctxt *tmp_ch_ctxt;
     struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
     ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
 
     svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
+    if (ch_count > RPCSVC_MAXPAGES)
+        return -EINVAL;
     sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
                                 sge, ch_sge_ary,
                                 ch_count, byte_count);
-    head = svc_rdma_get_context(xprt);
     sgl_offset = 0;
     ch_no = 0;
 
     for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
          ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-        if (!ctxt)
-            ctxt = head;
-        else {
-            ctxt->next = svc_rdma_get_context(xprt);
-            ctxt = ctxt->next;
-        }
-        ctxt->next = NULL;
+        ctxt = svc_rdma_get_context(xprt);
         ctxt->direction = DMA_FROM_DEVICE;
-        clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
         clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
         /* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
          * the client and the RPC needs to be enqueued.
          */
             set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-            ctxt->next = hdr_ctxt;
-            hdr_ctxt->next = head;
+            ctxt->read_hdr = hdr_ctxt;
         }
         /* Post the read */
         err = svc_rdma_send(xprt, &read_wr);
         if (err) {
-            printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+            printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
                    err);
-            /*
-             * Break the circular list so free knows when
-             * to stop if the error happened to occur on
-             * the last read
-             */
-            ctxt->next = NULL;
+            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+            svc_rdma_put_context(ctxt, 0);
             goto out;
         }
         atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
             goto next_sge;
         }
         sgl_offset = 0;
-        err = 0;
+        err = 1;
     }
 
  out:
@@ -389,25 +382,12 @@ next_sge:
     while (rqstp->rq_resused)
         rqstp->rq_respages[--rqstp->rq_resused] = NULL;
 
-    if (err) {
-        printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-        set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-        /* Free the linked list of read contexts */
-        while (head != NULL) {
-            ctxt = head->next;
-            svc_rdma_put_context(head, 1);
-            head = ctxt;
-        }
-        return 0;
-    }
-
-    return 1;
+    return err;
 }
 static int rdma_read_complete(struct svc_rqst *rqstp,
-                              struct svc_rdma_op_ctxt *data)
+                              struct svc_rdma_op_ctxt *head)
 {
-    struct svc_rdma_op_ctxt *head = data->next;
     int page_no;
     int ret;
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
     rqstp->rq_arg.len = head->arg.len;
     rqstp->rq_arg.buflen = head->arg.buflen;
 
+    /* Free the context */
+    svc_rdma_put_context(head, 0);
+
     /* XXX: What should this be? */
     rqstp->rq_prot = IPPROTO_MAX;
-
-    /*
-     * Free the contexts we used to build the RDMA_READ. We have
-     * to be careful here because the context list uses the same
-     * next pointer used to chain the contexts associated with the
-     * RDMA_READ
-     */
-    data->next = NULL;	/* terminate circular list */
-    do {
-        data = head->next;
-        svc_rdma_put_context(head, 0);
-        head = data;
-    } while (head != NULL);
+    svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
 
     ret = rqstp->rq_arg.head[0].iov_len
         + rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
         ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
         rqstp->rq_arg.head[0].iov_len);
 
-    /* Indicate that we've consumed an RQ credit */
-    rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
     svc_xprt_received(rqstp->rq_xprt);
     return ret;
 }
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
     dprintk("svcrdma: rqstp=%p\n", rqstp);
 
-    /*
-     * The rq_xprt_ctxt indicates if we've consumed an RQ credit
-     * or not. It is used in the rdma xpo_release_rqst function to
-     * determine whether or not to return an RQ WQE to the RQ.
-     */
-    rqstp->rq_xprt_ctxt = NULL;
-
     spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
     if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
         ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
     /* If the request is invalid, reply with an error */
     if (len < 0) {
         if (len == -ENOSYS)
-            (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+            svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
         goto close_out;
     }
 
-    /* Read read-list data. If we would need to wait, defer
-     * it. Not that in this case, we don't return the RQ credit
-     * until after the read completes.
-     */
-    if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
+    /* Read read-list data. */
+    ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+    if (ret > 0) {
+        /* read-list posted, defer until data received from client. */
         svc_xprt_received(xprt);
         return 0;
     }
-
-    /* Indicate we've consumed an RQ credit */
-    rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
+    if (ret < 0) {
+        /* Post of read-list failed, free context. */
+        svc_rdma_put_context(ctxt, 1);
+        return 0;
+    }
 
     ret = rqstp->rq_arg.head[0].iov_len
         + rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
     return ret;
 
  close_out:
-    if (ctxt) {
+    if (ctxt)
         svc_rdma_put_context(ctxt, 1);
-        /* Indicate we've consumed an RQ credit */
-        rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
-    }
     dprintk("svcrdma: transport %p is closing\n", xprt);
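The recvfrom changes hinge on the new three-way return convention of rdma_read_xdr(). Restated as a minimal caller sketch, condensed from the hunk above rather than new behaviour:

    ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
    if (ret > 0) {
        /* RDMA_READs were posted; the completion handler will queue the
         * header ctxt on sc_read_complete_q and re-enqueue the transport,
         * so the request is simply deferred here. */
        svc_xprt_received(xprt);
        return 0;
    }
    if (ret < 0) {
        /* Nothing was posted; the receive ctxt is still ours to free. */
        svc_rdma_put_context(ctxt, 1);
        return 0;
    }
    /* ret == 0: no read list; rq_arg already holds the complete request. */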
"Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } /* Prepare the context */ /* Prepare the context */ ctxt->pages[0] = page; ctxt->pages[0] = page; ctxt->count = 1; ctxt->count = 1; Loading net/sunrpc/xprtrdma/svc_rdma_transport.c +174 −116 Original line number Original line Diff line number Diff line Loading @@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); if (ctxt) { if (ctxt) { at_least_one = 1; at_least_one = 1; ctxt->next = xprt->sc_ctxt_head; INIT_LIST_HEAD(&ctxt->free_list); xprt->sc_ctxt_head = ctxt; list_add(&ctxt->free_list, &xprt->sc_ctxt_free); } else { } else { /* kmalloc failed...give up for now */ /* kmalloc failed...give up for now */ xprt->sc_ctxt_cnt--; xprt->sc_ctxt_cnt--; Loading @@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) while (1) { while (1) { spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); if (unlikely(xprt->sc_ctxt_head == NULL)) { if (unlikely(list_empty(&xprt->sc_ctxt_free))) { /* Try to bump my cache. */ /* Try to bump my cache. */ spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); Loading @@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); schedule_timeout_uninterruptible(msecs_to_jiffies(500)); continue; continue; } } ctxt = xprt->sc_ctxt_head; ctxt = list_entry(xprt->sc_ctxt_free.next, xprt->sc_ctxt_head = ctxt->next; struct svc_rdma_op_ctxt, free_list); list_del_init(&ctxt->free_list); spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); ctxt->xprt = xprt; ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; ctxt->count = 0; atomic_inc(&xprt->sc_ctxt_used); break; break; } } return ctxt; return ctxt; Loading @@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) put_page(ctxt->pages[i]); put_page(ctxt->pages[i]); for (i = 0; i < ctxt->count; i++) for (i = 0; i < ctxt->count; i++) dma_unmap_single(xprt->sc_cm_id->device->dma_device, ib_dma_unmap_single(xprt->sc_cm_id->device, ctxt->sge[i].addr, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->sge[i].length, ctxt->direction); ctxt->direction); spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); ctxt->next = xprt->sc_ctxt_head; list_add(&ctxt->free_list, &xprt->sc_ctxt_free); xprt->sc_ctxt_head = ctxt; spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); atomic_dec(&xprt->sc_ctxt_used); } } /* ib_cq event handler */ /* ib_cq event handler */ Loading Loading @@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data) list_del_init(&xprt->sc_dto_q); list_del_init(&xprt->sc_dto_q); spin_unlock_irqrestore(&dto_lock, flags); spin_unlock_irqrestore(&dto_lock, flags); if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); rq_cq_reap(xprt); rq_cq_reap(xprt); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); /* * If data arrived before established event, * don't enqueue. This defers RPC I/O until the * RDMA connection is complete. 
net/sunrpc/xprtrdma/svc_rdma_transport.c +174 −116

@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
         spin_lock_bh(&xprt->sc_ctxt_lock);
         if (ctxt) {
             at_least_one = 1;
-            ctxt->next = xprt->sc_ctxt_head;
-            xprt->sc_ctxt_head = ctxt;
+            INIT_LIST_HEAD(&ctxt->free_list);
+            list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
         } else {
             /* kmalloc failed...give up for now */
             xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
     while (1) {
         spin_lock_bh(&xprt->sc_ctxt_lock);
-        if (unlikely(xprt->sc_ctxt_head == NULL)) {
+        if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
             /* Try to bump my cache. */
             spin_unlock_bh(&xprt->sc_ctxt_lock);
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
             schedule_timeout_uninterruptible(msecs_to_jiffies(500));
             continue;
         }
-        ctxt = xprt->sc_ctxt_head;
-        xprt->sc_ctxt_head = ctxt->next;
+        ctxt = list_entry(xprt->sc_ctxt_free.next,
+                          struct svc_rdma_op_ctxt,
+                          free_list);
+        list_del_init(&ctxt->free_list);
         spin_unlock_bh(&xprt->sc_ctxt_lock);
         ctxt->xprt = xprt;
         INIT_LIST_HEAD(&ctxt->dto_q);
         ctxt->count = 0;
+        atomic_inc(&xprt->sc_ctxt_used);
         break;
     }
     return ctxt;
@@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
             put_page(ctxt->pages[i]);
 
     for (i = 0; i < ctxt->count; i++)
-        dma_unmap_single(xprt->sc_cm_id->device->dma_device,
-                         ctxt->sge[i].addr,
-                         ctxt->sge[i].length,
-                         ctxt->direction);
+        ib_dma_unmap_single(xprt->sc_cm_id->device,
+                            ctxt->sge[i].addr,
+                            ctxt->sge[i].length,
+                            ctxt->direction);
 
     spin_lock_bh(&xprt->sc_ctxt_lock);
-    ctxt->next = xprt->sc_ctxt_head;
-    xprt->sc_ctxt_head = ctxt;
+    list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
     spin_unlock_bh(&xprt->sc_ctxt_lock);
+    atomic_dec(&xprt->sc_ctxt_used);
 }
 
 /* ib_cq event handler */
@@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data)
         list_del_init(&xprt->sc_dto_q);
         spin_unlock_irqrestore(&dto_lock, flags);
 
-        if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
-            ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-            rq_cq_reap(xprt);
-            set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-            /*
-             * If data arrived before established event,
-             * don't enqueue. This defers RPC I/O until the
-             * RDMA connection is complete.
-             */
-            if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
-                svc_xprt_enqueue(&xprt->sc_xprt);
-        }
-        if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
-            ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-            sq_cq_reap(xprt);
-        }
+        rq_cq_reap(xprt);
+        sq_cq_reap(xprt);
         svc_xprt_put(&xprt->sc_xprt);
 
         spin_lock_irqsave(&dto_lock, flags);
@@ -263,6 +252,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
     struct svcxprt_rdma *xprt = cq_context;
     unsigned long flags;
 
+    /* Guard against unconditional flush call for destroyed QP */
+    if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+        return;
+
     /*
      * Set the bit regardless of whether or not it's on the list
      * because it may be on the list already due to an SQ
@@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
  *
  * Take all completing WC off the CQE and enqueue the associated DTO
  * context on the dto_q for the transport.
+ *
+ * Note that caller must hold a transport reference.
  */
 static void rq_cq_reap(struct svcxprt_rdma *xprt)
 {
@@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
     struct ib_wc wc;
     struct svc_rdma_op_ctxt *ctxt = NULL;
 
+    if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
+        return;
+
+    ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
     atomic_inc(&rdma_stat_rq_poll);
 
-    spin_lock_bh(&xprt->sc_rq_dto_lock);
     while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
         ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
         ctxt->wc_status = wc.status;
         ctxt->byte_len = wc.byte_len;
         if (wc.status != IB_WC_SUCCESS) {
             /* Close the transport */
+            dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
             set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
             svc_rdma_put_context(ctxt, 1);
+            svc_xprt_put(&xprt->sc_xprt);
             continue;
         }
+        spin_lock_bh(&xprt->sc_rq_dto_lock);
         list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+        spin_unlock_bh(&xprt->sc_rq_dto_lock);
+        svc_xprt_put(&xprt->sc_xprt);
     }
-    spin_unlock_bh(&xprt->sc_rq_dto_lock);
 
     if (ctxt)
         atomic_inc(&rdma_stat_rq_prod);
+
+    set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+    /*
+     * If data arrived before established event,
+     * don't enqueue. This defers RPC I/O until the
+     * RDMA connection is complete.
+     */
+    if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
+        svc_xprt_enqueue(&xprt->sc_xprt);
 }
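Both reap functions now follow the same drain pattern: consume the pending bit so only one caller drains at a time, re-arm the CQ before polling so a completion that arrives mid-drain raises a fresh notification rather than being lost, then poll until the CQ is empty. A condensed sketch of the pattern follows; passing the pending bit as a parameter is a simplification (the real functions use RDMAXPRT_RQ_PENDING and RDMAXPRT_SQ_PENDING directly), and the per-completion handling is elided.

static void reap_cq(struct svcxprt_rdma *xprt, struct ib_cq *cq, int pending_bit)
{
    struct ib_wc wc;

    if (!test_and_clear_bit(pending_bit, &xprt->sc_flags))
        return;                          /* someone else already drained */

    /* Re-arm before polling so nothing completed mid-drain is missed. */
    ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

    while (ib_poll_cq(cq, 1, &wc) > 0) {
        /* ... handle the work completion ... */
    }
}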
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
+ *
+ * Note that caller must hold a transport reference.
  */
 static void sq_cq_reap(struct svcxprt_rdma *xprt)
 {
@@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
     struct ib_cq *cq = xprt->sc_sq_cq;
     int ret;
 
+    if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
+        return;
+
+    ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
     atomic_inc(&rdma_stat_sq_poll);
     while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
         ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
@@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
         case IB_WR_RDMA_READ:
             if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+                struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+                BUG_ON(!read_hdr);
                 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
                 spin_lock_bh(&xprt->sc_read_complete_lock);
-                list_add_tail(&ctxt->dto_q,
+                list_add_tail(&read_hdr->dto_q,
                               &xprt->sc_read_complete_q);
                 spin_unlock_bh(&xprt->sc_read_complete_lock);
                 svc_xprt_enqueue(&xprt->sc_xprt);
             }
+            svc_rdma_put_context(ctxt, 0);
             break;
 
         default:
@@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
                    wc.opcode, wc.status);
             break;
         }
+        svc_xprt_put(&xprt->sc_xprt);
     }
 
     if (ctxt)
@@ -376,6 +397,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
     struct svcxprt_rdma *xprt = cq_context;
     unsigned long flags;
 
+    /* Guard against unconditional flush call for destroyed QP */
+    if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+        return;
+
     /*
      * Set the bit regardless of whether or not it's on the list
      * because it may be on the list already due to an RQ
@@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
     xprt->sc_ctxt_max = ctxt_max;
     xprt->sc_ctxt_bump = ctxt_bump;
     xprt->sc_ctxt_cnt = 0;
-    xprt->sc_ctxt_head = NULL;
+    atomic_set(&xprt->sc_ctxt_used, 0);
+
+    INIT_LIST_HEAD(&xprt->sc_ctxt_free);
     for (i = 0; i < ctxt_count; i++) {
         ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
         if (ctxt) {
-            ctxt->next = xprt->sc_ctxt_head;
-            xprt->sc_ctxt_head = ctxt;
+            INIT_LIST_HEAD(&ctxt->free_list);
+            list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
             xprt->sc_ctxt_cnt++;
         }
     }
 }
 
-static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
+static void destroy_context_cache(struct svcxprt_rdma *xprt)
 {
-    struct svc_rdma_op_ctxt *next;
-    if (!ctxt)
-        return;
-    do {
-        next = ctxt->next;
-        kfree(ctxt);
-        ctxt = next;
-    } while (next);
+    while (!list_empty(&xprt->sc_ctxt_free)) {
+        struct svc_rdma_op_ctxt *ctxt;
+        ctxt = list_entry(xprt->sc_ctxt_free.next,
+                          struct svc_rdma_op_ctxt,
+                          free_list);
+        list_del_init(&ctxt->free_list);
+        kfree(ctxt);
+    }
 }
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
                          reqs +
                          cma_xprt->sc_sq_depth +
                          RPCRDMA_MAX_THREADS + 1); /* max */
-    if (!cma_xprt->sc_ctxt_head) {
+    if (list_empty(&cma_xprt->sc_ctxt_free)) {
         kfree(cma_xprt);
         return NULL;
     }
@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
     recv_wr.num_sge = ctxt->count;
     recv_wr.wr_id = (u64)(unsigned long)ctxt;
 
+    svc_xprt_get(&xprt->sc_xprt);
     ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
+    if (ret) {
+        svc_xprt_put(&xprt->sc_xprt);
+        svc_rdma_put_context(ctxt, 1);
+    }
     return ret;
 }
@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 {
     struct svcxprt_rdma *listen_xprt = new_cma_id->context;
     struct svcxprt_rdma *newxprt;
+    struct sockaddr *sa;
 
     /* Create a new transport */
     newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
     dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
             newxprt, newxprt->sc_cm_id, listen_xprt);
 
+    /* Set the local and remote addresses in the transport */
+    sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+    svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+    sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+    svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+
     /*
      * Enqueue the new transport on the accept queue of the listening
      * transport
@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
         if (xprt) {
             set_bit(XPT_CLOSE, &xprt->xpt_flags);
             svc_xprt_enqueue(xprt);
+            svc_xprt_put(xprt);
         }
         break;
     case RDMA_CM_EVENT_DEVICE_REMOVAL:
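The svc_rdma_post_recv() hunk introduces the reference discipline this patch applies to every work request: each WR posted to the hardware pins the transport with svc_xprt_get(), and the reference is dropped either on the error path (the WR never reached the queue) or in the reap functions when the corresponding completion is polled. Condensed from the hunks above, not new behaviour:

    svc_xprt_get(&xprt->sc_xprt);            /* one reference per posted WR */
    ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
    if (ret) {
        svc_xprt_put(&xprt->sc_xprt);        /* post failed: drop it now */
        svc_rdma_put_context(ctxt, 1);
    }
    /* otherwise rq_cq_reap()/sq_cq_reap() drop it when the WC is reaped */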
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
     cma_xprt = rdma_create_xprt(serv, 1);
     if (!cma_xprt)
-        return ERR_PTR(ENOMEM);
+        return ERR_PTR(-ENOMEM);
     xprt = &cma_xprt->sc_xprt;
 
     listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
     if (IS_ERR(listen_id)) {
-        svc_xprt_put(&cma_xprt->sc_xprt);
-        dprintk("svcrdma: rdma_create_id failed = %ld\n",
-                PTR_ERR(listen_id));
-        return (void *)listen_id;
+        ret = PTR_ERR(listen_id);
+        dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+        goto err0;
     }
 
     ret = rdma_bind_addr(listen_id, sa);
     if (ret) {
-        rdma_destroy_id(listen_id);
-        svc_xprt_put(&cma_xprt->sc_xprt);
         dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
-        return ERR_PTR(ret);
+        goto err1;
     }
     cma_xprt->sc_cm_id = listen_id;
 
     ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
     if (ret) {
-        rdma_destroy_id(listen_id);
-        svc_xprt_put(&cma_xprt->sc_xprt);
         dprintk("svcrdma: rdma_listen failed = %d\n", ret);
-        return ERR_PTR(ret);
+        goto err1;
     }
 
     /*
@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
     svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
 
     return &cma_xprt->sc_xprt;
+
+ err1:
+    rdma_destroy_id(listen_id);
+ err0:
+    kfree(cma_xprt);
+    return ERR_PTR(ret);
 }
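One fix above is easy to miss: ERR_PTR(ENOMEM) became ERR_PTR(-ENOMEM). ERR_PTR() encodes a negative errno in a pointer, and IS_ERR()/PTR_ERR() only recognise values in that negative range, so a positive ENOMEM would have looked like a valid transport pointer to the caller. A minimal sketch of the convention (the helper names here are hypothetical, only ERR_PTR/IS_ERR/PTR_ERR are real):

#include <linux/err.h>
#include <linux/errno.h>

static struct svc_xprt *create_or_err(int ok)
{
    if (!ok)
        return ERR_PTR(-ENOMEM);   /* must be a negative errno for IS_ERR() */
    return NULL;                   /* placeholder for a real xprt pointer */
}

static int caller_example(void)
{
    struct svc_xprt *x = create_or_err(0);

    if (IS_ERR(x))
        return PTR_ERR(x);         /* recovers -ENOMEM */
    return 0;
}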
@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     struct rdma_conn_param conn_param;
     struct ib_qp_init_attr qp_attr;
     struct ib_device_attr devattr;
-    struct sockaddr *sa;
     int ret;
     int i;
@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
         newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
     }
-    svc_xprt_get(&newxprt->sc_xprt);
     newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
     /* Register all of physical memory */
@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     /* Swap out the handler */
     newxprt->sc_cm_id->event_handler = rdma_cma_handler;
 
+    /*
+     * Arm the CQs for the SQ and RQ before accepting so we can't
+     * miss the first message
+     */
+    ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
+    ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
+
     /* Accept Connection */
     set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
     memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         newxprt->sc_max_requests,
         newxprt->sc_ord);
 
-    /* Set the local and remote addresses in the transport */
-    sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
-    svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-    sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
-    svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-
-    ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-    ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-
     return &newxprt->sc_xprt;
 
  errout:
     dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
     /* Take a reference in case the DTO handler runs */
     svc_xprt_get(&newxprt->sc_xprt);
-    if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
+    if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
         ib_destroy_qp(newxprt->sc_qp);
-        svc_xprt_put(&newxprt->sc_xprt);
-    }
     rdma_destroy_id(newxprt->sc_cm_id);
     /* This call to put will destroy the transport */
     svc_xprt_put(&newxprt->sc_xprt);
     return NULL;
 }
 
-/*
- * Post an RQ WQE to the RQ when the rqst is being released. This
- * effectively returns an RQ credit to the client. The rq_xprt_ctxt
- * will be null if the request is deferred due to an RDMA_READ or the
- * transport had no data ready (EAGAIN). Note that an RPC deferred in
- * svc_process will still return the credit, this is because the data
- * is copied and no longer consume a WQE/WC.
- */
 static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
 {
-    int err;
-    struct svcxprt_rdma *rdma =
-        container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
-    if (rqstp->rq_xprt_ctxt) {
-        BUG_ON(rqstp->rq_xprt_ctxt != rdma);
-        err = svc_rdma_post_recv(rdma);
-        if (err)
-            dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
-                    err);
-    }
-    rqstp->rq_xprt_ctxt = NULL;
 }
 
 /*
- * When connected, an svc_xprt has at least three references:
- *
- * - A reference held by the QP. We still hold that here because this
- *   code deletes the QP and puts the reference.
+ * When connected, an svc_xprt has at least two references:
  *
  * - A reference held by the cm_id between the ESTABLISHED and
  *   DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
  * - A reference held by the svc_recv code that called this function
  *   as part of close processing.
  *
- * At a minimum two references should still be held.
+ * At a minimum one references should still be held.
  */
 static void svc_rdma_detach(struct svc_xprt *xprt)
 {
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
 
     /* Disconnect and flush posted WQE */
     rdma_disconnect(rdma->sc_cm_id);
-
-    /* Destroy the QP if present (not a listener) */
-    if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
-        ib_destroy_qp(rdma->sc_qp);
-        svc_xprt_put(xprt);
-    }
-
-    /* Destroy the CM ID */
-    rdma_destroy_id(rdma->sc_cm_id);
 }
-static void svc_rdma_free(struct svc_xprt *xprt)
+static void __svc_rdma_free(struct work_struct *work)
 {
-    struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
+    struct svcxprt_rdma *rdma =
+        container_of(work, struct svcxprt_rdma, sc_work);
     dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
 
     /* We should only be called from kref_put */
-    BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+    BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+
+    /*
+     * Destroy queued, but not processed read completions. Note
+     * that this cleanup has to be done before destroying the
+     * cm_id because the device ptr is needed to unmap the dma in
+     * svc_rdma_put_context.
+     */
+    spin_lock_bh(&rdma->sc_read_complete_lock);
+    while (!list_empty(&rdma->sc_read_complete_q)) {
+        struct svc_rdma_op_ctxt *ctxt;
+        ctxt = list_entry(rdma->sc_read_complete_q.next,
+                          struct svc_rdma_op_ctxt,
+                          dto_q);
+        list_del_init(&ctxt->dto_q);
+        svc_rdma_put_context(ctxt, 1);
+    }
+    spin_unlock_bh(&rdma->sc_read_complete_lock);
+
+    /* Destroy queued, but not processed recv completions */
+    spin_lock_bh(&rdma->sc_rq_dto_lock);
+    while (!list_empty(&rdma->sc_rq_dto_q)) {
+        struct svc_rdma_op_ctxt *ctxt;
+        ctxt = list_entry(rdma->sc_rq_dto_q.next,
+                          struct svc_rdma_op_ctxt,
+                          dto_q);
+        list_del_init(&ctxt->dto_q);
+        svc_rdma_put_context(ctxt, 1);
+    }
+    spin_unlock_bh(&rdma->sc_rq_dto_lock);
+
+    /* Warn if we leaked a resource or under-referenced */
+    WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+
+    /* Destroy the QP if present (not a listener) */
+    if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
+        ib_destroy_qp(rdma->sc_qp);
 
     if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
         ib_destroy_cq(rdma->sc_sq_cq);
@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
     if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
         ib_dealloc_pd(rdma->sc_pd);
 
-    destroy_context_cache(rdma->sc_ctxt_head);
+    /* Destroy the CM ID */
+    rdma_destroy_id(rdma->sc_cm_id);
+
+    destroy_context_cache(rdma);
     kfree(rdma);
 }
 
+static void svc_rdma_free(struct svc_xprt *xprt)
+{
+    struct svcxprt_rdma *rdma =
+        container_of(xprt, struct svcxprt_rdma, sc_xprt);
+    INIT_WORK(&rdma->sc_work, __svc_rdma_free);
+    schedule_work(&rdma->sc_work);
+}
+
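svc_rdma_free() is now just a trampoline: the real teardown moved to __svc_rdma_free() running on the shared kernel workqueue, so destroying verbs objects (which can sleep) no longer happens in whatever context dropped the last transport reference. The idiom, reduced to a hypothetical object (struct foo and its helpers are illustrative only; the real code uses svcxprt_rdma and the new sc_work field):

#include <linux/workqueue.h>
#include <linux/slab.h>

struct foo {                              /* hypothetical stand-in for svcxprt_rdma */
    struct work_struct work;              /* mirrors the new sc_work member */
    /* ... resources whose release may sleep ... */
};

static void foo_free_work(struct work_struct *w)
{
    struct foo *f = container_of(w, struct foo, work);

    /* process context here: safe to call teardown that may sleep */
    kfree(f);
}

static void foo_free(struct foo *f)
{
    INIT_WORK(&f->work, foo_free_work);
    schedule_work(&f->work);              /* defer to the shared workqueue */
}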
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
     int ret;
 
     if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
-        return 0;
+        return -ENOTCONN;
 
     BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
     BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
         if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
             spin_unlock_bh(&xprt->sc_lock);
             atomic_inc(&rdma_stat_sq_starve);
-            /* See if we can reap some SQ WR */
+
+            /* See if we can opportunistically reap SQ WR to make room */
             sq_cq_reap(xprt);
 
             /* Wait until SQ WR available if SQ still full */
@@ -1041,21 +1097,24 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
             continue;
         }
         /* Bumped used SQ WR count and post */
+        svc_xprt_get(&xprt->sc_xprt);
         ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
         if (!ret)
             atomic_inc(&xprt->sc_sq_count);
-        else
+        else {
+            svc_xprt_put(&xprt->sc_xprt);
             dprintk("svcrdma: failed to post SQ WR rc=%d, "
                     "sc_sq_count=%d, sc_sq_depth=%d\n",
                     ret, atomic_read(&xprt->sc_sq_count),
                     xprt->sc_sq_depth);
+        }
         spin_unlock_bh(&xprt->sc_lock);
         break;
     }
     return ret;
 }
 
-int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
-                        enum rpcrdma_errcode err)
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+                         enum rpcrdma_errcode err)
 {
     struct ib_send_wr err_wr;
@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
     /* Post It */
     ret = svc_rdma_send(xprt, &err_wr);
     if (ret) {
-        dprintk("svcrdma: Error posting send = %d\n", ret);
+        dprintk("svcrdma: Error %d posting send for protocol error\n",
+                ret);
         svc_rdma_put_context(ctxt, 1);
     }
-    return ret;
 }
include/linux/sunrpc/svc_rdma.h +7 −5 Original line number Original line Diff line number Diff line Loading @@ -71,7 +71,8 @@ extern atomic_t rdma_stat_sq_prod; * completes. * completes. */ */ struct svc_rdma_op_ctxt { struct svc_rdma_op_ctxt { struct svc_rdma_op_ctxt *next; struct svc_rdma_op_ctxt *read_hdr; struct list_head free_list; struct xdr_buf arg; struct xdr_buf arg; struct list_head dto_q; struct list_head dto_q; enum ib_wr_opcode wr_op; enum ib_wr_opcode wr_op; Loading @@ -85,7 +86,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; struct page *pages[RPCSVC_MAXPAGES]; }; }; #define RDMACTXT_F_READ_DONE 1 #define RDMACTXT_F_LAST_CTXT 2 #define RDMACTXT_F_LAST_CTXT 2 struct svcxprt_rdma { struct svcxprt_rdma { Loading @@ -104,7 +104,8 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; struct ib_pd *sc_pd; struct svc_rdma_op_ctxt *sc_ctxt_head; atomic_t sc_ctxt_used; struct list_head sc_ctxt_free; int sc_ctxt_cnt; int sc_ctxt_cnt; int sc_ctxt_bump; int sc_ctxt_bump; int sc_ctxt_max; int sc_ctxt_max; Loading @@ -123,6 +124,7 @@ struct svcxprt_rdma { struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ struct list_head sc_read_complete_q; struct list_head sc_read_complete_q; spinlock_t sc_read_complete_lock; spinlock_t sc_read_complete_lock; struct work_struct sc_work; }; }; /* sc_flags */ /* sc_flags */ #define RDMAXPRT_RQ_PENDING 1 #define RDMAXPRT_RQ_PENDING 1 Loading Loading @@ -164,7 +166,7 @@ extern int svc_rdma_sendto(struct svc_rqst *); /* svc_rdma_transport.c */ /* svc_rdma_transport.c */ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); enum rpcrdma_errcode); struct page *svc_rdma_get_page(void); struct page *svc_rdma_get_page(void); extern int svc_rdma_post_recv(struct svcxprt_rdma *); extern int svc_rdma_post_recv(struct svcxprt_rdma *); Loading
net/sunrpc/svc_xprt.c +0 −23 Original line number Original line Diff line number Diff line Loading @@ -6,30 +6,9 @@ #include <linux/sched.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/unistd.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/file.h> #include <linux/freezer.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/kthread.h> #include <net/sock.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/tcp_states.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svc_xprt.h> Loading Loading @@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (!(xprt->xpt_flags & if (!(xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) return; return; if (test_bit(XPT_DEAD, &xprt->xpt_flags)) return; cpu = get_cpu(); cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); Loading
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +31 −71 Original line number Original line Diff line number Diff line Loading @@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) * On our side, we need to read into a pagelist. The first page immediately * On our side, we need to read into a pagelist. The first page immediately * follows the RPC header. * follows the RPC header. * * * This function returns 1 to indicate success. The data is not yet in * This function returns: * 0 - No error and no read-list found. * * 1 - Successful read-list processing. The data is not yet in * the pagelist and therefore the RPC request must be deferred. The * the pagelist and therefore the RPC request must be deferred. The * I/O completion will enqueue the transport again and * I/O completion will enqueue the transport again and * svc_rdma_recvfrom will complete the request. * svc_rdma_recvfrom will complete the request. * * * <0 - Error processing/posting read-list. * * NOTE: The ctxt must not be touched after the last WR has been posted * NOTE: The ctxt must not be touched after the last WR has been posted * because the I/O completion processing may occur on another * because the I/O completion processing may occur on another * processor and free / modify the context. Ne touche pas! * processor and free / modify the context. Ne touche pas! Loading @@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, u64 sgl_offset; u64 sgl_offset; struct rpcrdma_read_chunk *ch; struct rpcrdma_read_chunk *ch; struct svc_rdma_op_ctxt *ctxt = NULL; struct svc_rdma_op_ctxt *ctxt = NULL; struct svc_rdma_op_ctxt *head; struct svc_rdma_op_ctxt *tmp_sge_ctxt; struct svc_rdma_op_ctxt *tmp_sge_ctxt; struct svc_rdma_op_ctxt *tmp_ch_ctxt; struct svc_rdma_op_ctxt *tmp_ch_ctxt; struct chunk_sge *ch_sge_ary; struct chunk_sge *ch_sge_ary; Loading @@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, sge, ch_sge_ary, sge, ch_sge_ary, ch_count, byte_count); ch_count, byte_count); head = svc_rdma_get_context(xprt); sgl_offset = 0; sgl_offset = 0; ch_no = 0; ch_no = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++, ch_no++) { ch->rc_discrim != 0; ch++, ch_no++) { next_sge: next_sge: if (!ctxt) ctxt = svc_rdma_get_context(xprt); ctxt = head; else { ctxt->next = svc_rdma_get_context(xprt); ctxt = ctxt->next; } ctxt->next = NULL; ctxt->direction = DMA_FROM_DEVICE; ctxt->direction = DMA_FROM_DEVICE; clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); /* Prepare READ WR */ /* Prepare READ WR */ Loading @@ -347,20 +345,15 @@ next_sge: * the client and the RPC needs to be enqueued. * the client and the RPC needs to be enqueued. 
*/ */ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); ctxt->next = hdr_ctxt; ctxt->read_hdr = hdr_ctxt; hdr_ctxt->next = head; } } /* Post the read */ /* Post the read */ err = svc_rdma_send(xprt, &read_wr); err = svc_rdma_send(xprt, &read_wr); if (err) { if (err) { printk(KERN_ERR "svcrdma: Error posting send = %d\n", printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", err); err); /* set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); * Break the circular list so free knows when svc_rdma_put_context(ctxt, 0); * to stop if the error happened to occur on * the last read */ ctxt->next = NULL; goto out; goto out; } } atomic_inc(&rdma_stat_read); atomic_inc(&rdma_stat_read); Loading @@ -371,7 +364,7 @@ next_sge: goto next_sge; goto next_sge; } } sgl_offset = 0; sgl_offset = 0; err = 0; err = 1; } } out: out: Loading @@ -389,25 +382,12 @@ next_sge: while (rqstp->rq_resused) while (rqstp->rq_resused) rqstp->rq_respages[--rqstp->rq_resused] = NULL; rqstp->rq_respages[--rqstp->rq_resused] = NULL; if (err) { return err; printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); /* Free the linked list of read contexts */ while (head != NULL) { ctxt = head->next; svc_rdma_put_context(head, 1); head = ctxt; } return 0; } return 1; } } static int rdma_read_complete(struct svc_rqst *rqstp, static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *data) struct svc_rdma_op_ctxt *head) { { struct svc_rdma_op_ctxt *head = data->next; int page_no; int page_no; int ret; int ret; Loading @@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_arg.len = head->arg.len; rqstp->rq_arg.len = head->arg.len; rqstp->rq_arg.buflen = head->arg.buflen; rqstp->rq_arg.buflen = head->arg.buflen; /* Free the context */ svc_rdma_put_context(head, 0); /* XXX: What should this be? */ /* XXX: What should this be? */ rqstp->rq_prot = IPPROTO_MAX; rqstp->rq_prot = IPPROTO_MAX; svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); /* * Free the contexts we used to build the RDMA_READ. We have * to be careful here because the context list uses the same * next pointer used to chain the contexts associated with the * RDMA_READ */ data->next = NULL; /* terminate circular list */ do { data = head->next; svc_rdma_put_context(head, 0); head = data; } while (head != NULL); ret = rqstp->rq_arg.head[0].iov_len ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.page_len Loading @@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); rqstp->rq_arg.head[0].iov_len); /* Indicate that we've consumed an RQ credit */ rqstp->rq_xprt_ctxt = rqstp->rq_xprt; svc_xprt_received(rqstp->rq_xprt); svc_xprt_received(rqstp->rq_xprt); return ret; return ret; } } Loading @@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); dprintk("svcrdma: rqstp=%p\n", rqstp); /* * The rq_xprt_ctxt indicates if we've consumed an RQ credit * or not. It is used in the rdma xpo_release_rqst function to * determine whether or not to return an RQ WQE to the RQ. 
*/ rqstp->rq_xprt_ctxt = NULL; spin_lock_bh(&rdma_xprt->sc_read_complete_lock); spin_lock_bh(&rdma_xprt->sc_read_complete_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { if (!list_empty(&rdma_xprt->sc_read_complete_q)) { ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, Loading Loading @@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) /* If the request is invalid, reply with an error */ /* If the request is invalid, reply with an error */ if (len < 0) { if (len < 0) { if (len == -ENOSYS) if (len == -ENOSYS) (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); goto close_out; goto close_out; } } /* Read read-list data. If we would need to wait, defer /* Read read-list data. */ * it. Not that in this case, we don't return the RQ credit ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); * until after the read completes. if (ret > 0) { */ /* read-list posted, defer until data received from client. */ if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { svc_xprt_received(xprt); svc_xprt_received(xprt); return 0; return 0; } } if (ret < 0) { /* Indicate we've consumed an RQ credit */ /* Post of read-list failed, free context. */ rqstp->rq_xprt_ctxt = rqstp->rq_xprt; svc_rdma_put_context(ctxt, 1); return 0; } ret = rqstp->rq_arg.head[0].iov_len ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.page_len Loading @@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) return ret; return ret; close_out: close_out: if (ctxt) { if (ctxt) svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1); /* Indicate we've consumed an RQ credit */ rqstp->rq_xprt_ctxt = rqstp->rq_xprt; } dprintk("svcrdma: transport %p is closing\n", xprt); dprintk("svcrdma: transport %p is closing\n", xprt); /* /* * Set the close bit and enqueue it. svc_recv will see the * Set the close bit and enqueue it. svc_recv will see the Loading
net/sunrpc/xprtrdma/svc_rdma_sendto.c +11 −0 Original line number Original line Diff line number Diff line Loading @@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma, int page_no; int page_no; int ret; int ret; /* Post a recv buffer to handle another request. */ ret = svc_rdma_post_recv(rdma); if (ret) { printk(KERN_INFO "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } /* Prepare the context */ /* Prepare the context */ ctxt->pages[0] = page; ctxt->pages[0] = page; ctxt->count = 1; ctxt->count = 1; Loading
net/sunrpc/xprtrdma/svc_rdma_transport.c +174 −116 Original line number Original line Diff line number Diff line Loading @@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); if (ctxt) { if (ctxt) { at_least_one = 1; at_least_one = 1; ctxt->next = xprt->sc_ctxt_head; INIT_LIST_HEAD(&ctxt->free_list); xprt->sc_ctxt_head = ctxt; list_add(&ctxt->free_list, &xprt->sc_ctxt_free); } else { } else { /* kmalloc failed...give up for now */ /* kmalloc failed...give up for now */ xprt->sc_ctxt_cnt--; xprt->sc_ctxt_cnt--; Loading @@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) while (1) { while (1) { spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); if (unlikely(xprt->sc_ctxt_head == NULL)) { if (unlikely(list_empty(&xprt->sc_ctxt_free))) { /* Try to bump my cache. */ /* Try to bump my cache. */ spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); Loading @@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); schedule_timeout_uninterruptible(msecs_to_jiffies(500)); continue; continue; } } ctxt = xprt->sc_ctxt_head; ctxt = list_entry(xprt->sc_ctxt_free.next, xprt->sc_ctxt_head = ctxt->next; struct svc_rdma_op_ctxt, free_list); list_del_init(&ctxt->free_list); spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); ctxt->xprt = xprt; ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; ctxt->count = 0; atomic_inc(&xprt->sc_ctxt_used); break; break; } } return ctxt; return ctxt; Loading @@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) put_page(ctxt->pages[i]); put_page(ctxt->pages[i]); for (i = 0; i < ctxt->count; i++) for (i = 0; i < ctxt->count; i++) dma_unmap_single(xprt->sc_cm_id->device->dma_device, ib_dma_unmap_single(xprt->sc_cm_id->device, ctxt->sge[i].addr, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->sge[i].length, ctxt->direction); ctxt->direction); spin_lock_bh(&xprt->sc_ctxt_lock); spin_lock_bh(&xprt->sc_ctxt_lock); ctxt->next = xprt->sc_ctxt_head; list_add(&ctxt->free_list, &xprt->sc_ctxt_free); xprt->sc_ctxt_head = ctxt; spin_unlock_bh(&xprt->sc_ctxt_lock); spin_unlock_bh(&xprt->sc_ctxt_lock); atomic_dec(&xprt->sc_ctxt_used); } } /* ib_cq event handler */ /* ib_cq event handler */ Loading Loading @@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data) list_del_init(&xprt->sc_dto_q); list_del_init(&xprt->sc_dto_q); spin_unlock_irqrestore(&dto_lock, flags); spin_unlock_irqrestore(&dto_lock, flags); if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); rq_cq_reap(xprt); rq_cq_reap(xprt); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); /* * If data arrived before established event, * don't enqueue. This defers RPC I/O until the * RDMA connection is complete. 
*/ if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) svc_xprt_enqueue(&xprt->sc_xprt); } if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); sq_cq_reap(xprt); sq_cq_reap(xprt); } svc_xprt_put(&xprt->sc_xprt); svc_xprt_put(&xprt->sc_xprt); spin_lock_irqsave(&dto_lock, flags); spin_lock_irqsave(&dto_lock, flags); Loading @@ -263,6 +252,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; struct svcxprt_rdma *xprt = cq_context; unsigned long flags; unsigned long flags; /* Guard against unconditional flush call for destroyed QP */ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) return; /* /* * Set the bit regardless of whether or not it's on the list * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an SQ * because it may be on the list already due to an SQ Loading Loading @@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) * * * Take all completing WC off the CQE and enqueue the associated DTO * Take all completing WC off the CQE and enqueue the associated DTO * context on the dto_q for the transport. * context on the dto_q for the transport. * * Note that caller must hold a transport reference. */ */ static void rq_cq_reap(struct svcxprt_rdma *xprt) static void rq_cq_reap(struct svcxprt_rdma *xprt) { { Loading @@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) struct ib_wc wc; struct ib_wc wc; struct svc_rdma_op_ctxt *ctxt = NULL; struct svc_rdma_op_ctxt *ctxt = NULL; if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_rq_poll); atomic_inc(&rdma_stat_rq_poll); spin_lock_bh(&xprt->sc_rq_dto_lock); while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt->wc_status = wc.status; ctxt->wc_status = wc.status; ctxt->byte_len = wc.byte_len; ctxt->byte_len = wc.byte_len; if (wc.status != IB_WC_SUCCESS) { if (wc.status != IB_WC_SUCCESS) { /* Close the transport */ /* Close the transport */ dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); svc_rdma_put_context(ctxt, 1); svc_xprt_put(&xprt->sc_xprt); continue; continue; } } spin_lock_bh(&xprt->sc_rq_dto_lock); list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); } spin_unlock_bh(&xprt->sc_rq_dto_lock); spin_unlock_bh(&xprt->sc_rq_dto_lock); svc_xprt_put(&xprt->sc_xprt); } if (ctxt) if (ctxt) atomic_inc(&rdma_stat_rq_prod); atomic_inc(&rdma_stat_rq_prod); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); /* * If data arrived before established event, * don't enqueue. This defers RPC I/O until the * RDMA connection is complete. */ if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) svc_xprt_enqueue(&xprt->sc_xprt); } } /* /* * Send Queue Completion Handler - potentially called on interrupt context. * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. 
*/ */ static void sq_cq_reap(struct svcxprt_rdma *xprt) static void sq_cq_reap(struct svcxprt_rdma *xprt) { { Loading @@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; struct ib_cq *cq = xprt->sc_sq_cq; int ret; int ret; if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; Loading @@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) case IB_WR_RDMA_READ: case IB_WR_RDMA_READ: if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); spin_lock_bh(&xprt->sc_read_complete_lock); spin_lock_bh(&xprt->sc_read_complete_lock); list_add_tail(&ctxt->dto_q, list_add_tail(&read_hdr->dto_q, &xprt->sc_read_complete_q); &xprt->sc_read_complete_q); spin_unlock_bh(&xprt->sc_read_complete_lock); spin_unlock_bh(&xprt->sc_read_complete_lock); svc_xprt_enqueue(&xprt->sc_xprt); svc_xprt_enqueue(&xprt->sc_xprt); } } svc_rdma_put_context(ctxt, 0); break; break; default: default: Loading @@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) wc.opcode, wc.status); wc.opcode, wc.status); break; break; } } svc_xprt_put(&xprt->sc_xprt); } } if (ctxt) if (ctxt) Loading @@ -376,6 +397,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; struct svcxprt_rdma *xprt = cq_context; unsigned long flags; unsigned long flags; /* Guard against unconditional flush call for destroyed QP */ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) return; /* /* * Set the bit regardless of whether or not it's on the list * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an RQ * because it may be on the list already due to an RQ Loading Loading @@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt, xprt->sc_ctxt_max = ctxt_max; xprt->sc_ctxt_max = ctxt_max; xprt->sc_ctxt_bump = ctxt_bump; xprt->sc_ctxt_bump = ctxt_bump; xprt->sc_ctxt_cnt = 0; xprt->sc_ctxt_cnt = 0; xprt->sc_ctxt_head = NULL; atomic_set(&xprt->sc_ctxt_used, 0); INIT_LIST_HEAD(&xprt->sc_ctxt_free); for (i = 0; i < ctxt_count; i++) { for (i = 0; i < ctxt_count; i++) { ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); if (ctxt) { if (ctxt) { ctxt->next = xprt->sc_ctxt_head; INIT_LIST_HEAD(&ctxt->free_list); xprt->sc_ctxt_head = ctxt; list_add(&ctxt->free_list, &xprt->sc_ctxt_free); xprt->sc_ctxt_cnt++; xprt->sc_ctxt_cnt++; } } } } } } static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) static void destroy_context_cache(struct svcxprt_rdma *xprt) { { struct svc_rdma_op_ctxt *next; while (!list_empty(&xprt->sc_ctxt_free)) { if (!ctxt) struct svc_rdma_op_ctxt *ctxt; return; ctxt = list_entry(xprt->sc_ctxt_free.next, struct svc_rdma_op_ctxt, do { free_list); next = ctxt->next; list_del_init(&ctxt->free_list); kfree(ctxt); kfree(ctxt); ctxt = next; } } while (next); } } static struct svcxprt_rdma *rdma_create_xprt(struct 
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 				 reqs +
 				 cma_xprt->sc_sq_depth +
 				 RPCRDMA_MAX_THREADS + 1); /* max */
-	if (!cma_xprt->sc_ctxt_head) {
+	if (list_empty(&cma_xprt->sc_ctxt_free)) {
 		kfree(cma_xprt);
 		return NULL;
 	}

@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	recv_wr.num_sge = ctxt->count;
 	recv_wr.wr_id = (u64)(unsigned long)ctxt;

+	svc_xprt_get(&xprt->sc_xprt);
 	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
+	if (ret) {
+		svc_xprt_put(&xprt->sc_xprt);
+		svc_rdma_put_context(ctxt, 1);
+	}
 	return ret;
 }

@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 {
 	struct svcxprt_rdma *listen_xprt = new_cma_id->context;
 	struct svcxprt_rdma *newxprt;
+	struct sockaddr *sa;

 	/* Create a new transport */
 	newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);

@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 	dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
 		newxprt, newxprt->sc_cm_id, listen_xprt);

+	/* Set the local and remote addresses in the transport */
+	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+
 	/*
 	 * Enqueue the new transport on the accept queue of the listening
 	 * transport

@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
 		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->xpt_flags);
 			svc_xprt_enqueue(xprt);
+			svc_xprt_put(xprt);
 		}
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
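svc_rdma_post_recv above is the template for the reference discipline the rest of the series relies on: take a transport reference before handing a WR to the HCA, and drop it immediately if the post fails, so every WR that actually completes is balanced by exactly one svc_xprt_put() in the completion reaper. The pattern, reduced to a hypothetical wrapper (a sketch, not part of the patch):

	static int example_post_recv(struct svcxprt_rdma *xprt,
				     struct ib_recv_wr *recv_wr)
	{
		struct ib_recv_wr *bad_recv_wr;
		int ret;

		svc_xprt_get(&xprt->sc_xprt);		/* reference owned by the WR */
		ret = ib_post_recv(xprt->sc_qp, recv_wr, &bad_recv_wr);
		if (ret)
			svc_xprt_put(&xprt->sc_xprt);	/* WR never reached the HW */
		return ret;
	}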
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	cma_xprt = rdma_create_xprt(serv, 1);
 	if (!cma_xprt)
-		return ERR_PTR(ENOMEM);
+		return ERR_PTR(-ENOMEM);
 	xprt = &cma_xprt->sc_xprt;

 	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
 	if (IS_ERR(listen_id)) {
-		svc_xprt_put(&cma_xprt->sc_xprt);
-		dprintk("svcrdma: rdma_create_id failed = %ld\n",
-			PTR_ERR(listen_id));
-		return (void *)listen_id;
+		ret = PTR_ERR(listen_id);
+		dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+		goto err0;
 	}

 	ret = rdma_bind_addr(listen_id, sa);
 	if (ret) {
-		rdma_destroy_id(listen_id);
-		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
-		return ERR_PTR(ret);
+		goto err1;
 	}
 	cma_xprt->sc_cm_id = listen_id;

 	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
 	if (ret) {
-		rdma_destroy_id(listen_id);
-		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
-		return ERR_PTR(ret);
+		goto err1;
 	}

 	/*

@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);

 	return &cma_xprt->sc_xprt;
+
+ err1:
+	rdma_destroy_id(listen_id);
+ err0:
+	kfree(cma_xprt);
+	return ERR_PTR(ret);
 }

 /*

@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
-	struct sockaddr *sa;
 	int ret;
 	int i;

@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
 		newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
 	}
-	svc_xprt_get(&newxprt->sc_xprt);
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;

 	/* Register all of physical memory */

@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	/* Swap out the handler */
 	newxprt->sc_cm_id->event_handler = rdma_cma_handler;

+	/*
+	 * Arm the CQs for the SQ and RQ before accepting so we can't
+	 * miss the first message
+	 */
+	ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
+
 	/* Accept Connection */
 	set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
 	memset(&conn_param, 0, sizeof conn_param);
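The err0/err1 labels give svc_rdma_create a single unwind path in place of cleanup duplicated in every error branch: resources are released in reverse order of acquisition, and failures that occur before the cm_id exists skip err1. A condensed sketch of the resulting flow (names as in the hunk above, unrelated code elided; illustrative only):

	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
	if (IS_ERR(listen_id)) {
		ret = PTR_ERR(listen_id);
		goto err0;		/* only the transport to free */
	}
	ret = rdma_bind_addr(listen_id, sa);
	if (ret)
		goto err1;		/* cm_id exists, destroy it as well */
	...
 err1:
	rdma_destroy_id(listen_id);
 err0:
	kfree(cma_xprt);
	return ERR_PTR(ret);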
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt->sc_max_requests,
 		newxprt->sc_ord);

-	/* Set the local and remote addresses in the transport */
-	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
-	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
-	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-
-	ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-	ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-
 	return &newxprt->sc_xprt;

  errout:
 	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
 	/* Take a reference in case the DTO handler runs */
 	svc_xprt_get(&newxprt->sc_xprt);
-	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
+	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
 		ib_destroy_qp(newxprt->sc_qp);
-		svc_xprt_put(&newxprt->sc_xprt);
-	}
-	rdma_destroy_id(newxprt->sc_cm_id);
 	/* This call to put will destroy the transport */
 	svc_xprt_put(&newxprt->sc_xprt);
 	return NULL;
 }

-/*
- * Post an RQ WQE to the RQ when the rqst is being released. This
- * effectively returns an RQ credit to the client. The rq_xprt_ctxt
- * will be null if the request is deferred due to an RDMA_READ or the
- * transport had no data ready (EAGAIN). Note that an RPC deferred in
- * svc_process will still return the credit, this is because the data
- * is copied and no longer consume a WQE/WC.
- */
 static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
 {
-	int err;
-	struct svcxprt_rdma *rdma =
-		container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
-	if (rqstp->rq_xprt_ctxt) {
-		BUG_ON(rqstp->rq_xprt_ctxt != rdma);
-		err = svc_rdma_post_recv(rdma);
-		if (err)
-			dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
-				err);
-	}
-	rqstp->rq_xprt_ctxt = NULL;
 }

 /*
- * When connected, an svc_xprt has at least three references:
- *
- * - A reference held by the QP. We still hold that here because this
- * code deletes the QP and puts the reference.
+ * When connected, an svc_xprt has at least two references:
  *
  * - A reference held by the cm_id between the ESTABLISHED and
  *   DISCONNECTED events. If the remote peer disconnected first, this

@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
  * - A reference held by the svc_recv code that called this function
  *   as part of close processing.
  *
- * At a minimum two references should still be held.
+ * At a minimum one references should still be held.
  */
 static void svc_rdma_detach(struct svc_xprt *xprt)
 {
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
 	/* Disconnect and flush posted WQE */
 	rdma_disconnect(rdma->sc_cm_id);
-
-	/* Destroy the QP if present (not a listener) */
-	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
-		ib_destroy_qp(rdma->sc_qp);
-		svc_xprt_put(xprt);
-	}
-
-	/* Destroy the CM ID */
-	rdma_destroy_id(rdma->sc_cm_id);
 }

-static void svc_rdma_free(struct svc_xprt *xprt)
+static void __svc_rdma_free(struct work_struct *work)
 {
-	struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
+	struct svcxprt_rdma *rdma =
+		container_of(work, struct svcxprt_rdma, sc_work);
 	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+
 	/* We should only be called from kref_put */
-	BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+	BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+
+	/*
+	 * Destroy queued, but not processed read completions. Note
+	 * that this cleanup has to be done before destroying the
+	 * cm_id because the device ptr is needed to unmap the dma in
+	 * svc_rdma_put_context.
+	 */
+	spin_lock_bh(&rdma->sc_read_complete_lock);
+	while (!list_empty(&rdma->sc_read_complete_q)) {
+		struct svc_rdma_op_ctxt *ctxt;
+		ctxt = list_entry(rdma->sc_read_complete_q.next,
+				  struct svc_rdma_op_ctxt,
+				  dto_q);
+		list_del_init(&ctxt->dto_q);
+		svc_rdma_put_context(ctxt, 1);
+	}
+	spin_unlock_bh(&rdma->sc_read_complete_lock);
+
+	/* Destroy queued, but not processed recv completions */
+	spin_lock_bh(&rdma->sc_rq_dto_lock);
+	while (!list_empty(&rdma->sc_rq_dto_q)) {
+		struct svc_rdma_op_ctxt *ctxt;
+		ctxt = list_entry(rdma->sc_rq_dto_q.next,
+				  struct svc_rdma_op_ctxt, dto_q);
+		list_del_init(&ctxt->dto_q);
+		svc_rdma_put_context(ctxt, 1);
+	}
+	spin_unlock_bh(&rdma->sc_rq_dto_lock);
+
+	/* Warn if we leaked a resource or under-referenced */
+	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+
+	/* Destroy the QP if present (not a listener) */
+	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
+		ib_destroy_qp(rdma->sc_qp);

 	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
 		ib_destroy_cq(rdma->sc_sq_cq);

@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
 		ib_dealloc_pd(rdma->sc_pd);

-	destroy_context_cache(rdma->sc_ctxt_head);
+	/* Destroy the CM ID */
+	rdma_destroy_id(rdma->sc_cm_id);
+
+	destroy_context_cache(rdma);
 	kfree(rdma);
 }

+static void svc_rdma_free(struct svc_xprt *xprt)
+{
+	struct svcxprt_rdma *rdma =
+		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
+	schedule_work(&rdma->sc_work);
+}
+
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
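svc_rdma_free now only queues work; the heavyweight teardown runs later in __svc_rdma_free on a workqueue, presumably because the final kref_put can come from the DTO tasklet, where destroying QPs, CQs and the cm_id (operations that may sleep) is not allowed. The hand-off pattern, stripped to its essentials with hypothetical names (a sketch, not part of the patch):

	static void example_free_work(struct work_struct *work)
	{
		struct svcxprt_rdma *rdma =
			container_of(work, struct svcxprt_rdma, sc_work);

		/* Process context: safe to sleep while tearing down the QP,
		 * CQs, PD, cm_id and context cache before freeing rdma. */
		kfree(rdma);
	}

	static void example_free(struct svc_xprt *xprt)
	{
		struct svcxprt_rdma *rdma =
			container_of(xprt, struct svcxprt_rdma, sc_xprt);

		INIT_WORK(&rdma->sc_work, example_free_work);
		schedule_work(&rdma->sc_work);
	}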
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 	int ret;

 	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
-		return 0;
+		return -ENOTCONN;

 	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
 	BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=

@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 		if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
 			spin_unlock_bh(&xprt->sc_lock);
 			atomic_inc(&rdma_stat_sq_starve);
-			/* See if we can reap some SQ WR */
+
+			/* See if we can opportunistically reap SQ WR to make room */
 			sq_cq_reap(xprt);

 			/* Wait until SQ WR available if SQ still full */

@@ -1041,21 +1097,24 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 			continue;
 		}
 		/* Bumped used SQ WR count and post */
+		svc_xprt_get(&xprt->sc_xprt);
 		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
 		if (!ret)
 			atomic_inc(&xprt->sc_sq_count);
-		else
+		else {
+			svc_xprt_put(&xprt->sc_xprt);
 			dprintk("svcrdma: failed to post SQ WR rc=%d, "
 				"sc_sq_count=%d, sc_sq_depth=%d\n",
 				ret, atomic_read(&xprt->sc_sq_count),
 				xprt->sc_sq_depth);
+		}
 		spin_unlock_bh(&xprt->sc_lock);
 		break;
 	}
 	return ret;
 }

-int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 			enum rpcrdma_errcode err)
 {
 	struct ib_send_wr err_wr;

@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 	/* Post It */
 	ret = svc_rdma_send(xprt, &err_wr);
 	if (ret) {
-		dprintk("svcrdma: Error posting send = %d\n", ret);
+		dprintk("svcrdma: Error %d posting send for protocol error\n", ret);
 		svc_rdma_put_context(ctxt, 1);
 	}
-	return ret;
 }
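With svc_rdma_send now failing fast with -ENOTCONN on a closing transport, SQ flow control is a plain producer/consumer hand-off: the sender bumps sc_sq_count and sleeps when the SQ is full, while the completion side decrements and wakes it. The wake-up half lives in sq_cq_reap and is not part of these hunks, so the sketch below (including the sc_send_wait wait queue) is an assumption about the surrounding code, not part of the patch:

	static void example_wait_for_sq_space(struct svcxprt_rdma *xprt)
	{
		/* sender side, once sc_sq_count has reached sc_sq_depth */
		wait_event(xprt->sc_send_wait,
			   atomic_read(&xprt->sc_sq_count) < xprt->sc_sq_depth);
	}

	static void example_sq_wr_reaped(struct svcxprt_rdma *xprt)
	{
		/* completion side, once per reaped send WR */
		atomic_dec(&xprt->sc_sq_count);
		wake_up(&xprt->sc_send_wait);
	}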