
Commit 373d9915 authored by Ralph Campbell, committed by Roland Dreier

IB/ipath: Performance improvements via mmap of queues



Improve performance of userspace post receive, post SRQ receive, and
poll CQ operations for ipath by allowing userspace to directly mmap()
receive queues and completion queues.  This eliminates the copying
between userspace and the kernel in the data path.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 9bc57e2d
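
For context, here is a minimal userspace-side sketch (not part of the patch) of how a verbs provider might consume a completion queue after this change: the kernel returns the queue's address as an opaque 64-bit mmap offset through udata, the provider maps it once, and polling then reads the shared head/tail indices and completion entries directly, with no system call or copy in the data path. The structure layout, field names, and the cmd_fd descriptor below are illustrative assumptions; the real definitions live in ipath_verbs.h and the userspace ipath provider library.

#include <stdint.h>
#include <sys/types.h>
#include <sys/mman.h>

/* Assumed layout mirroring the kernel's ipath_cq_wc: head, tail, then entries. */
struct wc_shared {			/* stand-in for struct ib_wc (the real one has more fields) */
	uint64_t wr_id;
	uint32_t status;
	uint32_t byte_len;
};

struct cq_shared {
	uint32_t head;			/* advanced by the kernel as completions arrive */
	uint32_t tail;			/* advanced by userspace as completions are consumed */
	struct wc_shared queue[];	/* ring of cqe + 1 completion entries */
};

/* Map the shared queue using the 64-bit offset the kernel copied into udata. */
static struct cq_shared *map_cq(int cmd_fd, uint64_t offset, size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		       cmd_fd, (off_t) offset);

	return p == MAP_FAILED ? NULL : (struct cq_shared *) p;
}

/* Poll one completion straight from the shared ring; no kernel involvement. */
static int poll_one(struct cq_shared *cq, uint32_t cqe, uint64_t *wr_id)
{
	uint32_t tail = cq->tail;

	if (tail > cqe)			/* sanity-check the index, as the kernel side does */
		tail = cqe;
	if (tail == cq->head)
		return 0;		/* nothing completed yet */
	*wr_id = cq->queue[tail].wr_id;
	cq->tail = (tail >= cqe) ? 0 : tail + 1;
	return 1;
}

The kernel-side sanity checks on head and tail in ipath_cq_enter() and ipath_poll_cq() below exist precisely because these indices become writable by such a userspace mapping.
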
drivers/infiniband/hw/ipath/Makefile  +1 −0
@@ -25,6 +25,7 @@ ib_ipath-y := \
	ipath_cq.o \
	ipath_keys.o \
	ipath_mad.o \
	ipath_mmap.o \
	ipath_mr.o \
	ipath_qp.o \
	ipath_rc.o \
drivers/infiniband/hw/ipath/ipath_cq.c  +133 −43
@@ -42,20 +42,28 @@
 * @entry: work completion entry to add
 * @sig: true if @entry is a solicitated entry
 *
 * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
 * This may be called with qp->s_lock held.
 */
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
	struct ipath_cq_wc *wc = cq->queue;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	if (cq->head == cq->ibcq.cqe)
	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	else
		next = cq->head + 1;
	if (unlikely(next == cq->tail)) {
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
		}
		return;
	}
	cq->queue[cq->head] = *entry;
	cq->head = next;
	wc->queue[head] = *entry;
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct ipath_cq *cq = to_icq(ibcq);
	struct ipath_cq_wc *wc = cq->queue;
	unsigned long flags;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (cq->tail == cq->head)
		if (wc->tail == wc->head)
			break;
		*entry = cq->queue[cq->tail];
		if (cq->tail == cq->ibcq.cqe)
			cq->tail = 0;
		*entry = wc->queue[wc->tail];
		if (wc->tail >= cq->ibcq.cqe)
			wc->tail = 0;
		else
			cq->tail++;
			wc->tail++;
	}

	spin_unlock_irqrestore(&cq->lock, flags);
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_cq *cq;
	struct ib_wc *wc;
	struct ipath_cq_wc *wc;
	struct ib_cq *ret;

	if (entries > ib_ipath_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
		goto done;
	}

	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
		goto done;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of
	 * entries.
	 */
	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
		goto done;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	wc = vmalloc(sizeof(*wc) * (entries + 1));
	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
	if (!wc) {
		kfree(cq);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See ipath_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		struct ipath_mmap_info *ip;
		__u64 offset = (__u64) wc;
		int err;

		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_wc;
		}

		/* Allocate info for ipath_mmap(). */
		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
		if (!ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}
		cq->ip = ip;
		ip->context = context;
		ip->obj = wc;
		kref_init(&ip->ref);
		ip->mmap_cnt = 0;
		ip->size = PAGE_ALIGN(sizeof(*wc) +
				      sizeof(struct ib_wc) * entries);
		spin_lock_irq(&dev->pending_lock);
		ip->next = dev->pending_mmaps;
		dev->pending_mmaps = ip;
		spin_unlock_irq(&dev->pending_lock);
	} else
		cq->ip = NULL;

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
	cq->head = 0;
	cq->tail = 0;
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	dev->n_cqs_allocated++;
	goto done;

bail:
bail_wc:
	vfree(wc);

bail_cq:
	kfree(cq);

done:
	return ret;
}

@@ -229,6 +281,9 @@ int ipath_destroy_cq(struct ib_cq *ibcq)

	tasklet_kill(&cq->comptask);
	dev->n_cqs_allocated--;
	if (cq->ip)
		kref_put(&cq->ip->ref, ipath_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions.
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify;
@@ -264,46 +319,81 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct ipath_cq *cq = to_icq(ibcq);
	struct ib_wc *wc, *old_wc;
	u32 n;
	struct ipath_cq_wc *old_wc = cq->queue;
	struct ipath_cq_wc *wc;
	u32 head, tail, n;
	int ret;

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	wc = vmalloc(sizeof(*wc) * (cqe + 1));
	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See ipath_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = (__u64) wc;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail;
	}

	spin_lock_irq(&cq->lock);
	if (cq->head < cq->tail)
		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = cq->head - cq->tail;
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		spin_unlock_irq(&cq->lock);
		vfree(wc);
		ret = -EOVERFLOW;
		goto bail;
	}
	for (n = 0; cq->tail != cq->head; n++) {
		wc[n] = cq->queue[cq->tail];
		if (cq->tail == cq->ibcq.cqe)
			cq->tail = 0;
	for (n = 0; tail != head; n++) {
		wc->queue[n] = old_wc->queue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			cq->tail++;
			tail++;
	}
	cq->ibcq.cqe = cqe;
	cq->head = n;
	cq->tail = 0;
	old_wc = cq->queue;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct ipath_ibdev *dev = to_idev(ibcq->device);
		struct ipath_mmap_info *ip = cq->ip;

		ip->obj = wc;
		ip->size = PAGE_ALIGN(sizeof(*wc) +
				      sizeof(struct ib_wc) * cqe);
		spin_lock_irq(&dev->pending_lock);
		ip->next = dev->pending_mmaps;
		dev->pending_mmaps = ip;
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;

bail:
drivers/infiniband/hw/ipath/ipath_mmap.c (new file)  +122 −0
/*
 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>

#include "ipath_verbs.h"

/**
 * ipath_release_mmap_info - free mmap info structure
 * @ref: a pointer to the kref within struct ipath_mmap_info
 */
void ipath_release_mmap_info(struct kref *ref)
{
	struct ipath_mmap_info *ip =
		container_of(ref, struct ipath_mmap_info, ref);

	vfree(ip->obj);
	kfree(ip);
}

/*
 * open and close keep track of how many times the CQ is mapped,
 * to avoid releasing it.
 */
static void ipath_vma_open(struct vm_area_struct *vma)
{
	struct ipath_mmap_info *ip = vma->vm_private_data;

	kref_get(&ip->ref);
	ip->mmap_cnt++;
}

static void ipath_vma_close(struct vm_area_struct *vma)
{
	struct ipath_mmap_info *ip = vma->vm_private_data;

	ip->mmap_cnt--;
	kref_put(&ip->ref, ipath_release_mmap_info);
}

static struct vm_operations_struct ipath_vm_ops = {
	.open =     ipath_vma_open,
	.close =    ipath_vma_close,
};

/**
 * ipath_mmap - create a new mmap region
 * @context: the IB user context of the process making the mmap() call
 * @vma: the VMA to be initialized
 * Return zero if the mmap is OK. Otherwise, return an errno.
 */
int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct ipath_ibdev *dev = to_idev(context->device);
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct ipath_mmap_info *ip, **pp;
	int ret = -EINVAL;

	/*
	 * Search the device's list of objects waiting for a mmap call.
	 * Normally, this list is very short since a call to create a
	 * CQ, QP, or SRQ is soon followed by a call to mmap().
	 */
	spin_lock_irq(&dev->pending_lock);
	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
		/* Only the creator is allowed to mmap the object */
		if (context != ip->context || (void *) offset != ip->obj)
			continue;
		/* Don't allow a mmap larger than the object. */
		if (size > ip->size)
			break;

		*pp = ip->next;
		spin_unlock_irq(&dev->pending_lock);

		ret = remap_vmalloc_range(vma, ip->obj, 0);
		if (ret)
			goto done;
		vma->vm_ops = &ipath_vm_ops;
		vma->vm_private_data = ip;
		ipath_vma_open(vma);
		goto done;
	}
	spin_unlock_irq(&dev->pending_lock);
done:
	return ret;
}
drivers/infiniband/hw/ipath/ipath_qp.c  +112 −44
@@ -35,7 +35,7 @@
#include <linux/vmalloc.h>

#include "ipath_verbs.h"
#include "ipath_common.h"
#include "ipath_kernel.h"

#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
						      BITS_PER_PAGE, off)

#define TRANS_INVALID	0
#define TRANS_ANY2RST	1
#define TRANS_RST2INIT	2
#define TRANS_INIT2INIT	3
#define TRANS_INIT2RTR	4
#define TRANS_RTR2RTS	5
#define TRANS_RTS2RTS	6
#define TRANS_SQERR2RTS	7
#define TRANS_ANY2ERR	8
#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
#define TRANS_SQD2RTS	11 /* error if not drained */

/*
 * Convert the AETH credit code into the number of credits.
 */
@@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	qp->r_rq.head = 0;
	qp->r_rq.tail = 0;
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_reuse_sge = 0;
}

@@ -410,16 +399,33 @@ void ipath_error_qp(struct ipath_qp *qp)
	qp->s_hdrwords = 0;
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;

	wc.opcode = IB_WC_RECV;
	if (qp->r_rq.wq) {
		struct ipath_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);
	while (qp->r_rq.tail != qp->r_rq.head) {
		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
		if (++qp->r_rq.tail >= qp->r_rq.size)
			qp->r_rq.tail = 0;

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		wc.opcode = IB_WC_RECV;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	}
}

/**
 * ipath_modify_qp - modify the attributes of a queue pair
@@ -544,7 +550,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
	attr->dest_qp_num = qp->remote_qpn;
	attr->qp_access_flags = qp->qp_access_flags;
	attr->cap.max_send_wr = qp->s_size - 1;
	attr->cap.max_recv_wr = qp->r_rq.size - 1;
	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
	attr->cap.max_send_sge = qp->s_max_sge;
	attr->cap.max_recv_sge = qp->r_rq.max_sge;
	attr->cap.max_inline_data = 0;
@@ -597,13 +603,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
	} else {
		u32 min, max, x;
		u32 credits;

		struct ipath_rwq *wq = qp->r_rq.wq;
		u32 head;
		u32 tail;

		/* sanity check pointers before trusting them */
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		/*
		 * Compute the number of credits available (RWQEs).
		 * XXX Not holding the r_rq.lock here so there is a small
		 * chance that the pair of reads are not atomic.
		 */
		credits = qp->r_rq.head - qp->r_rq.tail;
		credits = head - tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
@@ -680,27 +696,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
		sz = sizeof(*qp);
		if (init_attr->srq) {
			struct ipath_srq *srq = to_isrq(init_attr->srq);

			sz += sizeof(*qp->r_sg_list) *
				srq->rq.max_sge;
		} else
			sz += sizeof(*qp->r_sg_list) *
				init_attr->cap.max_recv_sge;
		qp = kmalloc(sz, GFP_KERNEL);
		if (!qp) {
			vfree(swq);
			ret = ERR_PTR(-ENOMEM);
			goto bail;
			goto bail_swq;
		}
		if (init_attr->srq) {
			sz = 0;
			qp->r_rq.size = 0;
			qp->r_rq.max_sge = 0;
			qp->r_rq.wq = NULL;
			init_attr->cap.max_recv_wr = 0;
			init_attr->cap.max_recv_sge = 0;
		} else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct ipath_rwqe);
			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
					      qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				kfree(qp);
				vfree(swq);
				ret = ERR_PTR(-ENOMEM);
				goto bail;
				goto bail_qp;
			}
		}

@@ -726,12 +752,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
		err = ipath_alloc_qpn(&dev->qp_table, qp,
				      init_attr->qp_type);
		if (err) {
			vfree(swq);
			vfree(qp->r_rq.wq);
			kfree(qp);
			ret = ERR_PTR(err);
			goto bail;
			goto bail_rwq;
		}
		qp->ip = NULL;
		ipath_reset_qp(qp);

		/* Tell the core driver that the kernel SMA is present. */
@@ -748,8 +772,51 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See ipath_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		struct ipath_mmap_info *ip;
		__u64 offset = (__u64) qp->r_rq.wq;
		int err;

		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_rwq;
		}

		if (qp->r_rq.wq) {
			/* Allocate info for ipath_mmap(). */
			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
			if (!ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_rwq;
			}
			qp->ip = ip;
			ip->context = ibpd->uobject->context;
			ip->obj = qp->r_rq.wq;
			kref_init(&ip->ref);
			ip->mmap_cnt = 0;
			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
					      qp->r_rq.size * sz);
			spin_lock_irq(&dev->pending_lock);
			ip->next = dev->pending_mmaps;
			dev->pending_mmaps = ip;
			spin_unlock_irq(&dev->pending_lock);
		}
	}

	ret = &qp->ibqp;
	goto bail;

bail_rwq:
	vfree(qp->r_rq.wq);
bail_qp:
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}
@@ -773,11 +840,9 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		ipath_layer_set_verbs_flags(dev->dd, 0);

	spin_lock_irqsave(&qp->r_rq.lock, flags);
	spin_lock(&qp->s_lock);
	spin_lock_irqsave(&qp->s_lock, flags);
	qp->state = IB_QPS_ERR;
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Stop the sending tasklet. */
	tasklet_kill(&qp->s_task);
@@ -798,8 +863,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
	if (atomic_read(&qp->refcount) != 0)
		ipath_free_qp(&dev->qp_table, qp);

	vfree(qp->s_wq);
	if (qp->ip)
		kref_put(&qp->ip->ref, ipath_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}
drivers/infiniband/hw/ipath/ipath_ruc.c  +93 −45
@@ -32,7 +32,7 @@
 */

#include "ipath_verbs.h"
#include "ipath_common.h"
#include "ipath_kernel.h"

/*
 * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
	spin_unlock_irqrestore(&dev->pending_lock, flags);
}

static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	int user = to_ipd(qp->ibqp.pd)->user;
	int i, j, ret;
	struct ib_wc wc;

	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if ((user && wqe->sg_list[i].lkey == 0) ||
		    !ipath_lkey_ok(&dev->lk_table,
				   &qp->r_sg_list[j], &wqe->sg_list[i],
				   IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	qp->r_sge.sge = qp->r_sg_list[0];
	qp->r_sge.sg_list = qp->r_sg_list + 1;
	qp->r_sge.num_sge = j;
	ret = 1;
	goto bail;

bad_lkey:
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.vendor_err = 0;
	wc.byte_len = 0;
	wc.imm_data = 0;
	wc.qp_num = qp->ibqp.qp_num;
	wc.src_qp = 0;
	wc.wc_flags = 0;
	wc.pkey_index = 0;
	wc.slid = 0;
	wc.sl = 0;
	wc.dlid_path_bits = 0;
	wc.port_num = 0;
	/* Signal solicited completion event. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
@@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct ipath_rq *rq;
	struct ipath_rwq *wq;
	struct ipath_srq *srq;
	struct ipath_rwqe *wqe;
	int ret = 1;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (!qp->ibqp.srq) {
	if (qp->ibqp.srq) {
		srq = to_isrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
		spin_lock_irqsave(&rq->lock, flags);

		if (unlikely(rq->tail == rq->head)) {
			ret = 0;
			goto done;
		}
		wqe = get_rwqe_ptr(rq, rq->tail);
		qp->r_wr_id = wqe->wr_id;
		if (!wr_id_only) {
			qp->r_sge.sge = wqe->sg_list[0];
			qp->r_sge.sg_list = wqe->sg_list + 1;
			qp->r_sge.num_sge = wqe->num_sge;
			qp->r_len = wqe->length;
		}
		if (++rq->tail >= rq->size)
			rq->tail = 0;
		goto done;
	}

	srq = to_isrq(qp->ibqp.srq);
	rq = &srq->rq;
	spin_lock_irqsave(&rq->lock, flags);

	if (unlikely(rq->tail == rq->head)) {
	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	do {
		if (unlikely(tail == wq->head)) {
			spin_unlock_irqrestore(&rq->lock, flags);
			ret = 0;
		goto done;
			goto bail;
		}
	wqe = get_rwqe_ptr(rq, rq->tail);
		wqe = get_rwqe_ptr(rq, tail);
		if (++tail >= rq->size)
			tail = 0;
	} while (!wr_id_only && !init_sge(qp, wqe));
	qp->r_wr_id = wqe->wr_id;
	if (!wr_id_only) {
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->num_sge;
		qp->r_len = wqe->length;
	}
	if (++rq->tail >= rq->size)
		rq->tail = 0;
	if (srq->ibsrq.event_handler) {
		struct ib_event ev;
	wq->tail = tail;

	ret = 1;
	if (handler) {
		u32 n;

		if (rq->head < rq->tail)
			n = rq->size + rq->head - rq->tail;
		/*
		 * validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n = rq->head - rq->tail;
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			srq->ibsrq.event_handler(&ev,
						 srq->ibsrq.srq_context);
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}

done:
	spin_unlock_irqrestore(&rq->lock, flags);

bail:
	return ret;
}