Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 515667f8 authored by Dennis Dalessandro, committed by Doug Ledford
Browse files

IB/rdmavt: Add create queue pair functionality



Add create queue pair verbs call as well as supporting functions.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 050eb7fb
Loading
Loading
Loading
Loading
+403 −22
Original line number Original line Diff line number Diff line
@@ -47,8 +47,11 @@


#include <linux/bitops.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/lockdep.h>
#include "vt.h"
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include "qp.h"
#include "qp.h"
#include "vt.h"


static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
{
{
@@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
	 * If driver is not doing any QP allocation then make sure it is
	 * If driver is not doing any QP allocation then make sure it is
	 * providing the necessary QP functions.
	 * providing the necessary QP functions.
	 */
	 */
	if (!rdi->driver_f.free_all_qps)
	if (!rdi->driver_f.free_all_qps ||
	    !rdi->driver_f.qp_priv_alloc ||
	    !rdi->driver_f.qp_priv_free ||
	    !rdi->driver_f.notify_qp_reset)
		return -EINVAL;
		return -EINVAL;


	/* allocate parent object */
	/* allocate parent object */
@@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
		goto fail_table;
		goto fail_table;


	return ret;
	spin_lock_init(&rdi->n_qps_lock);

	return 0;


fail_table:
fail_table:
	kfree(rdi->qp_dev->qp_table);
	kfree(rdi->qp_dev->qp_table);
@@ -197,31 +205,29 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 * There should not be any QPs still in use.
 * There should not be any QPs still in use.
 * Free memory for table.
 * Free memory for table.
 */
 */
static unsigned free_all_qps(struct rvt_dev_info *rdi)
static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
{
{
	unsigned long flags;
	unsigned long flags;
	struct rvt_qp *qp;
	struct rvt_qp *qp;
	unsigned n, qp_inuse = 0;
	unsigned n, qp_inuse = 0;
	spinlock_t *ql; /* work around too long line below */
	spinlock_t *ql; /* work around too long line below */


	rdi->driver_f.free_all_qps(rdi);
	if (rdi->driver_f.free_all_qps)
		qp_inuse = rdi->driver_f.free_all_qps(rdi);


	if (!rdi->qp_dev)
	if (!rdi->qp_dev)
		return 0;
		return qp_inuse;


	ql = &rdi->qp_dev->qpt_lock;
	ql = &rdi->qp_dev->qpt_lock;
	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
	spin_lock_irqsave(ql, flags);
	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
					       lockdep_is_held(ql));
					       lockdep_is_held(ql));
		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
		qp =  rcu_dereference_protected(qp->next,

						lockdep_is_held(ql));
		for (; qp; qp = rcu_dereference_protected(qp->next,
		while (qp) {
							  lockdep_is_held(ql)))
			qp_inuse++;
			qp_inuse++;
			qp =  rcu_dereference_protected(qp->next,
							lockdep_is_held(ql));
		}
	}
	}
	spin_unlock_irqrestore(ql, flags);
	spin_unlock_irqrestore(ql, flags);
	synchronize_rcu();
	synchronize_rcu();
@@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi)


void rvt_qp_exit(struct rvt_dev_info *rdi)
void rvt_qp_exit(struct rvt_dev_info *rdi)
{
{
	u32 qps_inuse = free_all_qps(rdi);
	u32 qps_inuse = rvt_free_all_qps(rdi);


	qps_inuse = free_all_qps(rdi);
	if (qps_inuse)
	if (qps_inuse)
		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
			   qps_inuse);
			   qps_inuse);
	if (!rdi->qp_dev)
	if (!rdi->qp_dev)
		return;
		return;


	if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER)
		return; /* driver did the qp init so nothing else to do */

	kfree(rdi->qp_dev->qp_table);
	kfree(rdi->qp_dev->qp_table);
	free_qpn_table(&rdi->qp_dev->qpn_table);
	free_qpn_table(&rdi->qp_dev->qpn_table);
	kfree(rdi->qp_dev);
	kfree(rdi->qp_dev);
}
}


/*
 * mk_qpn - turn a (bitmap page, bit offset) pair into a QP number
 * @qpt: the QPN table whose map[] array @map is measured against
 * @map: the bitmap page entry
 * @off: bit offset inside that page
 *
 * Every map entry covers RVT_BITS_PER_PAGE QPNs, so the result is the
 * entry's index in qpt->map[] scaled by that amount, plus @off.
 */
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
			      struct rvt_qpn_map *map, unsigned off)
{
	unsigned page_idx = map - qpt->map;

	return page_idx * RVT_BITS_PER_PAGE + off;
}

/*
 * alloc_qpn - allocate the next available QPN, or zero/one for QP type
 * IB_QPT_SMI/IB_QPT_GSI.
 * @rdi: rvt device info structure
 * @qpt: the QPN table to allocate from
 * @type: the type of QP the number is for
 * @port: the port number, used to pick the SMI/GSI "in use" flag bit
 *
 * If the driver supplies its own alloc_qpn() callback the whole request is
 * delegated to it.  Otherwise SMI and GSI QPs get the fixed numbers 0 and 1
 * and are tracked with one bit each per port in qpt->flags; every other type
 * is handed a number from the page bitmaps, stepping by qpt->incr.
 *
 * Return: the QPN (>= 0) on success, -EINVAL if the SMI/GSI QP of that port
 * is already taken, -ENOMEM if no bitmap page could be obtained or every
 * map has been scanned without finding a free bit.
 */
static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct rvt_qpn_map *map;
	/* signed: carries either a QPN or a negative errno back to the caller */
	int ret;

	if (rdi->driver_f.alloc_qpn)
		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port);

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		/* SMI is QPN 0, GSI is QPN 1 */
		ret = type == IB_QPT_GSI;
		/* two flag bits per port: SMI first, then GSI */
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	qpn = qpt->last + qpt->incr;
	if (qpn >= RVT_QPN_MAX)
		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
	/* offset carries bit 0 */
	offset = qpn & RVT_BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset += qpt->incr;
			/*
			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
			 * That is OK.   It gets re-assigned below
			 */
			qpn = mk_qpn(qpt, map, offset);
		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all the existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			/* start at incr with current bit 0 */
			offset = qpt->incr | (offset & 1);
		} else {
			map = &qpt->map[0];
			/* wrap to first map page, invert bit 0 */
			offset = qpt->incr | ((offset & 1) ^ 1);
		}
		/* there can be no bits at shift and below */
		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}

/*
 * free_qpn - give a QP number back to the allocation bitmap
 * @qpt: the QPN table the number was allocated from
 * @qpn: the QP number to release
 *
 * Clears the bit for @qpn in the bitmap page that covers it.  Does nothing
 * when that page has no bitmap attached.
 */
static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
{
	struct rvt_qpn_map *entry = &qpt->map[qpn / RVT_BITS_PER_PAGE];

	if (!entry->page)
		return;

	clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, entry->page);
}

/**
 * reset_qp - initialize the QP state to the reset state
 * @rdi: rvt device info structure, supplies the driver's reset callback
 * @qp: the QP to reset
 * @type: the QP type, selects the initial send/receive opcode state
 */
static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		     enum ib_qp_type type)
{
	/* RC QPs start from the RC opcode, every other type from the UC one */
	const int send_last = (type == IB_QPT_RC) ?
		IB_OPCODE_RC_SEND_LAST : IB_OPCODE_UC_SEND_LAST;

	/* addressing and access rights are cleared before the driver is told */
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;

	/* driver hook for whatever it needs on a new/reset qp */
	rdi->driver_f.notify_qp_reset(qp);

	/* of the send flags only the signalling mode is kept */
	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;

	/* send side */
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;

	/* packet sequence numbers */
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;

	/* opcode state machines */
	qp->s_state = send_last;
	qp->r_state = send_last;
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;

	/* send queue indices and sequence counters */
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	qp->s_mig_state = IB_MIG_MIGRATED;

	/* ack queue */
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;

	/* the receive work queue is only rewound when the QP has one */
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_sge.num_sge = 0;
}

/**
/**
 * rvt_create_qp - create a queue pair for a device
 * rvt_create_qp - create a queue pair for a device
 * @ibpd: the protection domain who's device we create the queue pair for
 * @ibpd: the protection domain who's device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 * @udata: user data for libibverbs.so
 *
 *
 * Queue pair creation is mostly an rvt issue. However, drivers have their own
 * unique idea of what queue pair numbers mean. For instance there is a reserved
 * range for PSM.
 *
 * Returns the queue pair on success, otherwise returns an errno.
 * Returns the queue pair on success, otherwise returns an errno.
 *
 *
 * Called by the ib_create_qp() core verbs function.
 * Called by the ib_create_qp() core verbs function.
@@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
			    struct ib_udata *udata)
{
{
	struct rvt_qp *qp;
	int err;
	struct rvt_swqe *swq = NULL;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret = ERR_PTR(-ENOMEM);
	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
	void *priv = NULL;

	if (!rdi)
		return ERR_PTR(-EINVAL);

	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
	    init_attr->create_flags)
		return ERR_PTR(-EINVAL);

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
			return ERR_PTR(-EINVAL);

		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0)
			return ERR_PTR(-EINVAL);
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt)
			return ERR_PTR(-EINVAL);
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct rvt_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct rvt_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (!swq)
			return ERR_PTR(-ENOMEM);

		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp)
			goto bail_swq;

		RCU_INIT_POINTER(qp->next, NULL);

		/*
		/*
	 * Queue pair creation is mostly an rvt issue. However, drivers have
		 * Driver needs to set up it's private QP structure and do any
	 * their own unique idea of what queue pare numbers mean. For instance
		 * initialization that is needed.
	 * there is a reserved range for PSM.
	 *
	 * VI-DRIVER-API: make_qpn()
	 * Returns a valid QPN for verbs to use
		 */
		 */
	return ERR_PTR(-EOPNOTSUPP);
		priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
		if (!priv)
			goto bail_qp;
		qp->priv = priv;
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
		if (init_attr->srq) {
			sz = 0;
		} else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct rvt_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) +
						   qp->r_rq.size * sz);
			if (!qp->r_rq.wq)
				goto bail_driver_priv;
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = RVT_S_SIGNAL_REQ_WR;

		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
				init_attr->qp_type,
				init_attr->port_num);
		if (err < 0) {
			ret = ERR_PTR(err);
			goto bail_rq_wq;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		reset_qp(rdi, qp, init_attr->qp_type);
		break;

	default:
		/* Don't support raw QPs */
		return ERR_PTR(-EINVAL);
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See hfi1_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_qpn;
			}
		} else {
			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;

			qp->ip = rvt_create_mmap_info(rdi, s,
						      ibpd->uobject->context,
						      qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qpn;
			}

			err = ib_copy_to_udata(udata, &qp->ip->offset,
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&rdi->n_qps_lock);
	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
		spin_unlock(&rdi->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	rdi->n_qps_allocated++;
	spin_unlock(&rdi->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&rdi->pending_lock);
		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	ret = &qp->ibqp;

	/*
	 * We have our QP and its good, now keep track of what types of opcodes
	 * can be processed on this QP. We do this by keeping track of what the
	 * 3 high order bits of the opcode are.
	 */
	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	case IB_QPT_UD:
		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	case IB_QPT_RC:
		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	case IB_QPT_UC:
		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
		break;
	default:
		ret = ERR_PTR(-EINVAL);
		goto bail_ip;
	}

	return ret;

bail_ip:
	kref_put(&qp->ip->ref, rvt_release_mmap_info);

bail_qpn:
	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);

bail_rq_wq:
	vfree(qp->r_rq.wq);

bail_driver_priv:
	rdi->driver_f.qp_priv_free(rdi, qp);

bail_qp:
	kfree(qp);

bail_swq:
	vfree(swq);

	return ret;
}
}


/**
/**
+1 −0
Original line number Original line Diff line number Diff line
@@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)


	ib_unregister_device(&rdi->ibdev);
	ib_unregister_device(&rdi->ibdev);
	rvt_mr_exit(rdi);
	rvt_mr_exit(rdi);
	rvt_qp_exit(rdi);
}
}
EXPORT_SYMBOL(rvt_unregister_device);
EXPORT_SYMBOL(rvt_unregister_device);


+9 −1
Original line number Original line Diff line number Diff line
@@ -222,7 +222,10 @@ struct rvt_driver_provided {
	int (*port_callback)(struct ib_device *, u8, struct kobject *);
	int (*port_callback)(struct ib_device *, u8, struct kobject *);
	const char * (*get_card_name)(struct rvt_dev_info *rdi);
	const char * (*get_card_name)(struct rvt_dev_info *rdi);
	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
	void (*free_all_qps)(struct rvt_dev_info *rdi);
	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
	void (*notify_qp_reset)(struct rvt_qp *qp);


	/*--------------------*/
	/*--------------------*/
	/* Optional functions */
	/* Optional functions */
@@ -230,6 +233,8 @@ struct rvt_driver_provided {
	int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
	int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
	void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
	void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
			      struct rvt_ah *);
			      struct rvt_ah *);
	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
			 enum ib_qp_type type, u8 port);
};
};


struct rvt_dev_info {
struct rvt_dev_info {
@@ -262,7 +267,10 @@ struct rvt_dev_info {
	int flags;
	int flags;
	struct rvt_ibport **ports;
	struct rvt_ibport **ports;


	/* QP */
	struct rvt_qp_ibdev *qp_dev;
	struct rvt_qp_ibdev *qp_dev;
	u32 n_qps_allocated;    /* number of QPs allocated for device */
	spinlock_t n_qps_lock; /* keep track of number of qps */


	/* memory maps */
	/* memory maps */
	struct list_head pending_mmaps;
	struct list_head pending_mmaps;