
Commit 2a600f14 authored by Mike Marciniszyn, committed by Roland Dreier

IB/qib: RDMA lkey/rkey validation is inefficient for large MRs



The current code loops during rkey/lkey validation to isolate the MR
for the RDMA, which is expensive when the current operation is inside
a very large memory region.

This fix optimizes rkey/lkey validation routines for user memory
regions and fast memory regions.  The MR entry can be isolated by
shifts/mods instead of looping.  The existing loop is preserved for
phys memory regions for now.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 7c3edd3f
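
To make the optimization concrete before reading the patch: below is a minimal standalone C sketch (not part of the commit) contrasting the two lookup strategies. The shift/mod arithmetic mirrors the patch; the QIB_SEGSZ value, function names, and test harness are illustrative assumptions.

#include <stddef.h>
#include <stdio.h>

/* Assumed placeholder value; the driver defines QIB_SEGSZ itself. */
#define QIB_SEGSZ 64

/*
 * Old strategy: walk the segments one at a time until the offset lands
 * in segment (m, n).  With uniform pages this costs O(off / page_size).
 */
static void locate_by_loop(size_t off, size_t seg_len,
			   size_t *m, size_t *n, size_t *rem)
{
	*m = 0;
	*n = 0;
	while (off >= seg_len) {	/* every segment is seg_len bytes here */
		off -= seg_len;
		if (++*n >= QIB_SEGSZ) {
			++*m;
			*n = 0;
		}
	}
	*rem = off;
}

/*
 * New strategy: for uniform power-of-2 page sizes the segment index is
 * simply off >> page_shift, split into map index and segment index by
 * a divide and a modulo.  Constant time regardless of MR size.
 */
static void locate_by_shift(size_t off, unsigned page_shift,
			    size_t *m, size_t *n, size_t *rem)
{
	size_t entries_spanned_by_off = off >> page_shift;

	*rem = off - (entries_spanned_by_off << page_shift);
	*m = entries_spanned_by_off / QIB_SEGSZ;
	*n = entries_spanned_by_off % QIB_SEGSZ;
}

int main(void)
{
	unsigned page_shift = 12;		/* 4 KiB pages */
	size_t off = ((size_t)1 << 30) + 100;	/* ~1 GiB into a large MR */
	size_t m1, n1, r1, m2, n2, r2;

	locate_by_loop(off, (size_t)1 << page_shift, &m1, &n1, &r1);
	locate_by_shift(off, page_shift, &m2, &n2, &r2);

	/* Both agree; the loop iterates 262144 times, the shift form never. */
	printf("loop:  m=%zu n=%zu rem=%zu\n", m1, n1, r1);
	printf("shift: m=%zu n=%zu rem=%zu\n", m2, n2, r2);
	return 0;
}

For 4 KiB pages one gigabyte into the region, the loop runs 262,144 iterations to find the same (m, n) pair the shift/mod form computes in constant time. Note in the diff below that the patch also takes the MR reference and releases rkt->lock before the segment walk, so even the fallback loop kept for physical MRs no longer runs under the table lock.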
drivers/infiniband/hw/qib/qib_keys.c  +54 −20
@@ -158,20 +158,36 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		isge->sge_length = sge->length;
 		isge->m = 0;
 		isge->n = 0;
+		spin_unlock_irqrestore(&rkt->lock, flags);
 		goto ok;
 	}
 	mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
 	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
 		     mr->pd != &pd->ibpd))
 		goto bail;
+	atomic_inc(&mr->refcount);
+	spin_unlock_irqrestore(&rkt->lock, flags);
 
 	off = sge->addr - mr->user_base;
 	if (unlikely(sge->addr < mr->user_base ||
 		     off + sge->length > mr->length ||
 		     (mr->access_flags & acc) != acc))
-		goto bail;
+		return ret;
 
 	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
+	if (mr->page_shift) {
+		/*
+		page sizes are uniform power of 2 so no loop is necessary
+		entries_spanned_by_off is the number of times the loop below
+		would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off/QIB_SEGSZ;
+		n = entries_spanned_by_off%QIB_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
@@ -182,7 +198,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 				n = 0;
 			}
 		}
-	atomic_inc(&mr->refcount);
+	}
 	isge->mr = mr;
 	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
 	isge->length = mr->map[m]->segs[n].length - off;
@@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	isge->n = n;
 ok:
 	ret = 1;
+	return ret;
 bail:
 	spin_unlock_irqrestore(&rkt->lock, flags);
 	return ret;
@@ -237,19 +254,35 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		sge->sge_length = len;
 		sge->m = 0;
 		sge->n = 0;
+		spin_unlock_irqrestore(&rkt->lock, flags);
 		goto ok;
 	}
 
 	mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
 	if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
+	atomic_inc(&mr->refcount);
+	spin_unlock_irqrestore(&rkt->lock, flags);
 
 	off = vaddr - mr->iova;
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
-		goto bail;
+		return ret;
 
 	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
+	if (mr->page_shift) {
+		/*
+		page sizes are uniform power of 2 so no loop is necessary
+		entries_spanned_by_off is the number of times the loop below
+		would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off/QIB_SEGSZ;
+		n = entries_spanned_by_off%QIB_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
@@ -260,7 +293,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 				n = 0;
 			}
 		}
-	atomic_inc(&mr->refcount);
+	}
 	sge->mr = mr;
 	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
 	sge->length = mr->map[m]->segs[n].length - off;
@@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	sge->n = n;
 ok:
 	ret = 1;
+	return ret;
 bail:
 	spin_unlock_irqrestore(&rkt->lock, flags);
 	return ret;
drivers/infiniband/hw/qib/qib_mr.c  +5 −3
@@ -39,7 +39,6 @@
 /* Fast memory region */
 struct qib_fmr {
 	struct ib_fmr ibfmr;
-	u8 page_shift;
 	struct qib_mregion mr;        /* must be last */
 };
 
@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
 			goto bail;
 	}
 	mr->mr.mapsz = m;
+	mr->mr.page_shift = 0;
 	mr->mr.max_segs = count;
 
 	/*
@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.access_flags = mr_access_flags;
 	mr->umem = umem;
 
+	if (is_power_of_2(umem->page_size))
+		mr->mr.page_shift = ilog2(umem->page_size);
 	m = 0;
 	n = 0;
 	list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 	fmr->mr.offset = 0;
 	fmr->mr.access_flags = mr_access_flags;
 	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->page_shift = fmr_attr->page_shift;
+	fmr->mr.page_shift = fmr_attr->page_shift;
 
 	atomic_set(&fmr->mr.refcount, 0);
 	ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	spin_lock_irqsave(&rkt->lock, flags);
 	fmr->mr.user_base = iova;
 	fmr->mr.iova = iova;
-	ps = 1 << fmr->page_shift;
+	ps = 1 << fmr->mr.page_shift;
 	fmr->mr.length = list_len * ps;
 	m = 0;
 	n = 0;
drivers/infiniband/hw/qib/qib_verbs.h  +1 −0
@@ -301,6 +301,7 @@ struct qib_mregion {
 	int access_flags;
 	u32 max_segs;           /* number of qib_segs in all the arrays */
 	u32 mapsz;              /* size of the map array */
+	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
 	atomic_t refcount;
 	struct qib_segarray *map[0];    /* the segments */
 };