
Commit d2183c6f authored by Jason Gunthorpe

RDMA/umem: Move page_shift from ib_umem to ib_odp_umem



This value has always been set to PAGE_SHIFT in the core code; the only
path that did anything different was ODP. Move the value into the ODP
struct and keep using it there, but change all the non-ODP code to use
PAGE_SHIFT/PAGE_SIZE/PAGE_MASK directly.
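
The gist of the change, as a rough standalone sketch (simplified fields and
made-up helper names umem_pages()/odp_pages(); the real definitions are
struct ib_umem, struct ib_umem_odp and helpers such as ib_umem_page_count()
and ib_umem_odp_num_pages() in the headers touched by this series):

/*
 * Sketch only, assuming 4K pages; not the kernel definitions.
 * page_shift now lives solely in the ODP wrapper, so plain umems
 * are always PAGE_SIZE granular.
 */
#include <stddef.h>

#define PAGE_SHIFT 12UL
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct ib_umem {
	unsigned long address;
	size_t length;
	/* no page_shift field any more */
};

struct ib_umem_odp {
	struct ib_umem umem;
	unsigned int page_shift;	/* PAGE_SHIFT, or huge_page_shift() under IB_ACCESS_HUGETLB */
};

/* Non-ODP code shifts by PAGE_SHIFT directly (assumes length > 0) ... */
static unsigned long umem_pages(const struct ib_umem *umem)
{
	unsigned long first = umem->address >> PAGE_SHIFT;
	unsigned long last = (umem->address + umem->length - 1) >> PAGE_SHIFT;

	return last - first + 1;
}

/* ... while ODP code keeps reading the per-umem shift. */
static unsigned long odp_pages(const struct ib_umem_odp *odp)
{
	unsigned long first = odp->umem.address >> odp->page_shift;
	unsigned long last =
		(odp->umem.address + odp->umem.length - 1) >> odp->page_shift;

	return last - first + 1;
}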

Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
parent 69054666
+1 −2
@@ -244,7 +244,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
	umem->context    = context;
	umem->length     = size;
	umem->address    = addr;
	umem->page_shift = PAGE_SHIFT;
	umem->writable   = ib_access_writable(access);
	umem->owning_mm = mm = current->mm;
	mmgrab(mm);
@@ -385,7 +384,7 @@ int ib_umem_page_count(struct ib_umem *umem)

	n = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
		n += sg_dma_len(sg) >> umem->page_shift;
		n += sg_dma_len(sg) >> PAGE_SHIFT;

	return n;
}
+36 −43
@@ -59,7 +59,7 @@ static u64 node_start(struct umem_odp_node *n)
	struct ib_umem_odp *umem_odp =
			container_of(n, struct ib_umem_odp, interval_tree);

	return ib_umem_start(&umem_odp->umem);
	return ib_umem_start(umem_odp);
}

/* Note that the representation of the intervals in the interval tree
@@ -72,7 +72,7 @@ static u64 node_last(struct umem_odp_node *n)
	struct ib_umem_odp *umem_odp =
			container_of(n, struct ib_umem_odp, interval_tree);

	return ib_umem_end(&umem_odp->umem) - 1;
	return ib_umem_end(umem_odp) - 1;
}

INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
@@ -107,8 +107,6 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
					       u64 start, u64 end, void *cookie)
{
	struct ib_umem *umem = &umem_odp->umem;

	/*
	 * Increase the number of notifiers running, to
	 * prevent any further fault handling on this MR.
@@ -119,8 +117,8 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
	 * all pending page faults. */
	smp_wmb();
	complete_all(&umem_odp->notifier_completion);
	umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
					ib_umem_end(umem));
	umem_odp->umem.context->invalidate_range(
		umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
	return 0;
}

@@ -205,10 +203,9 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
{
	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
	struct ib_umem *umem = &umem_odp->umem;

	down_write(&per_mm->umem_rwsem);
	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
		rbt_ib_umem_insert(&umem_odp->interval_tree,
				   &per_mm->umem_tree);
	up_write(&per_mm->umem_rwsem);
@@ -217,10 +214,9 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
{
	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
	struct ib_umem *umem = &umem_odp->umem;

	down_write(&per_mm->umem_rwsem);
	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
		rbt_ib_umem_remove(&umem_odp->interval_tree,
				   &per_mm->umem_tree);
	complete_all(&umem_odp->notifier_completion);
@@ -351,7 +347,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
	umem->context    = ctx;
	umem->length     = size;
	umem->address    = addr;
	umem->page_shift = PAGE_SHIFT;
	odp_data->page_shift = PAGE_SHIFT;
	umem->writable   = root->umem.writable;
	umem->is_odp = 1;
	odp_data->per_mm = per_mm;
@@ -405,18 +401,19 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
	struct mm_struct *mm = umem->owning_mm;
	int ret_val;

	umem_odp->page_shift = PAGE_SHIFT;
	if (access & IB_ACCESS_HUGETLB) {
		struct vm_area_struct *vma;
		struct hstate *h;

		down_read(&mm->mmap_sem);
		vma = find_vma(mm, ib_umem_start(umem));
		vma = find_vma(mm, ib_umem_start(umem_odp));
		if (!vma || !is_vm_hugetlb_page(vma)) {
			up_read(&mm->mmap_sem);
			return -EINVAL;
		}
		h = hstate_vma(vma);
		umem->page_shift = huge_page_shift(h);
		umem_odp->page_shift = huge_page_shift(h);
		up_read(&mm->mmap_sem);
	}

@@ -424,16 +421,16 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)

	init_completion(&umem_odp->notifier_completion);

	if (ib_umem_num_pages(umem)) {
	if (ib_umem_odp_num_pages(umem_odp)) {
		umem_odp->page_list =
			vzalloc(array_size(sizeof(*umem_odp->page_list),
					   ib_umem_num_pages(umem)));
					   ib_umem_odp_num_pages(umem_odp)));
		if (!umem_odp->page_list)
			return -ENOMEM;

		umem_odp->dma_list =
			vzalloc(array_size(sizeof(*umem_odp->dma_list),
					   ib_umem_num_pages(umem)));
					   ib_umem_odp_num_pages(umem_odp)));
		if (!umem_odp->dma_list) {
			ret_val = -ENOMEM;
			goto out_page_list;
@@ -456,16 +453,14 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)

void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
	struct ib_umem *umem = &umem_odp->umem;

	/*
	 * Ensure that no more pages are mapped in the umem.
	 *
	 * It is the driver's responsibility to ensure, before calling us,
	 * that the hardware will not attempt to access the MR any more.
	 */
	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
				    ib_umem_end(umem));
	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
				    ib_umem_end(umem_odp));

	remove_umem_from_per_mm(umem_odp);
	put_per_mm(umem_odp);
@@ -498,8 +493,8 @@ static int ib_umem_odp_map_dma_single_page(
		u64 access_mask,
		unsigned long current_seq)
{
	struct ib_umem *umem = &umem_odp->umem;
	struct ib_device *dev = umem->context->device;
	struct ib_ucontext *context = umem_odp->umem.context;
	struct ib_device *dev = context->device;
	dma_addr_t dma_addr;
	int remove_existing_mapping = 0;
	int ret = 0;
@@ -514,9 +509,8 @@ static int ib_umem_odp_map_dma_single_page(
		goto out;
	}
	if (!(umem_odp->dma_list[page_index])) {
		dma_addr = ib_dma_map_page(dev,
					   page,
					   0, BIT(umem->page_shift),
		dma_addr =
			ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
					DMA_BIDIRECTIONAL);
		if (ib_dma_mapping_error(dev, dma_addr)) {
			ret = -EFAULT;
@@ -540,11 +534,12 @@ static int ib_umem_odp_map_dma_single_page(

	if (remove_existing_mapping) {
		ib_umem_notifier_start_account(umem_odp);
		umem->context->invalidate_range(
		context->invalidate_range(
			umem_odp,
			ib_umem_start(umem) + (page_index << umem->page_shift),
			ib_umem_start(umem) +
				((page_index + 1) << umem->page_shift));
			ib_umem_start(umem_odp) +
				(page_index << umem_odp->page_shift),
			ib_umem_start(umem_odp) +
				((page_index + 1) << umem_odp->page_shift));
		ib_umem_notifier_end_account(umem_odp);
		ret = -EAGAIN;
	}
@@ -581,27 +576,26 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
			      u64 bcnt, u64 access_mask,
			      unsigned long current_seq)
{
	struct ib_umem *umem = &umem_odp->umem;
	struct task_struct *owning_process  = NULL;
	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
	struct page       **local_page_list = NULL;
	u64 page_mask, off;
	int j, k, ret = 0, start_idx, npages = 0, page_shift;
	unsigned int flags = 0;
	int j, k, ret = 0, start_idx, npages = 0;
	unsigned int flags = 0, page_shift;
	phys_addr_t p = 0;

	if (access_mask == 0)
		return -EINVAL;

	if (user_virt < ib_umem_start(umem) ||
	    user_virt + bcnt > ib_umem_end(umem))
	if (user_virt < ib_umem_start(umem_odp) ||
	    user_virt + bcnt > ib_umem_end(umem_odp))
		return -EFAULT;

	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
	if (!local_page_list)
		return -ENOMEM;

	page_shift = umem->page_shift;
	page_shift = umem_odp->page_shift;
	page_mask = ~(BIT(page_shift) - 1);
	off = user_virt & (~page_mask);
	user_virt = user_virt & page_mask;
@@ -621,7 +615,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
	if (access_mask & ODP_WRITE_ALLOWED_BIT)
		flags |= FOLL_WRITE;

	start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
	start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
	k = start_idx;

	while (bcnt > 0) {
@@ -711,21 +705,20 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
				 u64 bound)
{
	struct ib_umem *umem = &umem_odp->umem;
	int idx;
	u64 addr;
	struct ib_device *dev = umem->context->device;
	struct ib_device *dev = umem_odp->umem.context->device;

	virt  = max_t(u64, virt,  ib_umem_start(umem));
	bound = min_t(u64, bound, ib_umem_end(umem));
	virt = max_t(u64, virt, ib_umem_start(umem_odp));
	bound = min_t(u64, bound, ib_umem_end(umem_odp));
	/* Note that during the run of this function, the
	 * notifiers_count of the MR is > 0, preventing any racing
	 * faults from completion. We might be racing with other
	 * invalidations, so we must make sure we free each page only
	 * once. */
	mutex_lock(&umem_odp->umem_mutex);
	for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
	for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
		if (umem_odp->page_list[idx]) {
			struct page *page = umem_odp->page_list[idx];
			dma_addr_t dma = umem_odp->dma_list[idx];
+1 −2
@@ -235,8 +235,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
					&buf->hr_mtt);
	} else {
		ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
				(*umem)->page_shift,
				&buf->hr_mtt);
					PAGE_SHIFT, &buf->hr_mtt);
	}
	if (ret)
		goto err_buf;
+4 −6
@@ -264,8 +264,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
		} else
			ret = hns_roce_mtt_init(hr_dev,
						ib_umem_page_count(srq->umem),
						srq->umem->page_shift,
						&srq->mtt);
						PAGE_SHIFT, &srq->mtt);
		if (ret)
			goto err_buf;

@@ -291,10 +290,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
			ret = hns_roce_mtt_init(hr_dev, npages,
						page_shift, &srq->idx_que.mtt);
		} else {
			ret = hns_roce_mtt_init(hr_dev,
				       ib_umem_page_count(srq->idx_que.umem),
				       srq->idx_que.umem->page_shift,
				       &srq->idx_que.mtt);
			ret = hns_roce_mtt_init(
				hr_dev, ib_umem_page_count(srq->idx_que.umem),
				PAGE_SHIFT, &srq->idx_que.mtt);
		}

		if (ret) {
+4 −4
@@ -258,7 +258,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
				       int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 min_shift = umem->page_shift;
	u64 min_shift = PAGE_SHIFT;
	u64 last_block_aligned_end = 0;
	u64 current_block_start = 0;
	u64 first_block_start = 0;
@@ -295,8 +295,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
			 * in access to the wrong data.
			 */
			misalignment_bits =
			(start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
			^ current_block_start;
				(start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
				current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}
@@ -514,7 +514,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
			goto release_mpt_entry;
		}
		n = ib_umem_page_count(mmr->umem);
		shift = mmr->umem->page_shift;
		shift = PAGE_SHIFT;

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,