Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1b7dbc26 authored by Artemy Kovalyov, committed by Doug Ledford
Browse files

IB/mlx5: Extract page fault code



To make the page fault handling code more flexible, split the
pagefault_single_data_segment() function: keep the MR resolution in
pagefault_single_data_segment() and move the actual page-table updates
into the new pagefault_mr() helper.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 0008b84e
Loading
Loading
Loading
Loading
+104 −99
Original line number Diff line number Diff line
@@ -511,81 +511,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}

/*
 * Handle a single data segment in a page-fault WQE or RDMA region.
 *
 * Returns number of OS pages retrieved on success. The caller may continue to
 * the next data segment.
 * Can return the following error codes:
 * -EAGAIN to designate a temporary error. The caller will abort handling the
 *  page fault and resolve it.
 * -EFAULT when there's an error mapping the requested pages. The caller will
 *  abort the page fault handling.
 */
static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
					 u32 key, u64 io_virt, size_t bcnt,
					 u32 *bytes_committed,
					 u32 *bytes_mapped)
static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
	int srcu_key;
	unsigned int current_seq = 0;
	u64 start_idx, page_mask;
	int npages = 0, ret = 0;
	struct mlx5_ib_mr *mr;
	u64 access_mask = ODP_READ_ALLOWED_BIT;
	int npages = 0, page_shift, np;
	u64 start_idx, page_mask;
	struct ib_umem_odp *odp;
	int implicit = 0;
	int current_seq;
	size_t size;
	int page_shift;

	srcu_key = srcu_read_lock(&dev->mr_srcu);
	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
	/*
	 * If we didn't find the MR, it means the MR was closed while we were
	 * handling the ODP event. In this case we return -EFAULT so that the
	 * QP will be closed.
	 */
	if (!mr || !mr->ibmr.pd) {
		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
			    key);
		ret = -EFAULT;
		goto srcu_unlock;
	}
	if (!mr->umem->odp_data) {
		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
			    key);
		if (bytes_mapped)
			*bytes_mapped +=
				(bcnt - *bytes_committed);
		goto srcu_unlock;
	}

	/*
	 * Avoid branches - this code will perform correctly
	 * in all iterations (in iteration 2 and above,
	 * bytes_committed == 0).
	 */
	io_virt += *bytes_committed;
	bcnt -= *bytes_committed;
	int ret;

	if (!mr->umem->odp_data->page_list) {
		odp = implicit_mr_get_data(mr, io_virt, bcnt);

		if (IS_ERR(odp)) {
			ret = PTR_ERR(odp);
			goto srcu_unlock;
		}
		if (IS_ERR(odp))
			return PTR_ERR(odp);
		mr = odp->private;
		implicit = 1;

	} else {
		odp = mr->umem->odp_data;
	}

next_mr:
	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);

	page_shift = mr->umem->page_shift;
	page_mask = ~(BIT(page_shift) - 1);
	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;

	if (mr->umem->writable)
		access_mask |= ODP_WRITE_ALLOWED_BIT;

next_mr:
	current_seq = READ_ONCE(odp->notifiers_seq);
	/*
	 * Ensure the sequence number is valid for some time before we call
@@ -593,20 +550,13 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
	 */
	smp_rmb();

	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;

	if (mr->umem->writable)
		access_mask |= ODP_WRITE_ALLOWED_BIT;

	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
					access_mask, current_seq);

	if (ret < 0)
		goto srcu_unlock;
		goto out;

	if (ret > 0) {
		int np = ret;
	np = ret;

	mutex_lock(&odp->umem_mutex);
	if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
@@ -616,28 +566,27 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
		 * checks this.
		 */
		ret = mlx5_ib_update_xlt(mr, start_idx, np,
						 page_shift,
						 MLX5_IB_UPD_XLT_ATOMIC);
					 page_shift, MLX5_IB_UPD_XLT_ATOMIC);
	} else {
		ret = -EAGAIN;
	}
	mutex_unlock(&odp->umem_mutex);

	if (ret < 0) {
		if (ret != -EAGAIN)
			mlx5_ib_err(dev, "Failed to update mkey page tables\n");
			goto srcu_unlock;
		goto out;
	}

	if (bytes_mapped) {
		u32 new_mappings = (np << page_shift) -
				(io_virt - round_down(io_virt,
						      1 << page_shift));
			(io_virt - round_down(io_virt, 1 << page_shift));
		*bytes_mapped += min_t(u32, new_mappings, size);
	}

	npages += np << (page_shift - PAGE_SHIFT);
	}

	bcnt -= size;

	if (unlikely(bcnt)) {
		struct ib_umem_odp *next;

@@ -646,17 +595,18 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
		if (unlikely(!next || next->umem->address != io_virt)) {
			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
				    io_virt, next);
			ret = -EAGAIN;
			goto srcu_unlock_no_wait;
			return -EAGAIN;
		}
		odp = next;
		mr = odp->private;
		goto next_mr;
	}

srcu_unlock:
	return npages;

out:
	if (ret == -EAGAIN) {
		if (implicit || !odp->dying) {
		if (mr->parent || !odp->dying) {
			unsigned long timeout =
				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);

@@ -672,7 +622,62 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
		}
	}

srcu_unlock_no_wait:
	return ret;
}

/*
 * Handle a single data segment in a page-fault WQE or RDMA region.
 *
 * Returns number of OS pages retrieved on success. The caller may continue to
 * the next data segment.
 * Can return the following error codes:
 * -EAGAIN to designate a temporary error. The caller will abort handling the
 *  page fault and resolve it.
 * -EFAULT when there's an error mapping the requested pages. The caller will
 *  abort the page fault handling.
 */
static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
					 u32 key, u64 io_virt, size_t bcnt,
					 u32 *bytes_committed,
					 u32 *bytes_mapped)
{
	int npages = 0, srcu_key, ret;
	struct mlx5_ib_mr *mr;
	size_t size;

	srcu_key = srcu_read_lock(&dev->mr_srcu);
	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
	/*
	 * If we didn't find the MR, it means the MR was closed while we were
	 * handling the ODP event. In this case we return -EFAULT so that the
	 * QP will be closed.
	 */
	if (!mr || !mr->ibmr.pd) {
		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
			    key);
		ret = -EFAULT;
		goto srcu_unlock;
	}
	if (!mr->umem->odp_data) {
		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
			    key);
		if (bytes_mapped)
			*bytes_mapped +=
				(bcnt - *bytes_committed);
		goto srcu_unlock;
	}

	/*
	 * Avoid branches - this code will perform correctly
	 * in all iterations (in iteration 2 and above,
	 * bytes_committed == 0).
	 */
	io_virt += *bytes_committed;
	bcnt -= *bytes_committed;

	npages = pagefault_mr(dev, mr, io_virt, size, bytes_mapped);

srcu_unlock:
	srcu_read_unlock(&dev->mr_srcu, srcu_key);
	*bytes_committed = 0;
	return ret ? ret : npages;