
Commit eab668a6 authored by Haggai Eran, committed by Roland Dreier

IB/mlx5: Add support for RDMA read/write responder page faults

parent 7bdf65d4
+79 −0
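Before the diff itself, a small, self-contained user-space sketch (not part of the patch) of the control flow the new mlx5_ib_mr_rdma_pfault_handler() below implements: skip the bytes already committed by the hardware, fall back to the packet size when the message length is reported as zero, fault in the pages for the current packet, resume the QP, and only then prefetch a bounded amount. The fault_in() stub, the hard-coded fault values in main() and the 4 KiB page size are assumptions for illustration; in the driver the real work is done by pagefault_single_data_segment().

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE        4096u
#define MAX_PREFETCH_LEN (4 * 1024 * 1024u)

/* Pages spanned by [address, address + length), mirroring pages_in_range():
 * e.g. address = 0x1ff8, length = 0x10 crosses one page boundary, so the
 * result is 2. */
static unsigned int pages_in_range(uint64_t address, uint32_t length)
{
	uint64_t first = address & ~(uint64_t)(PAGE_SIZE - 1);
	uint64_t end = (address + length + PAGE_SIZE - 1) &
		       ~(uint64_t)(PAGE_SIZE - 1);

	return (unsigned int)((end - first) / PAGE_SIZE);
}

/* Stand-in for pagefault_single_data_segment(): pretend every page in the
 * range became resident and report how many pages that was. */
static int fault_in(uint64_t address, uint32_t length)
{
	return (int)pages_in_range(address, length);
}

int main(void)
{
	/* Made-up contents of one RDMA responder page-fault report. */
	uint64_t rdma_va = 0x7f0000001ff8ULL;
	uint32_t rdma_op_len = 0;	/* hardware could not tell the length */
	uint32_t packet_size = 0x1000;
	uint32_t bytes_committed = 8 * 1024 * 1024;

	uint32_t prefetch_len = bytes_committed;
	int prefetch = 0, ret;
	uint64_t address;
	uint32_t length;

	/* Skip whatever part of the transfer the HCA already completed. */
	rdma_va += bytes_committed;
	rdma_op_len -= bytes_committed < rdma_op_len ? bytes_committed
						     : rdma_op_len;

	address = rdma_va;
	length = rdma_op_len;

	/* Length reported as zero: fault in the current packet only and
	 * schedule a prefetch, capped at MAX_PREFETCH_LEN. */
	if (length == 0) {
		prefetch = 1;
		length = packet_size;
		if (prefetch_len > MAX_PREFETCH_LEN)
			prefetch_len = MAX_PREFETCH_LEN;
	}

	ret = fault_in(address, length);
	if (ret < 0 || (unsigned int)ret < pages_in_range(address, length)) {
		/* The real handler resumes the QP with an error here, except
		 * for -EAGAIN (a racing invalidation), where it resumes
		 * normally and just skips the prefetch. */
		puts("resume QP with error");
		return 1;
	}

	puts("resume QP");
	if (prefetch)
		printf("prefetch %u bytes at 0x%llx\n", (unsigned)prefetch_len,
		       (unsigned long long)address);
	return 0;
}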
@@ -35,6 +35,8 @@

#include "mlx5_ib.h"

#define MAX_PREFETCH_LEN (4*1024*1024U)

struct workqueue_struct *mlx5_ib_page_fault_wq;

#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {	\
@@ -490,6 +492,80 @@ resolve_page_fault:
	free_page((unsigned long)buffer);
}

static int pages_in_range(u64 address, u32 length)
{
	return (ALIGN(address + length, PAGE_SIZE) -
		(address & PAGE_MASK)) >> PAGE_SHIFT;
}

static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
					   struct mlx5_ib_pfault *pfault)
{
	struct mlx5_pagefault *mpfault = &pfault->mpfault;
	u64 address;
	u32 length;
	u32 prefetch_len = mpfault->bytes_committed;
	int prefetch_activated = 0;
	u32 rkey = mpfault->rdma.r_key;
	int ret;

	/* The RDMA responder handler handles the page fault in two parts.
	 * First it brings the necessary pages for the current packet
	 * (and uses the pfault context), and then (after resuming the QP)
	 * prefetches more pages. The second operation cannot use the pfault
	 * context and therefore uses the dummy_pfault context allocated on
	 * the stack */
	struct mlx5_ib_pfault dummy_pfault = {};

	dummy_pfault.mpfault.bytes_committed = 0;

	mpfault->rdma.rdma_va += mpfault->bytes_committed;
	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
					 mpfault->rdma.rdma_op_len);
	mpfault->bytes_committed = 0;

	address = mpfault->rdma.rdma_va;
	length  = mpfault->rdma.rdma_op_len;

	/* For some operations, the hardware cannot tell the exact message
	 * length, and in those cases it reports zero. Use prefetch
	 * logic. */
	if (length == 0) {
		prefetch_activated = 1;
		length = mpfault->rdma.packet_size;
		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
	}

	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
					    NULL);
	if (ret == -EAGAIN) {
		/* We're racing with an invalidation, don't prefetch */
		prefetch_activated = 0;
	} else if (ret < 0 || pages_in_range(address, length) > ret) {
		mlx5_ib_page_fault_resume(qp, pfault, 1);
		return;
	}

	mlx5_ib_page_fault_resume(qp, pfault, 0);

	/* At this point, there might be a new pagefault already arriving in
	 * the eq, switch to the dummy pagefault for the rest of the
	 * processing. We're still OK with the objects being alive as the
	 * work-queue is being fenced. */

	if (prefetch_activated) {
		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
						    address,
						    prefetch_len,
						    NULL);
		if (ret < 0) {
			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
				ret, prefetch_activated,
				qp->ibqp.qp_num, address, prefetch_len);
		}
	}
}

void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
			       struct mlx5_ib_pfault *pfault)
{
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
	case MLX5_PFAULT_SUBTYPE_WQE:
		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
		break;
	case MLX5_PFAULT_SUBTYPE_RDMA:
		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
		break;
	default:
		pr_warn("Invalid page fault event subtype: 0x%x\n",
			event_subtype);
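
For context (not part of this commit): responder-side RDMA read/write page faults can only occur against memory regions registered for on-demand paging. A minimal libibverbs sketch of such a registration follows; the device selection, buffer size and access flags are illustrative assumptions, and a complete program would also query the device's ODP capabilities and set up QPs so a remote peer can actually issue the RDMA operations.

#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **devs = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_mr *mr;
	size_t len = 16 * 1024 * 1024;
	void *buf;

	if (!devs || !devs[0])
		return 1;
	ctx = ibv_open_device(devs[0]);
	if (!ctx)
		return 1;
	pd = ibv_alloc_pd(ctx);
	buf = malloc(len);
	if (!pd || !buf)
		return 1;

	/* IBV_ACCESS_ON_DEMAND registers the buffer without pinning its
	 * pages; a remote RDMA READ/WRITE that lands on a non-resident page
	 * then raises the responder page fault handled by this patch. */
	mr = ibv_reg_mr(pd, buf, len,
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE |
			IBV_ACCESS_REMOTE_READ | IBV_ACCESS_ON_DEMAND);
	if (!mr) {
		perror("ibv_reg_mr(ON_DEMAND)");
		return 1;
	}
	printf("ODP MR registered: lkey=0x%x rkey=0x%x\n", mr->lkey, mr->rkey);

	ibv_dereg_mr(mr);
	free(buf);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(devs);
	return 0;
}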