
Commit e420f0c0 authored by Haggai Eran, committed by Roland Dreier

mlx5_core: Add support for page fault events and low-level handling



* Add a handler function pointer in the mlx5_core_qp struct for page
  fault events. Handle page fault events by calling the handler
  function, if it is not NULL (see the consumer sketch after the
  sign-offs below).
* Add on-demand paging capability query command.
* Export command for resuming QPs after page faults.
* Add various constants related to paging support.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 6cb7ff3d
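
Taken together, the pieces below are meant to be consumed by an upper-layer driver (e.g. mlx5_ib): it registers a page-fault handler on the QP, makes the faulting pages present, and resumes the QP with the PAGE_FAULT_RESUME command. The following is a minimal sketch under stated assumptions, not code from this commit: the handler signature is inferred from the qp->pfault_handler(qp, &pfault) call site in qp.c, and my_pfault_handler/my_qp_to_dev are hypothetical names.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/qp.h>

/* Hypothetical consumer handler; signature inferred from the
 * qp->pfault_handler(qp, &pfault) call site added in qp.c. */
static void my_pfault_handler(struct mlx5_core_qp *qp,
			      struct mlx5_pagefault *pfault)
{
	struct mlx5_core_dev *dev = my_qp_to_dev(qp);	/* hypothetical lookup */
	int error = 0;

	/* ... make the faulting range present: a WQE fault reports
	 * pfault->wqe.wqe_index, an RDMA fault reports
	 * pfault->rdma.rdma_va and pfault->rdma.rdma_op_len ... */

	/* Resume the QP, echoing the fault's requestor/write/RDMA flags;
	 * a non-zero 'error' completes the fault with an error instead. */
	if (mlx5_core_page_fault_resume(dev, qp->qpn, pfault->flags, error))
		pr_warn("page fault resume failed for QP 0x%06x\n", qp->qpn);
}

/* Registered after mlx5_core_create_qp(), before faults can arrive: */
static void my_arm_odp(struct mlx5_core_qp *qp)
{
	qp->pfault_handler = my_pfault_handler;
}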
drivers/net/ethernet/mellanox/mlx5/core/eq.c +12 −1
@@ -157,6 +157,8 @@ static const char *eqe_type_str(u8 type)
		return "MLX5_EVENT_TYPE_CMD";
	case MLX5_EVENT_TYPE_PAGE_REQUEST:
		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
	case MLX5_EVENT_TYPE_PAGE_FAULT:
		return "MLX5_EVENT_TYPE_PAGE_FAULT";
	default:
		return "Unrecognized event";
	}
@@ -279,6 +281,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
			}
			break;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
		case MLX5_EVENT_TYPE_PAGE_FAULT:
			mlx5_eq_pagefault(dev, eqe);
			break;
#endif

		default:
			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
@@ -446,8 +453,12 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
int mlx5_start_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
	int err;

	if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);

	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
@@ -459,7 +470,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
	mlx5_cmd_use_events(dev);

	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+				 MLX5_NUM_ASYNC_EQE, async_event_mask,
				 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
	if (err) {
		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
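
With MLX5_EVENT_TYPE_PAGE_FAULT defined as 0xc (see the device.h hunk below), the hunk above arms bit 12 of the async EQ's event mask only when the HCA advertises on-demand paging. A one-line userspace check of the bit arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* MLX5_EVENT_TYPE_PAGE_FAULT == 0xc, so the armed bit is 1 << 12. */
	uint64_t page_fault_bit = 1ull << 0xc;

	printf("0x%llx\n", (unsigned long long)page_fault_bit); /* 0x1000 */
	return 0;
}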
drivers/net/ethernet/mellanox/mlx5/core/fw.c +40 −0
@@ -69,6 +69,46 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps)
	return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR);
}

int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out;
	int err;

	if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
		return -ENOTSUPP;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;
	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
	if (err)
		goto out;

	err = mlx5_cmd_status_to_err_v2(out);
	if (err) {
		mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err);
		goto out;
	}

	memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct),
	       sizeof(*caps));

	mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n",
		be32_to_cpu(caps->per_transport_caps.rc_odp_caps),
		be32_to_cpu(caps->per_transport_caps.uc_odp_caps),
		be32_to_cpu(caps->per_transport_caps.ud_odp_caps));

out:
	kfree(out);
	return err;
}
EXPORT_SYMBOL(mlx5_query_odp_caps);

int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
{
	struct mlx5_cmd_init_hca_mbox_in in;
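
The new mlx5_query_odp_caps() above copies the per-transport capability words out of the QUERY_HCA_CAP output; each word is big-endian and carries the MLX5_ODP_SUPPORT_* bits defined in the device.h hunk further down. A minimal sketch of a caller, assuming only symbols added by this commit (rc_odp_write_supported is a hypothetical name):

#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>

/* Hypothetical helper: does the HCA currently support ODP for
 * RDMA writes on RC transports? */
static bool rc_odp_write_supported(struct mlx5_core_dev *dev)
{
	struct mlx5_odp_caps caps;

	/* Returns -ENOTSUPP when MLX5_DEV_CAP_FLAG_ON_DMND_PG is clear. */
	if (mlx5_query_odp_caps(dev, &caps))
		return false;

	return be32_to_cpu(caps.per_transport_caps.rc_odp_caps) &
	       MLX5_ODP_SUPPORT_WRITE;
}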
drivers/net/ethernet/mellanox/mlx5/core/qp.c +119 −0
@@ -88,6 +88,95 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
	mlx5_core_put_rsc(common);
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
{
	struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
	int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
	struct mlx5_core_qp *qp =
		container_of(common, struct mlx5_core_qp, common);
	struct mlx5_pagefault pfault;

	if (!qp) {
		mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
			       qpn);
		return;
	}

	pfault.event_subtype = eqe->sub_type;
	pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
		(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
	pfault.bytes_committed = be32_to_cpu(
		pf_eqe->bytes_committed);

	mlx5_core_dbg(dev,
		      "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
		      eqe->sub_type, pfault.flags);

	switch (eqe->sub_type) {
	case MLX5_PFAULT_SUBTYPE_RDMA:
		/* RDMA based event */
		pfault.rdma.r_key =
			be32_to_cpu(pf_eqe->rdma.r_key);
		pfault.rdma.packet_size =
			be16_to_cpu(pf_eqe->rdma.packet_length);
		pfault.rdma.rdma_op_len =
			be32_to_cpu(pf_eqe->rdma.rdma_op_len);
		pfault.rdma.rdma_va =
			be64_to_cpu(pf_eqe->rdma.rdma_va);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
			      qpn, pfault.rdma.r_key);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: rdma_op_len: 0x%08x,\n",
			      pfault.rdma.rdma_op_len);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: rdma_va: 0x%016llx,\n",
			      pfault.rdma.rdma_va);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
			      pfault.bytes_committed);
		break;

	case MLX5_PFAULT_SUBTYPE_WQE:
		/* WQE based event */
		pfault.wqe.wqe_index =
			be16_to_cpu(pf_eqe->wqe.wqe_index);
		pfault.wqe.packet_size =
			be16_to_cpu(pf_eqe->wqe.packet_length);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
			      qpn, pfault.wqe.wqe_index);
		mlx5_core_dbg(dev,
			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
			      pfault.bytes_committed);
		break;

	default:
		mlx5_core_warn(dev,
			       "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
			       eqe->sub_type, qpn);
		/* Unsupported page faults should still be resolved by the
		 * page fault handler
		 */
	}

	if (qp->pfault_handler) {
		qp->pfault_handler(qp, &pfault);
	} else {
		mlx5_core_err(dev,
			      "ODP event for QP %08x, without a fault handler in QP\n",
			      qpn);
		/* Page fault will remain unresolved. QP will hang until it is
		 * destroyed
		 */
	}

	mlx5_core_put_rsc(common);
}
#endif

int mlx5_core_create_qp(struct mlx5_core_dev *dev,
			struct mlx5_core_qp *qp,
			struct mlx5_create_qp_mbox_in *in,
@@ -322,3 +411,33 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
	return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
				u8 flags, int error)
{
	struct mlx5_page_fault_resume_mbox_in in;
	struct mlx5_page_fault_resume_mbox_out out;
	int err;

	memset(&in, 0, sizeof(in));
	memset(&out, 0, sizeof(out));
	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
	in.hdr.opmod = 0;
	flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
		  MLX5_PAGE_FAULT_RESUME_WRITE	   |
		  MLX5_PAGE_FAULT_RESUME_RDMA);
	flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
	in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
				   (flags << MLX5_QPN_BITS));
	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err)
		return err;

	if (out.hdr.status)
		err = mlx5_cmd_status_to_err(&out.hdr);

	return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
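
PAGE_FAULT_RESUME packs its arguments into a single 32-bit word: the QPN in the low 24 bits and the flags above them, mirroring how mlx5_eq_pagefault() unpacks flags_qpn on the event side. A worked userspace example of the packing, assuming MLX5_QPN_BITS == 24 and MLX5_QPN_MASK == (1 << 24) - 1 (both are defined in qp.h, outside this diff):

#include <stdint.h>
#include <stdio.h>

enum {
	QPN_BITS     = 24,			/* assumed MLX5_QPN_BITS */
	QPN_MASK     = (1 << QPN_BITS) - 1,	/* assumed MLX5_QPN_MASK */
	RESUME_WRITE = 1 << 1,			/* MLX5_PAGE_FAULT_RESUME_WRITE */
	RESUME_RDMA  = 1 << 2,			/* MLX5_PAGE_FAULT_RESUME_RDMA */
};

int main(void)
{
	uint32_t qpn = 0x000123;
	uint32_t flags = RESUME_WRITE | RESUME_RDMA;	/* 0x06 */
	uint32_t flags_qpn = (qpn & QPN_MASK) | (flags << QPN_BITS);

	/* Prints 0x06000123; the driver then stores it big-endian. */
	printf("flags_qpn = 0x%08x\n", flags_qpn);
	return 0;
}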
include/linux/mlx5/device.h +53 −1
@@ -119,6 +119,15 @@ enum {
	MLX5_MAX_LOG_PKEY_TABLE  = 5,
};

enum {
	MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
};

enum {
	MLX5_PFAULT_SUBTYPE_WQE = 0,
	MLX5_PFAULT_SUBTYPE_RDMA = 1,
};

enum {
	MLX5_PERM_LOCAL_READ	= 1 << 2,
	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
@@ -215,6 +224,8 @@ enum mlx5_event {

	MLX5_EVENT_TYPE_CMD		   = 0x0a,
	MLX5_EVENT_TYPE_PAGE_REQUEST	   = 0xb,

	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
};

enum {
@@ -300,6 +311,8 @@ enum {
enum {
	HCA_CAP_OPMOD_GET_MAX	= 0,
	HCA_CAP_OPMOD_GET_CUR	= 1,
	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
	HCA_CAP_OPMOD_GET_ODP_CUR = 5
};

struct mlx5_inbox_hdr {
@@ -329,6 +342,23 @@ struct mlx5_cmd_query_adapter_mbox_out {
	u8			vsd_psid[16];
};

enum mlx5_odp_transport_cap_bits {
	MLX5_ODP_SUPPORT_SEND	 = 1 << 31,
	MLX5_ODP_SUPPORT_RECV	 = 1 << 30,
	MLX5_ODP_SUPPORT_WRITE	 = 1 << 29,
	MLX5_ODP_SUPPORT_READ	 = 1 << 28,
};

struct mlx5_odp_caps {
	char reserved[0x10];
	struct {
		__be32			rc_odp_caps;
		__be32			uc_odp_caps;
		__be32			ud_odp_caps;
	} per_transport_caps;
	char reserved2[0xe4];
};

struct mlx5_cmd_init_hca_mbox_in {
	struct mlx5_inbox_hdr	hdr;
	u8			rsvd0[2];
@@ -449,6 +479,27 @@ struct mlx5_eqe_page_req {
	__be32		rsvd1[5];
};

struct mlx5_eqe_page_fault {
	__be32 bytes_committed;
	union {
		struct {
			u16     reserved1;
			__be16  wqe_index;
			u16	reserved2;
			__be16  packet_length;
			u8	reserved3[12];
		} __packed wqe;
		struct {
			__be32  r_key;
			u16	reserved1;
			__be16  packet_length;
			__be32  rdma_op_len;
			__be64  rdma_va;
		} __packed rdma;
	} __packed;
	__be32 flags_qpn;
} __packed;

union ev_data {
	__be32				raw[7];
	struct mlx5_eqe_cmd		cmd;
@@ -460,6 +511,7 @@ union ev_data {
	struct mlx5_eqe_congestion	cong;
	struct mlx5_eqe_stall_vl	stall_vl;
	struct mlx5_eqe_page_req	req_pages;
	struct mlx5_eqe_page_fault	page_fault;
} __packed;

struct mlx5_eqe {
@@ -826,7 +878,7 @@ struct mlx5_query_special_ctxs_mbox_out {
struct mlx5_create_mkey_mbox_in {
	struct mlx5_inbox_hdr	hdr;
	__be32			input_mkey_index;
-	u8			rsvd0[4];
+	__be32			flags;
	struct mlx5_mkey_seg	seg;
	u8			rsvd1[16];
	__be32			xlat_oct_act_size;
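
The new mlx5_eqe_page_fault has to fit exactly in the 28-byte EQE data area (union ev_data's raw[7]): 4 bytes of bytes_committed, a 20-byte wqe/rdma union, and 4 bytes of flags_qpn. A standalone sketch that mirrors the packed layout with fixed-width types and verifies the arithmetic at compile time:

#include <stdint.h>

/* Userspace mirror of the packed wire layout added above. */
struct pf_wqe {				/* mirrors pf_eqe->wqe */
	uint16_t reserved1;
	uint16_t wqe_index;
	uint16_t reserved2;
	uint16_t packet_length;
	uint8_t  reserved3[12];
} __attribute__((packed));

struct pf_rdma {			/* mirrors pf_eqe->rdma */
	uint32_t r_key;
	uint16_t reserved1;
	uint16_t packet_length;
	uint32_t rdma_op_len;
	uint64_t rdma_va;
} __attribute__((packed));

struct pf_eqe {				/* mirrors mlx5_eqe_page_fault */
	uint32_t bytes_committed;
	union {
		struct pf_wqe  wqe;
		struct pf_rdma rdma;
	} __attribute__((packed));
	uint32_t flags_qpn;
} __attribute__((packed));

_Static_assert(sizeof(struct pf_wqe)  == 20, "WQE leg is 20 bytes");
_Static_assert(sizeof(struct pf_rdma) == 20, "RDMA leg is 20 bytes");
_Static_assert(sizeof(struct pf_eqe)  == 28, "fits ev_data's raw[7]");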
include/linux/mlx5/driver.h +12 −0
@@ -113,6 +113,13 @@ enum {
	MLX5_REG_HOST_ENDIANNESS = 0x7004,
};

enum mlx5_page_fault_resume_flags {
	MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
	MLX5_PAGE_FAULT_RESUME_WRITE	 = 1 << 1,
	MLX5_PAGE_FAULT_RESUME_RDMA	 = 1 << 2,
	MLX5_PAGE_FAULT_RESUME_ERROR	 = 1 << 7,
};

enum dbg_rsc_type {
	MLX5_DBG_RSC_QP,
	MLX5_DBG_RSC_EQ,
@@ -703,6 +710,9 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
			 int npsvs, u32 *sig_index);
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
			struct mlx5_odp_caps *odp_caps);

static inline u32 mlx5_mkey_to_idx(u32 mkey)
{