Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2f62747c authored by Saeed Mahameed's avatar Saeed Mahameed
Browse files


mlx5-next shared branch with rdma subtree to avoid mlx5 rdma v.s. netdev
conflicts.

Highlights:

1) RDMA ODP  (On Demand Paging) improvements and moving ODP logic to
mlx5 RDMA driver
2) Improved mlx5 core driver and device events handling and provided API
for upper layers to subscribe to device events.
3) RDMA only code cleanup from mlx5 core
4) Add helper to get CQE opcode
5) Rework handling of port module events
6) shared mlx5_ifc.h updates to avoid conflicts

Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parents d8ed257f 6c22a119
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -647,8 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
				flags, local_page_list, NULL, NULL);
		up_read(&owning_mm->mmap_sem);

		if (npages < 0)
		if (npages < 0) {
			if (npages != -EAGAIN)
				pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
			else
				pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
			break;
		}

		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
		mutex_lock(&umem_odp->umem_mutex);
@@ -666,8 +671,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
			ret = ib_umem_odp_map_dma_single_page(
					umem_odp, k, local_page_list[j],
					access_mask, current_seq);
			if (ret < 0)
			if (ret < 0) {
				if (ret != -EAGAIN)
					pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
				else
					pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
				break;
			}

			p = page_to_phys(local_page_list[j]);
			k++;
+3 −1
Original line number Diff line number Diff line
obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o

mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
		srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
		cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
+6 −6
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
@@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
@@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	switch (get_cqe_opcode(cqe)) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
@@ -537,7 +537,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	opcode = get_cqe_opcode(cqe64);
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
@@ -1295,7 +1295,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
	while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
					     (i + 1) & cq->resize_buf->nent);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
+4 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 */

#include "ib_rep.h"
#include "srq.h"

static const struct mlx5_ib_profile rep_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_INIT,
@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
		     mlx5_ib_stage_rep_roce_init,
		     mlx5_ib_stage_rep_roce_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
		     mlx5_init_srq_table,
		     mlx5_cleanup_srq_table),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
		     mlx5_ib_stage_dev_res_init,
		     mlx5_ib_stage_dev_res_cleanup),
+165 −66
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
#include "srq.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
@@ -82,10 +83,13 @@ static char mlx5_version[] =

struct mlx5_ib_event_work {
	struct work_struct	work;
	struct mlx5_core_dev	*dev;
	void			*context;
	enum mlx5_dev_event	event;
	unsigned long		param;
	union {
		struct mlx5_ib_dev	      *dev;
		struct mlx5_ib_multiport_info *mpi;
	};
	bool			is_slave;
	unsigned int		event;
	void			*param;
};

enum {
@@ -2669,11 +2673,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
			 ntohs(ib_spec->gre.val.protocol));

		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
				    gre_key_h),
				    gre_key.nvgre.hi),
		       &ib_spec->gre.mask.key,
		       sizeof(ib_spec->gre.mask.key));
		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
				    gre_key_h),
				    gre_key.nvgre.hi),
		       &ib_spec->gre.val.key,
		       sizeof(ib_spec->gre.val.key));
		break;
@@ -4226,72 +4230,101 @@ static void delay_drop_handler(struct work_struct *work)
	mutex_unlock(&delay_drop->lock);
}

static void mlx5_ib_handle_event(struct work_struct *_work)
static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
				 struct ib_event *ibev)
{
	struct mlx5_ib_event_work *work =
		container_of(_work, struct mlx5_ib_event_work, work);
	struct mlx5_ib_dev *ibdev;
	struct ib_event ibev;
	bool fatal = false;
	u8 port = (u8)work->param;

	if (mlx5_core_is_mp_slave(work->dev)) {
		ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
		if (!ibdev)
			goto out;
	} else {
		ibdev = work->context;
	switch (eqe->sub_type) {
	case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
		schedule_work(&ibdev->delay_drop.delay_drop_work);
		break;
	default: /* do nothing */
		return;
	}
}

	switch (work->event) {
	case MLX5_DEV_EVENT_SYS_ERROR:
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		fatal = true;
		break;
static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
			      struct ib_event *ibev)
{
	u8 port = (eqe->data.port.port >> 4) & 0xf;

	ibev->element.port_num = port;

	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
	case MLX5_DEV_EVENT_PORT_INITIALIZED:
	switch (eqe->sub_type) {
	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
		/* In RoCE, port up/down events are handled in
		 * mlx5_netdev_event().
		 */
		if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
					    IB_LINK_LAYER_ETHERNET)
			goto out;
			return -EINVAL;

		ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
		ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
				IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
		break;

	case MLX5_DEV_EVENT_LID_CHANGE:
		ibev.event = IB_EVENT_LID_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_LID:
		ibev->event = IB_EVENT_LID_CHANGE;
		break;

	case MLX5_DEV_EVENT_PKEY_CHANGE:
		ibev.event = IB_EVENT_PKEY_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
		ibev->event = IB_EVENT_PKEY_CHANGE;
		schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
		break;

	case MLX5_DEV_EVENT_GUID_CHANGE:
		ibev.event = IB_EVENT_GID_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
		ibev->event = IB_EVENT_GID_CHANGE;
		break;

	case MLX5_DEV_EVENT_CLIENT_REREG:
		ibev.event = IB_EVENT_CLIENT_REREGISTER;
	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
		ibev->event = IB_EVENT_CLIENT_REREGISTER;
		break;
	case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
		schedule_work(&ibdev->delay_drop.delay_drop_work);
	default:
		return -EINVAL;
	}

	return 0;
}

static void mlx5_ib_handle_event(struct work_struct *_work)
{
	struct mlx5_ib_event_work *work =
		container_of(_work, struct mlx5_ib_event_work, work);
	struct mlx5_ib_dev *ibdev;
	struct ib_event ibev;
	bool fatal = false;

	if (work->is_slave) {
		ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
		if (!ibdev)
			goto out;
	} else {
		ibdev = work->dev;
	}

	switch (work->event) {
	case MLX5_DEV_EVENT_SYS_ERROR:
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		ibev.element.port_num  = (u8)(unsigned long)work->param;
		fatal = true;
		break;
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		if (handle_port_change(ibdev, work->param, &ibev))
			goto out;
		break;
	case MLX5_EVENT_TYPE_GENERAL_EVENT:
		handle_general_event(ibdev, work->param, &ibev);
		/* fall through */
	default:
		goto out;
	}

	ibev.device = &ibdev->ib_dev;
	ibev.element.port_num = port;

	if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
	if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
		mlx5_ib_warn(ibdev, "warning: event on port %d\n",  ibev.element.port_num);
		goto out;
	}

@@ -4304,22 +4337,43 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
	kfree(work);
}

static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
			  enum mlx5_dev_event event, unsigned long param)
static int mlx5_ib_event(struct notifier_block *nb,
			 unsigned long event, void *param)
{
	struct mlx5_ib_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return;
		return NOTIFY_DONE;

	INIT_WORK(&work->work, mlx5_ib_handle_event);
	work->dev = dev;
	work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
	work->is_slave = false;
	work->param = param;
	work->context = context;
	work->event = event;

	queue_work(mlx5_ib_event_wq, &work->work);

	return NOTIFY_OK;
}

static int mlx5_ib_event_slave_port(struct notifier_block *nb,
				    unsigned long event, void *param)
{
	struct mlx5_ib_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return NOTIFY_DONE;

	INIT_WORK(&work->work, mlx5_ib_handle_event);
	work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
	work->is_slave = true;
	work->param = param;
	work->event = event;
	queue_work(mlx5_ib_event_wq, &work->work);

	return NOTIFY_OK;
}

static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -5330,7 +5384,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
	return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector);
}

/* The mlx5_ib_multiport_mutex should be held when calling this function */
@@ -5350,6 +5404,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
		spin_unlock(&port->mp.mpi_lock);
		return;
	}

	if (mpi->mdev_events.notifier_call)
		mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
	mpi->mdev_events.notifier_call = NULL;

	mpi->ibdev = NULL;

	spin_unlock(&port->mp.mpi_lock);
@@ -5405,6 +5464,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,

	ibdev->port[port_num].mp.mpi = mpi;
	mpi->ibdev = ibdev;
	mpi->mdev_events.notifier_call = NULL;
	spin_unlock(&ibdev->port[port_num].mp.mpi_lock);

	err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5422,6 +5482,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
		goto unbind;
	}

	mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
	mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);

	err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
	if (err)
		goto unbind;
@@ -5694,8 +5757,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey	= 0 /* not supported for now */;
	dev->ib_dev.phys_port_cnt	= dev->num_ports;
	dev->ib_dev.num_comp_vectors    =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.num_comp_vectors    = mlx5_comp_vectors_count(mdev);
	dev->ib_dev.dev.parent		= &mdev->pdev->dev;

	mutex_init(&dev->cap_mask_mutex);
@@ -6034,6 +6096,11 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
	return mlx5_ib_odp_init_one(dev);
}

void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
{
	mlx5_ib_odp_cleanup_one(dev);
}

int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
@@ -6152,6 +6219,34 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
	mlx5_ib_unregister_vport_reps(dev);
}

static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
	dev->mdev_events.notifier_call = mlx5_ib_event;
	mlx5_notifier_register(dev->mdev, &dev->mdev_events);
	return 0;
}

static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
	mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}

static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
{
	int uid;

	uid = mlx5_ib_devx_create(dev);
	if (uid > 0)
		dev->devx_whitelist_uid = uid;

	return 0;
}
static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
	if (dev->devx_whitelist_uid)
		mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}

void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
		      const struct mlx5_ib_profile *profile,
		      int stage)
@@ -6163,8 +6258,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
			profile->stage[stage].cleanup(dev);
	}

	if (dev->devx_whitelist_uid)
		mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
	ib_dealloc_device((struct ib_device *)dev);
}

@@ -6173,7 +6266,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
	int err;
	int i;
	int uid;

	for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
		if (profile->stage[i].init) {
@@ -6183,10 +6275,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
		}
	}

	uid = mlx5_ib_devx_create(dev);
	if (uid > 0)
		dev->devx_whitelist_uid = uid;

	dev->profile = profile;
	dev->ib_active = true;

@@ -6214,12 +6302,18 @@ static const struct mlx5_ib_profile pf_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
		     mlx5_ib_stage_roce_init,
		     mlx5_ib_stage_roce_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
		     mlx5_init_srq_table,
		     mlx5_cleanup_srq_table),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
		     mlx5_ib_stage_dev_res_init,
		     mlx5_ib_stage_dev_res_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
		     mlx5_ib_stage_dev_notifier_init,
		     mlx5_ib_stage_dev_notifier_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_ODP,
		     mlx5_ib_stage_odp_init,
		     NULL),
		     mlx5_ib_stage_odp_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
		     mlx5_ib_stage_counters_init,
		     mlx5_ib_stage_counters_cleanup),
@@ -6238,6 +6332,9 @@ static const struct mlx5_ib_profile pf_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_SPECS,
		     mlx5_ib_stage_populate_specs,
		     NULL),
	STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
		     mlx5_ib_stage_devx_init,
		     mlx5_ib_stage_devx_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
		     mlx5_ib_stage_ib_reg_init,
		     mlx5_ib_stage_ib_reg_cleanup),
@@ -6265,9 +6362,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
		     mlx5_ib_stage_rep_roce_init,
		     mlx5_ib_stage_rep_roce_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
		     mlx5_init_srq_table,
		     mlx5_cleanup_srq_table),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
		     mlx5_ib_stage_dev_res_init,
		     mlx5_ib_stage_dev_res_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
		     mlx5_ib_stage_dev_notifier_init,
		     mlx5_ib_stage_dev_notifier_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
		     mlx5_ib_stage_counters_init,
		     mlx5_ib_stage_counters_cleanup),
@@ -6388,10 +6491,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
static struct mlx5_interface mlx5_ib_interface = {
	.add            = mlx5_ib_add,
	.remove         = mlx5_ib_remove,
	.event          = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	.pfault		= mlx5_ib_pfault,
#endif
	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
};

Loading