Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 24da0016 authored by Ariel Levkovich's avatar Ariel Levkovich Committed by Jason Gunthorpe
Browse files

IB/mlx5: Device memory support in mlx5_ib



This patch adds the mlx5_ib driver implementation for the device
memory allocation API.
It implements the ib_device callbacks for allocation and deallocation
operations as well as a new mmap command support which allows mapping
an allocated device memory to a VMA.

The change also adds reporting of device memory maximum size and
alignment parameters reported in device capabilities.

The allocation/deallocation operations are using new firmware
commands to allocate MEMIC memory on the device.

Signed-off-by: default avatarAriel Levkovich <lariel@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent e72bd817
Loading
Loading
Loading
Loading
+106 −0
Original line number Diff line number Diff line
@@ -66,3 +66,109 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,

	return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
}

int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
			  u64 length, u32 alignment)
{
	struct mlx5_core_dev *dev = memic->dev;
	u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
					>> PAGE_SHIFT;
	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
	u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
	u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
	u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
	u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
	u32 mlx5_alignment;
	u64 page_idx = 0;
	int ret = 0;

	if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
		return -EINVAL;

	/* mlx5 device sets alignment as 64*2^driver_value
	 * so normalizing is needed.
	 */
	mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
			 alignment - MLX5_MEMIC_BASE_ALIGN;
	if (mlx5_alignment > max_alignment)
		return -EINVAL;

	MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
	MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
	MLX5_SET(alloc_memic_in, in, memic_size, length);
	MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
		 mlx5_alignment);

	do {
		spin_lock(&memic->memic_lock);
		page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages,
						      num_memic_hw_pages,
						      page_idx,
						      num_pages, 0);

		if (page_idx + num_pages <= num_memic_hw_pages)
			bitmap_set(memic->memic_alloc_pages,
				   page_idx, num_pages);
		else
			ret = -ENOMEM;

		spin_unlock(&memic->memic_lock);

		if (ret)
			return ret;

		MLX5_SET64(alloc_memic_in, in, range_start_addr,
			   hw_start_addr + (page_idx * PAGE_SIZE));

		ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
		if (ret) {
			spin_lock(&memic->memic_lock);
			bitmap_clear(memic->memic_alloc_pages,
				     page_idx, num_pages);
			spin_unlock(&memic->memic_lock);

			if (ret == -EAGAIN) {
				page_idx++;
				continue;
			}

			return ret;
		}

		*addr = pci_resource_start(dev->pdev, 0) +
			MLX5_GET64(alloc_memic_out, out, memic_start_addr);

		return ret;
	} while (page_idx < num_memic_hw_pages);

	return ret;
}

int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
{
	struct mlx5_core_dev *dev = memic->dev;
	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
	u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
	u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
	u64 start_page_idx;
	int err;

	addr -= pci_resource_start(dev->pdev, 0);
	start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;

	MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
	MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
	MLX5_SET(dealloc_memic_in, in, memic_size, length);

	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));

	if (!err) {
		spin_lock(&memic->memic_lock);
		bitmap_clear(memic->memic_alloc_pages,
			     start_page_idx, num_pages);
		spin_unlock(&memic->memic_lock);
	}

	return err;
}
+4 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H

#include "mlx5_ib.h"
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>

@@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
			       void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
				void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
			 u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
#endif /* MLX5_IB_CMD_H */
+142 −1
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -891,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
		props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
	}

	if (MLX5_CAP_DEV_MEM(mdev, memic)) {
		props->max_dm_size =
			MLX5_CAP_DEV_MEM(mdev, max_memic_size);
	}

	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;

@@ -2014,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
		return "best effort WC";
	case MLX5_IB_MMAP_NC_PAGE:
		return "NC";
	case MLX5_IB_MMAP_DEVICE_MEM:
		return "Device Memory";
	default:
		return NULL;
	}
@@ -2172,6 +2180,34 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
	return err;
}

static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct mlx5_ib_ucontext *mctx = to_mucontext(context);
	struct mlx5_ib_dev *dev = to_mdev(context->device);
	u16 page_idx = get_extended_index(vma->vm_pgoff);
	size_t map_size = vma->vm_end - vma->vm_start;
	u32 npages = map_size >> PAGE_SHIFT;
	phys_addr_t pfn;
	pgprot_t prot;

	if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
	    page_idx + npages)
		return -EINVAL;

	pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
	      PAGE_SHIFT) +
	      page_idx;
	prot = pgprot_writecombine(vma->vm_page_prot);
	vma->vm_page_prot = prot;

	if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
			       vma->vm_page_prot))
		return -EAGAIN;

	return mlx5_ib_set_vma_data(vma, mctx);
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2216,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
	case MLX5_IB_MMAP_CLOCK_INFO:
		return mlx5_ib_mmap_clock_info_page(dev, vma, context);

	case MLX5_IB_MMAP_DEVICE_MEM:
		return dm_mmap(ibcontext, vma);

	default:
		return -EINVAL;
	}
@@ -2223,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
	return 0;
}

struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
			       struct ib_ucontext *context,
			       struct ib_dm_alloc_attr *attr,
			       struct uverbs_attr_bundle *attrs)
{
	u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
	struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
	phys_addr_t memic_addr;
	struct mlx5_ib_dm *dm;
	u64 start_offset;
	u32 page_idx;
	int err;

	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
	if (!dm)
		return ERR_PTR(-ENOMEM);

	mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
		    attr->length, act_size, attr->alignment);

	err = mlx5_cmd_alloc_memic(memic, &memic_addr,
				   act_size, attr->alignment);
	if (err)
		goto err_free;

	start_offset = memic_addr & ~PAGE_MASK;
	page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
		    PAGE_SHIFT;

	err = uverbs_copy_to(attrs,
			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
			     &start_offset, sizeof(start_offset));
	if (err)
		goto err_dealloc;

	err = uverbs_copy_to(attrs,
			     MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
			     &page_idx, sizeof(page_idx));
	if (err)
		goto err_dealloc;

	bitmap_set(to_mucontext(context)->dm_pages, page_idx,
		   DIV_ROUND_UP(act_size, PAGE_SIZE));

	dm->dev_addr = memic_addr;

	return &dm->ibdm;

err_dealloc:
	mlx5_cmd_dealloc_memic(memic, memic_addr,
			       act_size);
err_free:
	kfree(dm);
	return ERR_PTR(err);
}

int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
{
	struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
	struct mlx5_ib_dm *dm = to_mdm(ibdm);
	u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
	u32 page_idx;
	int ret;

	ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
	if (ret)
		return ret;

	page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
		    PAGE_SHIFT;
	bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
		     page_idx,
		     DIV_ROUND_UP(act_size, PAGE_SIZE));

	kfree(dm);

	return 0;
}

static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
				      struct ib_ucontext *context,
				      struct ib_udata *udata)
@@ -4834,13 +4954,22 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
	mlx5_nic_vport_disable_roce(dev->mdev);
}

ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
			     UVERBS_METHOD_DM_ALLOC,
			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
						  UVERBS_ATTR_TYPE(u64),
						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
						  UVERBS_ATTR_TYPE(u16),
						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));

ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
			     UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
			     &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
						 UVERBS_ATTR_TYPE(u64),
						 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));

#define NUM_TREES	1
#define NUM_TREES	2
static int populate_specs_root(struct mlx5_ib_dev *dev)
{
	const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
@@ -4851,6 +4980,10 @@ static int populate_specs_root(struct mlx5_ib_dev *dev)
	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
		default_root[num_trees++] = &mlx5_ib_flow_action;

	if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
		default_root[num_trees++] = &mlx5_ib_dm;

	dev->ib_dev.specs_root =
		uverbs_alloc_spec_tree(num_trees, default_root);

@@ -4925,6 +5058,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	spin_lock_init(&dev->memic.memic_lock);
	dev->memic.dev = mdev;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	err = init_srcu_struct(&dev->mr_srcu);
	if (err)
@@ -5087,6 +5223,11 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	if (MLX5_CAP_DEV_MEM(mdev, memic)) {
		dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
		dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
	}

	dev->ib_dev.create_flow	= mlx5_ib_create_flow;
	dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
	dev->ib_dev.uverbs_ex_cmd_mask |=
+34 −1
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/mlx5-abi.h>
#include <rdma/uverbs_ioctl.h>

#define mlx5_ib_dbg(dev, format, arg...)				\
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
@@ -108,6 +109,16 @@ enum {
	MLX5_IB_INVALID_BFREG		= BIT(31),
};

enum {
	MLX5_MAX_MEMIC_PAGES = 0x100,
	MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f,
};

enum {
	MLX5_MEMIC_BASE_ALIGN	= 6,
	MLX5_MEMIC_BASE_SIZE	= 1 << MLX5_MEMIC_BASE_ALIGN,
};

struct mlx5_ib_vma_private_data {
	struct list_head list;
	struct vm_area_struct *vma;
@@ -131,6 +142,7 @@ struct mlx5_ib_ucontext {
	struct mutex		vma_private_list_mutex;

	u64			lib_caps;
	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
};

static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -521,6 +533,11 @@ enum mlx5_ib_mtt_access_flags {
	MLX5_IB_MTT_WRITE = (1 << 1),
};

struct mlx5_ib_dm {
	struct ib_dm		ibdm;
	phys_addr_t		dev_addr;
};

#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)

struct mlx5_ib_mr {
@@ -784,6 +801,12 @@ struct mlx5_ib_flow_action {
	};
};

struct mlx5_memic {
	struct mlx5_core_dev *dev;
	spinlock_t		memic_lock;
	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
};

struct mlx5_ib_dev {
	struct ib_device		ib_dev;
	struct mlx5_core_dev		*mdev;
@@ -830,6 +853,7 @@ struct mlx5_ib_dev {
	u8			umr_fence;
	struct list_head	ib_dev_list;
	u64			sys_image_guid;
	struct mlx5_memic	memic;
};

static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -897,6 +921,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
	return container_of(msrq, struct mlx5_ib_srq, msrq);
}

static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
{
	return container_of(ibdm, struct mlx5_ib_dm, ibdm);
}

static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct mlx5_ib_mr, ibmr);
@@ -1041,7 +1070,11 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
						      struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);

struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
			       struct ib_ucontext *context,
			       struct ib_dm_alloc_attr *attr,
			       struct uverbs_attr_bundle *attrs);
int mlx5_ib_dealloc_dm(struct ib_dm *ibdm);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
+55 −0
Original line number Diff line number Diff line
@@ -92,6 +92,8 @@ enum {
	MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
	MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS        = 0x203,
	MLX5_CMD_OP_PAGE_FAULT_RESUME             = 0x204,
	MLX5_CMD_OP_ALLOC_MEMIC                   = 0x205,
	MLX5_CMD_OP_DEALLOC_MEMIC                 = 0x206,
	MLX5_CMD_OP_CREATE_EQ                     = 0x301,
	MLX5_CMD_OP_DESTROY_EQ                    = 0x302,
	MLX5_CMD_OP_QUERY_EQ                      = 0x303,
@@ -8886,4 +8888,57 @@ struct mlx5_ifc_destroy_vport_lag_in_bits {
	u8         reserved_at_40[0x40];
};

struct mlx5_ifc_alloc_memic_in_bits {
	u8         opcode[0x10];
	u8         reserved_at_10[0x10];

	u8         reserved_at_20[0x10];
	u8         op_mod[0x10];

	u8         reserved_at_30[0x20];

	u8	   reserved_at_40[0x18];
	u8	   log_memic_addr_alignment[0x8];

	u8         range_start_addr[0x40];

	u8         range_size[0x20];

	u8         memic_size[0x20];
};

struct mlx5_ifc_alloc_memic_out_bits {
	u8         status[0x8];
	u8         reserved_at_8[0x18];

	u8         syndrome[0x20];

	u8         memic_start_addr[0x40];
};

struct mlx5_ifc_dealloc_memic_in_bits {
	u8         opcode[0x10];
	u8         reserved_at_10[0x10];

	u8         reserved_at_20[0x10];
	u8         op_mod[0x10];

	u8         reserved_at_40[0x40];

	u8         memic_start_addr[0x40];

	u8         memic_size[0x20];

	u8         reserved_at_e0[0x20];
};

struct mlx5_ifc_dealloc_memic_out_bits {
	u8         status[0x8];
	u8         reserved_at_8[0x18];

	u8         syndrome[0x20];

	u8         reserved_at_40[0x40];
};

#endif /* MLX5_IFC_H */
Loading