Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0a9a0188 authored by Jack Morgenstein, committed by Roland Dreier
Browse files

mlx4: MAD_IFC paravirtualization



The MAD_IFC firmware command fulfills two functions.

First, it is used in the QP0/QP1 MAD-handling flow to obtain
information from the FW (for answering queries), and for setting
variables in the HCA (MAD SET packets).

For this, MAD_IFC should provide the FW (physical) view of the data.
This is the view that OpenSM needs.  We call this the "network view".

In the second case, MAD_IFC is used by various verbs to obtain data
regarding the local HCA (e.g., ib_query_device()).  We call this the
"host view".

This data needs to be paravirtualized.

MAD_IFC therefore needs a wrapper function, and also needs another
flag indicating whether it should provide the network view (when it is
called by ib_process_mad in special-qp packet handling), or the host
view (when it is called while implementing a verb).

There are currently 2 flag parameters in mlx4_MAD_IFC already:
ignore_bkey and ignore_mkey.  These two parameters are replaced by a
single "mad_ifc_flags" parameter, with different bits set for each
flag.  A third flag is added: "network-view/host-view".

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 37bfc7c1
Loading
Loading
Loading
Loading
+12 −8
Original line number Diff line number Diff line
@@ -75,7 +75,7 @@ struct mlx4_rcv_tunnel_mad {
	struct ib_mad mad;
} __packed;

int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
		 void *in_mad, void *response_mad)
{
@@ -102,10 +102,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
	 * Key check traps can't be generated unless we have in_wc to
	 * tell us where to send the trap.
	 */
	if (ignore_mkey || !in_wc)
	if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
		op_modifier |= 0x1;
	if (ignore_bkey || !in_wc)
	if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
		op_modifier |= 0x2;
	if (mlx4_is_mfunc(dev->dev) &&
	    (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
		op_modifier |= 0x8;

	if (in_wc) {
		struct {
@@ -138,10 +141,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
		in_modifier |= in_wc->slid << 16;
	}

	err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
			   in_modifier, op_modifier,
	err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
			   mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
			   MLX4_CMD_NATIVE);
			   (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);

	if (!err)
		memcpy(response_mad, outmailbox->buf, 256);
@@ -614,8 +617,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
		prev_lid = pattr.lid;

	err = mlx4_MAD_IFC(to_mdev(ibdev),
			   mad_flags & IB_MAD_IGNORE_MKEY,
			   mad_flags & IB_MAD_IGNORE_BKEY,
			   (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
			   (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
			   MLX4_MAD_IFC_NET_VIEW,
			   port_num, in_wc, in_grh, in_mad, out_mad);
	if (err)
		return IB_MAD_RESULT_FAILURE;
+47 −17
Original line number Diff line number Diff line
@@ -98,7 +98,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
			   1, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

@@ -182,11 +183,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
}

static int ib_link_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
			      struct ib_port_attr *props, int netw_view)
{
	struct ib_smp *in_mad  = NULL;
	struct ib_smp *out_mad = NULL;
	int ext_active_speed;
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int err = -ENOMEM;

	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -198,7 +200,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL,
	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
				in_mad, out_mad);
	if (err)
		goto out;
@@ -211,6 +216,9 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
	props->state		= out_mad->data[32] & 0xf;
	props->phys_state	= out_mad->data[33] >> 4;
	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
	if (netw_view)
		props->gid_tbl_len = out_mad->data[50];
	else
		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
@@ -244,7 +252,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
		in_mad->attr_mod = cpu_to_be32(port);

		err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port,
		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
				   NULL, NULL, in_mad, out_mad);
		if (err)
			goto out;
@@ -270,7 +278,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}

static int eth_link_query_port(struct ib_device *ibdev, u8 port,
			       struct ib_port_attr *props)
			       struct ib_port_attr *props, int netw_view)
{

	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -320,20 +328,27 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
	return err;
}

static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			 struct ib_port_attr *props, int netw_view)
{
	int err;

	memset(props, 0, sizeof *props);

	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
		ib_link_query_port(ibdev, port, props) :
				eth_link_query_port(ibdev, port, props);
		ib_link_query_port(ibdev, port, props, netw_view) :
				eth_link_query_port(ibdev, port, props, netw_view);

	return err;
}

/*
 * Host-view front end for __mlx4_ib_query_port(): forwards the request
 * with netw_view cleared.
 */
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	const int netw_view = 0;	/* 0 selects the host view */

	return __mlx4_ib_query_port(ibdev, port, props, netw_view);
}

static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			       union ib_gid *gid)
{
@@ -350,7 +365,8 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port,
			   NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

@@ -360,7 +376,8 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
	in_mad->attr_mod = cpu_to_be32(index / 8);

	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, port,
			   NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

@@ -391,11 +408,12 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
		return iboe_query_gid(ibdev, port, index, gid);
}

static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			 u16 *pkey, int netw_view)
{
	struct ib_smp *in_mad  = NULL;
	struct ib_smp *out_mad = NULL;
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int err = -ENOMEM;

	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -407,7 +425,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
	in_mad->attr_mod = cpu_to_be32(index / 32);

	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

@@ -419,6 +441,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
	return err;
}

/*
 * Host-view front end for __mlx4_ib_query_pkey(): forwards the request
 * with netw_view cleared.
 */
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	const int netw_view = 0;	/* 0 selects the host view */

	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, netw_view);
}

static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
@@ -849,6 +876,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
{
	struct ib_smp *in_mad  = NULL;
	struct ib_smp *out_mad = NULL;
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int err = -ENOMEM;

	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -858,8 +886,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
	if (mlx4_is_master(dev->dev))
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

@@ -867,7 +897,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)

	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

	err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

+13 −1
Original line number Diff line number Diff line
@@ -176,6 +176,14 @@ enum mlx4_ib_qp_type {
	MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
	MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)

/*
 * Bit flags carried in the mad_ifc_flags argument of mlx4_MAD_IFC().
 * Each flag occupies its own bit so they can be OR-ed together.
 */
enum mlx4_ib_mad_ifc_flags {
	MLX4_MAD_IFC_IGNORE_MKEY	= 1 << 0,
	MLX4_MAD_IFC_IGNORE_BKEY	= 1 << 1,
	MLX4_MAD_IFC_NET_VIEW		= 1 << 2,
	/* convenience mask: both key checks disabled */
	MLX4_MAD_IFC_IGNORE_KEYS	= MLX4_MAD_IFC_IGNORE_MKEY |
					  MLX4_MAD_IFC_IGNORE_BKEY,
};

enum {
	MLX4_NUM_TUNNEL_BUFS		= 256,
};
@@ -512,7 +520,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		      struct ib_recv_wr **bad_wr);

int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
		 void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,	u8 port_num,
@@ -527,6 +535,10 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
			 u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			 struct ib_port_attr *props, int netw_view);
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			 u16 *pkey, int netw_view);

int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
			u8 *mac, int *is_mcast, u8 port);
+162 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@

#include <linux/mlx4/cmd.h>
#include <linux/semaphore.h>
#include <rdma/ib_smi.h>

#include <asm/io.h>

@@ -627,6 +628,149 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}

/*
 * Read one block of 32 pkeys for @port through a native MAD_IFC command.
 *
 * @index must be a multiple of 32; it is converted to a block number and
 * written into the attr_mod field of the MAD held in @inbox.  On success
 * the 32 big-endian entries found in the response in @outbox are stored,
 * in CPU byte order, at pkey[0..31].
 *
 * Returns 0 on success, -EINVAL for an unaligned @index, or the error
 * reported by mlx4_cmd_box().
 */
static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
			       struct mlx4_cmd_mailbox *inbox,
			       struct mlx4_cmd_mailbox *outbox)
{
	struct ib_smp *req = inbox->buf;
	struct ib_smp *rsp = outbox->buf;
	__be16 *entries;
	int rc;
	int n;

	/* only block-aligned indices are accepted */
	if ((index % 32) != 0)
		return -EINVAL;

	req->attr_mod = cpu_to_be32(index / 32);

	rc = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
			  MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
			  MLX4_CMD_NATIVE);
	if (rc)
		return rc;

	entries = (__be16 *) rsp->data;
	for (n = 0; n < 32; n++)
		pkey[n] = be16_to_cpu(entries[n]);

	return 0;
}

/*
 * Fill @table with the pkey table of @port, one 32-entry block at a time.
 *
 * Blocks are requested for offsets 0, 32, 64, ... while the offset is
 * below dev->caps.pkey_table_len[port]; each block writes 32 entries
 * starting at table[offset].  Stops at the first failing block and
 * returns its error; returns 0 when every block was read.
 */
static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
			       struct mlx4_cmd_mailbox *inbox,
			       struct mlx4_cmd_mailbox *outbox)
{
	int base = 0;
	int rc;

	while (base < dev->caps.pkey_table_len[port]) {
		rc = query_pkey_block(dev, port, base, table + base,
				      inbox, outbox);
		if (rc)
			return rc;
		base += 32;
	}

	return 0;
}
#define PORT_CAPABILITY_LOCATION_IN_SMP 20
#define PORT_STATE_OFFSET 32

static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
{
	/* will be modified when add alias_guid feature */
	return IB_PORT_DOWN;
}

static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
				struct mlx4_vhcr *vhcr,
				struct mlx4_cmd_mailbox *inbox,
				struct mlx4_cmd_mailbox *outbox,
				struct mlx4_cmd_info *cmd)
{
	struct ib_smp *smp = inbox->buf;
	u32 index;
	u8 port;
	u16 *table;
	int err;
	int vidx, pidx;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct ib_smp *outsmp = outbox->buf;
	__be16 *outtab = (__be16 *)(outsmp->data);
	__be32 slave_cap_mask;
	port = vhcr->in_modifier;

	if (smp->base_version == 1 &&
	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
	    smp->class_version == 1) {
		if (smp->method	== IB_MGMT_METHOD_GET) {
			if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
				index = be32_to_cpu(smp->attr_mod);
				if (port < 1 || port > dev->caps.num_ports)
					return -EINVAL;
				table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
				if (!table)
					return -ENOMEM;
				/* need to get the full pkey table because the paravirtualized
				 * pkeys may be scattered among several pkey blocks.
				 */
				err = get_full_pkey_table(dev, port, table, inbox, outbox);
				if (!err) {
					for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
						pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
						outtab[vidx % 32] = cpu_to_be16(table[pidx]);
					}
				}
				kfree(table);
				return err;
			}
			if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
				/*get the slave specific caps:*/
				/*do the command */
				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
					    vhcr->in_modifier, vhcr->op_modifier,
					    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
				/* modify the response for slaves */
				if (!err && slave != mlx4_master_func_num(dev)) {
					u8 *state = outsmp->data + PORT_STATE_OFFSET;

					*state = (*state & 0xf0) | vf_port_state(dev, port, slave);
					slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
					memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
				}
				return err;
			}
			if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
				/* compute slave's gid block */
				smp->attr_mod = cpu_to_be32(slave / 8);
				/* execute cmd */
				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
					     vhcr->in_modifier, vhcr->op_modifier,
					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
				if (!err) {
					/* if needed, move slave gid to index 0 */
					if (slave % 8)
						memcpy(outsmp->data,
						       outsmp->data + (slave % 8) * 8, 8);
					/* delete all other gids */
					memset(outsmp->data + 8, 0, 56);
				}
				return err;
			}
		}
	}
	if (slave != mlx4_master_func_num(dev) &&
	    ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
	     (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
	      smp->method == IB_MGMT_METHOD_SET))) {
		mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
			 "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
			 slave, smp->method, smp->mgmt_class,
			 be16_to_cpu(smp->attr_id));
		return -EPERM;
	}
	/*default:*/
	return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
				    vhcr->in_modifier, vhcr->op_modifier,
				    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
}

int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
		     struct mlx4_vhcr *vhcr,
		     struct mlx4_cmd_mailbox *inbox,
@@ -1060,6 +1204,24 @@ static struct mlx4_cmd_info cmd_info[] = {
		.verify = NULL,
		.wrapper = mlx4_GEN_QP_wrapper
	},
	{
		.opcode = MLX4_CMD_CONF_SPECIAL_QP,
		.has_inbox = false,
		.has_outbox = false,
		.out_is_imm = false,
		.encode_slave_id = false,
		.verify = NULL, /* XXX verify: only demux can do this */
		.wrapper = NULL
	},
	{
		.opcode = MLX4_CMD_MAD_IFC,
		.has_inbox = true,
		.has_outbox = true,
		.out_is_imm = false,
		.encode_slave_id = false,
		.verify = NULL,
		.wrapper = mlx4_MAD_IFC_wrapper
	},
	{
		.opcode = MLX4_CMD_QUERY_IF_STAT,
		.has_inbox = false,