Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 39e7d095 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlx4-next'



Or Gerlitz says:

====================
mlx4: Add SRIOV support for RoCE

This series adds SRIOV support for RoCE (RDMA over Ethernet) to the mlx4 driver.

The patches are against net-next, as of commit 2d8d40af "pkt_sched: fq:
do not hold qdisc lock while allocating memory"

changes from V1:
 - addressed feedback from Dave on patch #3 and changed get_real_sgid_index()
   to be called fill_in_real_sgid_index() and be a void function.
 - removed some checkpatch warnings on long lines

changes from V0:
  - always check the return code of mlx4_get_roce_gid_from_slave().
    The call we fixed is introduced in patch #1 and later removed by
    patch #3 that allows guests to have multiple GIDS. The 1..3
    separation was done for proper division of patches to logical changes.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 36f6fdb7 aa9a2d51
Loading
Loading
Loading
Loading
+63 −17
Original line number Diff line number Diff line
@@ -61,6 +61,11 @@ struct cm_generic_msg {
	__be32 remote_comm_id;
};

/* Minimal view of a SIDR (Service ID Resolution) CM MAD: unlike ordinary
 * CM messages (struct cm_generic_msg), SIDR REQ/REP carry a single
 * request_id immediately after the MAD header instead of the
 * local/remote comm_id pair.
 */
struct cm_sidr_generic_msg {
	struct ib_mad_hdr hdr;
	__be32 request_id;
};

struct cm_req_msg {
	unsigned char unused[0x60];
	union ib_gid primary_path_sgid;
@@ -69,29 +74,63 @@ struct cm_req_msg {

/* Store @cm_id as the local communication ID of a CM MAD.
 *
 * SIDR REQ messages keep the ID in the request_id field of the short
 * SIDR header; regular CM messages keep it in local_comm_id.  A SIDR
 * REP has no local comm id at all, so such a request is a caller bug:
 * log it and leave the MAD untouched.
 */
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		pr_err("trying to set local_comm_id in SIDR_REP\n");
		return;
	}

	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		struct cm_sidr_generic_msg *sidr_msg =
			(struct cm_sidr_generic_msg *)mad;

		sidr_msg->request_id = cpu_to_be32(cm_id);
	} else {
		struct cm_generic_msg *cm_msg = (struct cm_generic_msg *)mad;

		cm_msg->local_comm_id = cpu_to_be32(cm_id);
	}
}

/* Extract the local communication ID from a CM MAD.
 *
 * SIDR REQ messages carry the ID in the request_id field of the short
 * SIDR header; regular CM messages carry it in local_comm_id.  A SIDR
 * REP has no local comm id, so asking for one is a caller bug: log it
 * and return -1 (0xFFFFFFFF in the u32 return type).
 *
 * Fix: the error message said "trying to set local_comm_id" — a
 * copy/paste leftover from set_local_comm_id(); this is a getter.
 */
static u32 get_local_comm_id(struct ib_mad *mad)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		struct cm_sidr_generic_msg *msg =
			(struct cm_sidr_generic_msg *)mad;
		return be32_to_cpu(msg->request_id);
	} else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		pr_err("trying to get local_comm_id in SIDR_REP\n");
		return -1;
	} else {
		struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;

		return be32_to_cpu(msg->local_comm_id);
	}
}

/* Store @cm_id as the remote communication ID of a CM MAD.
 *
 * SIDR REP messages keep the ID in the request_id field of the short
 * SIDR header; regular CM messages keep it in remote_comm_id.  A SIDR
 * REQ has no remote comm id at all, so such a request is a caller bug:
 * log it and leave the MAD untouched.
 */
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		pr_err("trying to set remote_comm_id in SIDR_REQ\n");
		return;
	}

	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		struct cm_sidr_generic_msg *sidr_msg =
			(struct cm_sidr_generic_msg *)mad;

		sidr_msg->request_id = cpu_to_be32(cm_id);
	} else {
		struct cm_generic_msg *cm_msg = (struct cm_generic_msg *)mad;

		cm_msg->remote_comm_id = cpu_to_be32(cm_id);
	}
}

/* Extract the remote communication ID from a CM MAD.
 *
 * SIDR REP messages carry the ID in the request_id field of the short
 * SIDR header; regular CM messages carry it in remote_comm_id.  A SIDR
 * REQ has no remote comm id, so asking for one is a caller bug: log it
 * and return -1 (0xFFFFFFFF in the u32 return type).
 *
 * Fix: the error message said "trying to set remote_comm_id" — a
 * copy/paste leftover from set_remote_comm_id(); this is a getter.
 */
static u32 get_remote_comm_id(struct ib_mad *mad)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		struct cm_sidr_generic_msg *msg =
			(struct cm_sidr_generic_msg *)mad;
		return be32_to_cpu(msg->request_id);
	} else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		pr_err("trying to get remote_comm_id in SIDR_REQ\n");
		return -1;
	} else {
		struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;

		return be32_to_cpu(msg->remote_comm_id);
	}
}

static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
{
@@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
	u32 sl_cm_id;
	int pv_cm_id = -1;

	sl_cm_id = get_local_comm_id(mad);

	if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
			mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
			mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
			mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		sl_cm_id = get_local_comm_id(mad);
		id = id_map_alloc(ibdev, slave_id, sl_cm_id);
		if (IS_ERR(id)) {
			mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
				__func__, slave_id, sl_cm_id);
			return PTR_ERR(id);
		}
	} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
	} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
		   mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		return 0;
	} else {
		sl_cm_id = get_local_comm_id(mad);
		id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
	}

@@ -320,9 +361,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
	u32 pv_cm_id;
	struct id_map_entry *id;

	if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
	if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
	    mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		union ib_gid gid;

		if (!slave)
			return 0;

		gid = gid_from_req_msg(ibdev, mad);
		*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
		if (*slave < 0) {
@@ -341,6 +386,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
		return -ENOENT;
	}

	if (slave)
		*slave = id->slave_id;
	set_remote_comm_id(mad, id->sl_cm_id);

+28 −14
Original line number Diff line number Diff line
@@ -564,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}

static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
			   unsigned tail, struct mlx4_cqe *cqe)
			   unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
	struct mlx4_ib_proxy_sqp_hdr *hdr;

@@ -574,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
				   DMA_FROM_DEVICE);
	hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
	wc->pkey_index	= be16_to_cpu(hdr->tun.pkey_index);
	wc->slid	= be16_to_cpu(hdr->tun.slid_mac_47_32);
	wc->sl		= (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
	wc->src_qp	= be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
	wc->wc_flags   |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
	wc->dlid_path_bits = 0;

	if (is_eth) {
		wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
		memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
		memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
		wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
	} else {
		wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
		wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
	}

	return 0;
}

@@ -594,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
	struct mlx4_srq *msrq = NULL;
	int is_send;
	int is_error;
	int is_eth;
	u32 g_mlpath_rqpn;
	u16 wqe_ctr;
	unsigned tail = 0;
@@ -778,11 +787,15 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
			break;
		}

		is_eth = (rdma_port_get_link_layer(wc->qp->device,
						  (*cur_qp)->port) ==
			  IB_LINK_LAYER_ETHERNET);
		if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
			if ((*cur_qp)->mlx4_ib_qp_type &
			    (MLX4_IB_QPT_PROXY_SMI_OWNER |
			     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
				return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
				return use_tunnel_data(*cur_qp, cq, wc, tail,
						       cqe, is_eth);
		}

		wc->slid	   = be16_to_cpu(cqe->rlid);
@@ -793,20 +806,21 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
		wc->wc_flags	  |= mlx4_ib_ipoib_csum_ok(cqe->status,
					cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
		if (rdma_port_get_link_layer(wc->qp->device,
				(*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
		if (is_eth) {
			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
		else
			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
		if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) {
			if (be32_to_cpu(cqe->vlan_my_qpn) &
					MLX4_CQE_VLAN_PRESENT_MASK) {
				wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
					MLX4_CQE_VID_MASK;
			} else {
				wc->vlan_id = 0xffff;
			}
		wc->wc_flags |= IB_WC_WITH_VLAN;
			memcpy(wc->smac, cqe->smac, ETH_ALEN);
		wc->wc_flags |= IB_WC_WITH_SMAC;
			wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
		} else {
			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
			wc->vlan_id = 0xffff;
		}
	}

	return 0;
+110 −11
Original line number Diff line number Diff line
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
	int ret = 0;
	u16 tun_pkey_ix;
	u16 cached_pkey;
	u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;

	if (dest_qpt > IB_QPT_GSI)
		return -EINVAL;
@@ -509,6 +510,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
	 * The driver will set the force loopback bit in post_send */
	memset(&attr, 0, sizeof attr);
	attr.port_num = port;
	if (is_eth) {
		memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
		attr.ah_flags = IB_AH_GRH;
	}
	ah = ib_create_ah(tun_ctx->pd, &attr);
	if (IS_ERR(ah))
		return -ENOMEM;
@@ -540,11 +545,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,

	/* adjust tunnel data */
	tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
	tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
	tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
	tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
	tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;

	if (is_eth) {
		u16 vlan = 0;
		if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
						NULL)) {
			/* VST mode */
			if (vlan != wc->vlan_id)
				/* Packet vlan is not the VST-assigned vlan.
				 * Drop the packet.
				 */
				goto out;
			 else
				/* Remove the vlan tag before forwarding
				 * the packet to the VF.
				 */
				vlan = 0xffff;
		} else {
			vlan = wc->vlan_id;
		}

		tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
		memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
		memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
	} else {
		tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
		tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
	}

	ib_dma_sync_single_for_device(&dev->ib_dev,
				      tun_qp->tx_ring[tun_tx_ix].buf.map,
				      sizeof (struct mlx4_rcv_tunnel_mad),
@@ -580,6 +610,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
	int err;
	int slave;
	u8 *slave_id;
	int is_eth = 0;

	if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
		is_eth = 0;
	else
		is_eth = 1;

	if (is_eth) {
		if (!(wc->wc_flags & IB_WC_GRH)) {
			mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
			return -EINVAL;
		}
		if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
			mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
			return -EINVAL;
		}
		if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
			mlx4_ib_warn(ibdev, "failed matching grh\n");
			return -ENOENT;
		}
		if (slave >= dev->dev->caps.sqp_demux) {
			mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
				     slave, dev->dev->caps.sqp_demux);
			return -ENOENT;
		}

		if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
			return 0;

		err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
		if (err)
			pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
				 slave, err);
		return 0;
	}

	/* Initially assume that this mad is for us */
	slave = mlx4_master_func_num(dev->dev);
@@ -1076,8 +1141,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)


int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
			 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
			 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
			 enum ib_qp_type dest_qpt, u16 pkey_index,
			 u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
			 u8 *s_mac, struct ib_mad *mad)
{
	struct ib_sge list;
	struct ib_send_wr wr, *bad_wr;
@@ -1166,6 +1232,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	if (s_mac)
		memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);


	ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@@ -1174,6 +1243,34 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
	return ret;
}

/* Return the first GID-table index owned by @slave on @port.
 *
 * On IB ports each slave owns exactly one GID at index == slave number.
 * On RoCE ports the PF (slave 0) keeps the first MLX4_ROCE_PF_GIDS
 * entries, and the remaining (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS)
 * entries are divided among the VFs: the first (gids % vfs) VFs each
 * get one extra GID, so their ranges are wider by one slot.
 *
 * NOTE(review): assumes num_vfs > 0 whenever slave > 0 on a RoCE port
 * (otherwise the divisions below would fault) — presumably guaranteed
 * in SRIOV mode; confirm against callers.
 */
static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
{
	int gids;
	int vfs;

	if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
		return slave;

	/* GID entries left over for VFs after the PF takes its share. */
	gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
	vfs = dev->dev->num_vfs;

	if (slave == 0)
		return 0;
	/* Slaves 1..(gids % vfs) each own (gids/vfs + 1) GIDs. */
	if (slave <= gids % vfs)
		return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);

	/* Later slaves own (gids/vfs) GIDs each, offset past the wider ranges. */
	return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
}

static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
				    struct ib_ah_attr *ah_attr)
{
	if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
		ah_attr->grh.sgid_index = slave;
	else
		ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
}

static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
{
	struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1260,12 +1357,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
	memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
	ah.ibah.device = ctx->ib_dev;
	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
	if ((ah_attr.ah_flags & IB_AH_GRH) &&
	    (ah_attr.grh.sgid_index != slave)) {
		mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
			     slave, ah_attr.grh.sgid_index);
		return;
	}
	if (ah_attr.ah_flags & IB_AH_GRH)
		fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);

	memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
	ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
	/* if slave have default vlan use it */
	mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
				    &ah_attr.vlan_id, &ah_attr.sl);

	mlx4_ib_send_to_wire(dev, slave, ctx->port,
			     is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1273,7 +1372,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
			     be16_to_cpu(tunnel->hdr.pkey_index),
			     be32_to_cpu(tunnel->hdr.remote_qpn),
			     be32_to_cpu(tunnel->hdr.qkey),
			     &ah_attr, &tunnel->mad);
			     &ah_attr, wc->smac, &tunnel->mad);
}

static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+0 −8
Original line number Diff line number Diff line
@@ -1888,14 +1888,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

	pr_info_once("%s", mlx4_ib_version);

	mlx4_foreach_non_ib_transport_port(i, dev)
		num_ports++;

	if (mlx4_is_mfunc(dev) && num_ports) {
		dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
		return NULL;
	}

	num_ports = 0;
	mlx4_foreach_ib_transport_port(i, dev)
		num_ports++;
+3 −2
Original line number Diff line number Diff line
@@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
	}
	mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
	spin_unlock(&dev->sm_lock);
	return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
				    IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
	return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
				    ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
				    &ah_attr, NULL, mad);
}

static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
Loading