
Commit fa417f7b authored by Eli Cohen, committed by Roland Dreier

IB/mlx4: Add support for IBoE



Add support for IBoE to mlx4_ib.  The bulk of the code is handling the
new address vector fields; mlx4 needs the MAC address of a remote node
to include it in a WQE (for datagrams) or in the QP context (for
connected QPs).  Address resolution is done by assuming all unicast
GIDs are link-local IPv6 addresses, so the MAC can be derived directly
from the GID; multicast GIDs are likewise mapped straight to multicast
MACs.
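
For reference, a link-local IPv6 GID carries the MAC in its low 64 bits
as a modified EUI-64 interface ID, so the MAC can be recovered with pure
bit manipulation and no lookup.  The driver uses rdma_get_ll_mac() for
this; the sketch below is an illustrative standalone equivalent of that
reverse mapping, not the kernel code itself:

	#include <stdint.h>
	#include <string.h>

	/*
	 * Recover the 6-byte MAC from a link-local GID (fe80::/64 plus a
	 * modified EUI-64 interface ID): skip the 0xFF/0xFE filler bytes
	 * in the middle of the low 8 bytes and flip the universal/local
	 * bit back.
	 */
	static void ll_gid_to_mac(const uint8_t gid[16], uint8_t mac[6])
	{
		memcpy(mac, gid + 8, 3);	/* GID bytes 8..10  -> OUI      */
		memcpy(mac + 3, gid + 13, 3);	/* GID bytes 13..15 -> NIC part */
		mac[0] ^= 2;			/* undo the EUI-64 u/l bit flip */
	}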

Multicast group attach/detach needs to update the NIC's multicast
filters; but since attaching a QP to a multicast group can be done
before the QP is bound to a port, for IBoE we need to keep track of
all multicast groups that a QP is attached to before it transitions
from INIT to RTR (since it does not have a port in the INIT state).
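
In outline, that bookkeeping is a per-QP list of attached GIDs, recorded
unconditionally at attach time and replayed against the NIC filter once
the port is known.  A simplified sketch of the pattern; the types and
names here are illustrative, not the driver's (the driver uses
struct mlx4_ib_gid_entry and mlx4_ib_add_mc()):

	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>

	struct gid_entry {
		unsigned char		gid[16];
		int			added;	/* NIC filter programmed? */
		struct gid_entry	*next;
	};

	struct qp_state {
		int			port;	/* 0 until INIT->RTR */
		struct gid_entry	*gids;	/* groups attached so far */
	};

	/* Attach: always record; touch the filter only if a port is known. */
	static int mcast_attach(struct qp_state *qp, const unsigned char gid[16])
	{
		struct gid_entry *ge = calloc(1, sizeof(*ge));

		if (!ge)
			return -ENOMEM;
		memcpy(ge->gid, gid, 16);
		if (qp->port)
			ge->added = 1;		/* would program the filter here */
		ge->next = qp->gids;
		qp->gids = ge;
		return 0;
	}

	/* INIT->RTR: the port is now known; replay the deferred updates. */
	static void qp_bound_to_port(struct qp_state *qp, int port)
	{
		struct gid_entry *ge;

		qp->port = port;
		for (ge = qp->gids; ge; ge = ge->next)
			if (!ge->added)
				ge->added = 1;	/* would program the filter here */
	}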

Signed-off-by: Eli Cohen <eli@mellanox.co.il>

[ Many things cleaned up and otherwise monkeyed with; hope I didn't
  introduce too many bugs.  - Roland ]

Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 7ac870ed
drivers/infiniband/hw/mlx4/ah.c: +120 −33
@@ -30,66 +30,153 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_addr.h>
+
 #include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/string.h>
 
 #include "mlx4_ib.h"
 
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
+			u8 *mac, int *is_mcast, u8 port)
 {
-	struct mlx4_dev *dev = to_mdev(pd->device)->dev;
-	struct mlx4_ib_ah *ah;
+	struct in6_addr in6;
 
-	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-	if (!ah)
-		return ERR_PTR(-ENOMEM);
+	*is_mcast = 0;
 
-	memset(&ah->av, 0, sizeof ah->av);
+	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
+	if (rdma_link_local_addr(&in6))
+		rdma_get_ll_mac(&in6, mac);
+	else if (rdma_is_multicast_addr(&in6)) {
+		rdma_get_mcast_mac(&in6, mac);
+		*is_mcast = 1;
+	} else
+		return -EINVAL;
 
-	ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-	ah->av.g_slid  = ah_attr->src_path_bits;
-	ah->av.dlid    = cpu_to_be16(ah_attr->dlid);
-	if (ah_attr->static_rate) {
-		ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
-		while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
-		       !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
-			--ah->av.stat_rate;
-	}
-
-	ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+	return 0;
+}
+
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				  struct mlx4_ib_ah *ah)
+{
+	struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+
+	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+	ah->av.ib.g_slid  = ah_attr->src_path_bits;
 	if (ah_attr->ah_flags & IB_AH_GRH) {
-		ah->av.g_slid   |= 0x80;
-		ah->av.gid_index = ah_attr->grh.sgid_index;
-		ah->av.hop_limit = ah_attr->grh.hop_limit;
-		ah->av.sl_tclass_flowlabel |=
+		ah->av.ib.g_slid   |= 0x80;
+		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+		ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+		ah->av.ib.sl_tclass_flowlabel |=
 			cpu_to_be32((ah_attr->grh.traffic_class << 20) |
 				    ah_attr->grh.flow_label);
-		memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+		memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
 	}
 
+	ah->av.ib.dlid    = cpu_to_be16(ah_attr->dlid);
+	if (ah_attr->static_rate) {
+		ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+		while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+			--ah->av.ib.stat_rate;
+	}
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
 	return &ah->ibah;
 }
+
+static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				    struct mlx4_ib_ah *ah)
+{
+	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+	struct mlx4_dev *dev = ibdev->dev;
+	u8 mac[6];
+	int err;
+	int is_mcast;
+
+	err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
+	if (err)
+		return ERR_PTR(err);
+
+	memcpy(ah->av.eth.mac, mac, 6);
+	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+	ah->av.eth.gid_index = ah_attr->grh.sgid_index;
+	if (ah_attr->static_rate) {
+		ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+		while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+		       !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
+			--ah->av.eth.stat_rate;
+	}
+
+	/*
+	 * HW requires multicast LID so we just choose one.
+	 */
+	if (is_mcast)
+		ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+	memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
+	ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
+	return &ah->ibah;
+}
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	struct mlx4_ib_ah *ah;
+	struct ib_ah *ret;
+
+	ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
+		if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+			ret = ERR_PTR(-EINVAL);
+		} else {
+			/*
+			 * TBD: need to handle the case when we get
+			 * called in an atomic context and there we
+			 * might sleep.  We don't expect this
+			 * currently since we're working with link
+			 * local addresses which we can translate
+			 * without going to sleep.
+			 */
+			ret = create_iboe_ah(pd, ah_attr, ah);
+		}
+
+		if (IS_ERR(ret))
+			kfree(ah);
+
+		return ret;
+	} else
+		return create_ib_ah(pd, ah_attr, ah); /* never fails */
+}
 
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 {
 	struct mlx4_ib_ah *ah = to_mah(ibah);
+	enum rdma_link_layer ll;
 
 	memset(ah_attr, 0, sizeof *ah_attr);
-	ah_attr->dlid	       = be16_to_cpu(ah->av.dlid);
-	ah_attr->sl	       = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
-	ah_attr->port_num      = be32_to_cpu(ah->av.port_pd) >> 24;
-	if (ah->av.stat_rate)
-		ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
-	ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+	ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+	ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
+	ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
+	ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
+	if (ah->av.ib.stat_rate)
+		ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
+	ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
 
 	if (mlx4_ib_ah_grh_present(ah)) {
 		ah_attr->ah_flags = IB_AH_GRH;
 
 		ah_attr->grh.traffic_class =
-			be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
+			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
 		ah_attr->grh.flow_label =
-			be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
-		ah_attr->grh.hop_limit  = ah->av.hop_limit;
-		ah_attr->grh.sgid_index = ah->av.gid_index;
-		memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+		ah_attr->grh.hop_limit  = ah->av.ib.hop_limit;
+		ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+		memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
 	}
 
 	return 0;
drivers/infiniband/hw/mlx4/mad.c: +20 −12
@@ -311,9 +311,12 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 	struct ib_mad_agent *agent;
 	int p, q;
 	int ret;
+	enum rdma_link_layer ll;
 
-	for (p = 0; p < dev->num_ports; ++p)
+	for (p = 0; p < dev->num_ports; ++p) {
+		ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
 		for (q = 0; q <= 1; ++q) {
+			if (ll == IB_LINK_LAYER_INFINIBAND) {
 				agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 							      q ? IB_QPT_GSI : IB_QPT_SMI,
 							      NULL, 0, send_handler,
@@ -323,6 +326,9 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 					goto err;
 				}
 				dev->send_agent[p][q] = agent;
+			} else
+				dev->send_agent[p][q] = NULL;
 		}
+	}
 
 	return 0;
@@ -344,9 +350,11 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
 	for (p = 0; p < dev->num_ports; ++p) {
 		for (q = 0; q <= 1; ++q) {
 			agent = dev->send_agent[p][q];
+			if (agent) {
 				dev->send_agent[p][q] = NULL;
 				ib_unregister_mad_agent(agent);
+			}
 		}
 
 		if (dev->sm_ah[p])
 			ib_destroy_ah(dev->sm_ah[p]);
drivers/infiniband/hw/mlx4/main.c: +412 −36
@@ -35,9 +35,13 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
 
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
 
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/cmd.h>
@@ -58,6 +62,15 @@ static const char mlx4_ib_version[] =
 	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
+struct update_gid_work {
+	struct work_struct	work;
+	union ib_gid		gids[128];
+	struct mlx4_ib_dev     *dev;
+	int			port;
+};
+
+static struct workqueue_struct *wq;
+
 static void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
@@ -154,28 +167,19 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	return err;
 }
 
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
-			      struct ib_port_attr *props)
+static enum rdma_link_layer
+mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 {
-	struct ib_smp *in_mad  = NULL;
-	struct ib_smp *out_mad = NULL;
-	int err = -ENOMEM;
-
-	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-	if (!in_mad || !out_mad)
-		goto out;
-
-	memset(props, 0, sizeof *props);
-
-	init_query_mad(in_mad);
-	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
-	in_mad->attr_mod = cpu_to_be32(port);
+	struct mlx4_dev *dev = to_mdev(device)->dev;
 
-	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
-	if (err)
-		goto out;
+	return dev->caps.port_mask & (1 << (port_num - 1)) ?
+		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+}
 
+static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+			      struct ib_port_attr *props,
+			      struct ib_smp *out_mad)
+{
 	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
 	props->lmc		= out_mad->data[34] & 0x7;
 	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -196,6 +200,80 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 	props->max_vl_num	= out_mad->data[37] >> 4;
 	props->init_type_reply	= out_mad->data[41] >> 4;
 
+	return 0;
+}
+
+static u8 state_to_phys_state(enum ib_port_state state)
+{
+	return state == IB_PORT_ACTIVE ? 5 : 3;
+}
+
+static int eth_link_query_port(struct ib_device *ibdev, u8 port,
+			       struct ib_port_attr *props,
+			       struct ib_smp *out_mad)
+{
+	struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe;
+	struct net_device *ndev;
+	enum ib_mtu tmp;
+
+	props->active_width	= IB_WIDTH_4X;
+	props->active_speed	= 4;
+	props->port_cap_flags	= IB_PORT_CM_SUP;
+	props->gid_tbl_len	= to_mdev(ibdev)->dev->caps.gid_table_len[port];
+	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
+	props->pkey_tbl_len	= 1;
+	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
+	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
+	props->max_mtu		= IB_MTU_2048;
+	props->subnet_timeout	= 0;
+	props->max_vl_num	= out_mad->data[37] >> 4;
+	props->init_type_reply	= 0;
+	props->state		= IB_PORT_DOWN;
+	props->phys_state	= state_to_phys_state(props->state);
+	props->active_mtu	= IB_MTU_256;
+	spin_lock(&iboe->lock);
+	ndev = iboe->netdevs[port - 1];
+	if (!ndev)
+		goto out;
+
+	tmp = iboe_get_mtu(ndev->mtu);
+	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
+
+	props->state		= netif_running(ndev) &&  netif_oper_up(ndev) ?
+					IB_PORT_ACTIVE : IB_PORT_DOWN;
+	props->phys_state	= state_to_phys_state(props->state);
+
+out:
+	spin_unlock(&iboe->lock);
+	return 0;
+}
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+			      struct ib_port_attr *props)
+{
+	struct ib_smp *in_mad  = NULL;
+	struct ib_smp *out_mad = NULL;
+	int err = -ENOMEM;
+
+	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+	if (!in_mad || !out_mad)
+		goto out;
+
+	memset(props, 0, sizeof *props);
+
+	init_query_mad(in_mad);
+	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+	in_mad->attr_mod = cpu_to_be32(port);
+
+	err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+	if (err)
+		goto out;
+
+	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
+		ib_link_query_port(ibdev, port, props, out_mad) :
+		eth_link_query_port(ibdev, port, props, out_mad);
+
 out:
 	kfree(in_mad);
 	kfree(out_mad);
@@ -203,7 +281,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 	return err;
 }
 
-static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 			       union ib_gid *gid)
 {
 	struct ib_smp *in_mad  = NULL;
@@ -241,6 +319,25 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 	return err;
 }
 
+static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
+			  union ib_gid *gid)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibdev);
+
+	*gid = dev->iboe.gid_table[port - 1][index];
+
+	return 0;
+}
+
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+			     union ib_gid *gid)
+{
+	if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+		return __mlx4_ib_query_gid(ibdev, port, index, gid);
+	else
+		return iboe_query_gid(ibdev, port, index, gid);
+}
+
 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 			      u16 *pkey)
 {
@@ -289,6 +386,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
+	u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
 	if (IS_ERR(mailbox))
@@ -304,7 +402,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
 		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
 	}
 
-	err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+	err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
 		       MLX4_CMD_TIME_CLASS_B);
 
 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
@@ -447,18 +545,132 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
 	return 0;
 }
 
+static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
+{
+	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+	struct mlx4_ib_gid_entry *ge;
+
+	ge = kzalloc(sizeof *ge, GFP_KERNEL);
+	if (!ge)
+		return -ENOMEM;
+
+	ge->gid = *gid;
+	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
+		ge->port = mqp->port;
+		ge->added = 1;
+	}
+
+	mutex_lock(&mqp->mutex);
+	list_add_tail(&ge->list, &mqp->gid_list);
+	mutex_unlock(&mqp->mutex);
+
+	return 0;
+}
+
+int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+		   union ib_gid *gid)
+{
+	u8 mac[6];
+	struct net_device *ndev;
+	int ret = 0;
+
+	if (!mqp->port)
+		return 0;
+
+	spin_lock(&mdev->iboe.lock);
+	ndev = mdev->iboe.netdevs[mqp->port - 1];
+	if (ndev)
+		dev_hold(ndev);
+	spin_unlock(&mdev->iboe.lock);
+
+	if (ndev) {
+		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
+		rtnl_lock();
+		dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
+		ret = 1;
+		rtnl_unlock();
+		dev_put(ndev);
+	}
+
+	return ret;
+}
+
 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
-				     &to_mqp(ibqp)->mqp, gid->raw,
-				     !!(to_mqp(ibqp)->flags &
+	int err;
+	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+
+	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags &
 				    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
+	if (err)
+		return err;
+
+	err = add_gid_entry(ibqp, gid);
+	if (err)
+		goto err_add;
+
+	return 0;
+
+err_add:
+	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw);
+	return err;
+}
+
+static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+{
+	struct mlx4_ib_gid_entry *ge;
+	struct mlx4_ib_gid_entry *tmp;
+	struct mlx4_ib_gid_entry *ret = NULL;
+
+	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
+		if (!memcmp(raw, ge->gid.raw, 16)) {
+			ret = ge;
+			break;
+		}
+	}
+
+	return ret;
+}
+
 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
-				     &to_mqp(ibqp)->mqp, gid->raw);
+	int err;
+	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+	u8 mac[6];
+	struct net_device *ndev;
+	struct mlx4_ib_gid_entry *ge;
+
+	err = mlx4_multicast_detach(mdev->dev,
+				    &mqp->mqp, gid->raw);
+	if (err)
+		return err;
+
+	mutex_lock(&mqp->mutex);
+	ge = find_gid_entry(mqp, gid->raw);
+	if (ge) {
+		spin_lock(&mdev->iboe.lock);
+		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
+		if (ndev)
+			dev_hold(ndev);
+		spin_unlock(&mdev->iboe.lock);
+		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
+		if (ndev) {
+			rtnl_lock();
+			dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
+			rtnl_unlock();
+			dev_put(ndev);
+		}
+		list_del(&ge->list);
+		kfree(ge);
+	} else
+		printk(KERN_WARNING "could not find mgid entry\n");
+
+	mutex_unlock(&mqp->mutex);
+
+	return 0;
 }
 
 static int init_node_data(struct mlx4_ib_dev *dev)
@@ -543,15 +755,143 @@ static struct device_attribute *mlx4_class_attributes[] = {
 	&dev_attr_board_id
 };
 
+static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+{
+	memcpy(eui, dev->dev_addr, 3);
+	memcpy(eui + 5, dev->dev_addr + 3, 3);
+	eui[3] = 0xFF;
+	eui[4] = 0xFE;
+	eui[0] ^= 2;
+}
+
+static void update_gids_task(struct work_struct *work)
+{
+	struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
+	struct mlx4_cmd_mailbox *mailbox;
+	union ib_gid *gids;
+	int err;
+	struct mlx4_dev	*dev = gw->dev->dev;
+	struct ib_event event;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox));
+		return;
+	}
+
+	gids = mailbox->buf;
+	memcpy(gids, gw->gids, sizeof gw->gids);
+
+	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+	if (err)
+		printk(KERN_WARNING "set port command failed\n");
+	else {
+		memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+		event.device = &gw->dev->ib_dev;
+		event.element.port_num = gw->port;
+		event.event    = IB_EVENT_LID_CHANGE;
+		ib_dispatch_event(&event);
+	}
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	kfree(gw);
+}
+
+static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+{
+	struct net_device *ndev = dev->iboe.netdevs[port - 1];
+	struct update_gid_work *work;
+
+	work = kzalloc(sizeof *work, GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;
+
+	if (!clear) {
+		mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev);
+		work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+	}
+
+	INIT_WORK(&work->work, update_gids_task);
+	work->port = port;
+	work->dev = dev;
+	queue_work(wq, &work->work);
+
+	return 0;
+}
+
+static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+{
+	switch (event) {
+	case NETDEV_UP:
+		update_ipv6_gids(dev, port, 0);
+		break;
+
+	case NETDEV_DOWN:
+		update_ipv6_gids(dev, port, 1);
+		dev->iboe.netdevs[port - 1] = NULL;
+	}
+}
+
+static void netdev_added(struct mlx4_ib_dev *dev, int port)
+{
+	update_ipv6_gids(dev, port, 0);
+}
+
+static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+{
+	update_ipv6_gids(dev, port, 1);
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+				void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct mlx4_ib_dev *ibdev;
+	struct net_device *oldnd;
+	struct mlx4_ib_iboe *iboe;
+	int port;
+
+	if (!net_eq(dev_net(dev), &init_net))
+		return NOTIFY_DONE;
+
+	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+	iboe = &ibdev->iboe;
+
+	spin_lock(&iboe->lock);
+	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
+		oldnd = iboe->netdevs[port - 1];
+		iboe->netdevs[port - 1] =
+			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port);
+		if (oldnd != iboe->netdevs[port - 1]) {
+			if (iboe->netdevs[port - 1])
+				netdev_added(ibdev, port);
+			else
+				netdev_removed(ibdev, port);
+		}
+	}
+
+	if (dev == iboe->netdevs[0])
+		handle_en_event(ibdev, 1, event);
+	else if (dev == iboe->netdevs[1])
+		handle_en_event(ibdev, 2, event);
+
+	spin_unlock(&iboe->lock);
+
+	return NOTIFY_DONE;
+}
+
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
 	struct mlx4_ib_dev *ibdev;
 	int num_ports = 0;
 	int i;
+	int err;
+	struct mlx4_ib_iboe *iboe;
 
 	printk_once(KERN_INFO "%s", mlx4_ib_version);
 
-	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+	mlx4_foreach_ib_transport_port(i, dev)
 		num_ports++;
 
 	/* No point in registering a device with no ports... */
@@ -564,6 +904,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		return NULL;
 	}
 
+	iboe = &ibdev->iboe;
+
 	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
 		goto err_dealloc;

@@ -612,6 +954,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

 	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
 	ibdev->ib_dev.query_port	= mlx4_ib_query_port;
+	ibdev->ib_dev.get_link_layer	= mlx4_ib_port_link_layer;
 	ibdev->ib_dev.query_gid		= mlx4_ib_query_gid;
 	ibdev->ib_dev.query_pkey	= mlx4_ib_query_pkey;
 	ibdev->ib_dev.modify_device	= mlx4_ib_modify_device;
@@ -656,6 +999,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
 	ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
 
+	spin_lock_init(&iboe->lock);
+
 	if (init_node_data(ibdev))
 		goto err_map;

@@ -668,16 +1013,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_ib_mad_init(ibdev))
 		goto err_reg;
 
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
+		iboe->nb.notifier_call = mlx4_ib_netdev_event;
+		err = register_netdevice_notifier(&iboe->nb);
+		if (err)
+			goto err_reg;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
 		if (device_create_file(&ibdev->ib_dev.dev,
 				       mlx4_class_attributes[i]))
-			goto err_reg;
+			goto err_notif;
 	}
 
 	ibdev->ib_active = true;
 
 	return ibdev;
 
+err_notif:
+	if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+		printk(KERN_WARNING "failure unregistering notifier\n");
+	flush_workqueue(wq);
+
 err_reg:
 	ib_unregister_device(&ibdev->ib_dev);

@@ -703,11 +1060,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)

 	mlx4_ib_mad_cleanup(ibdev);
 	ib_unregister_device(&ibdev->ib_dev);
+	if (ibdev->iboe.nb.notifier_call) {
+		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+			printk(KERN_WARNING "failure unregistering notifier\n");
+		ibdev->iboe.nb.notifier_call = NULL;
+	}
+	iounmap(ibdev->uar_map);
 
-	for (p = 1; p <= ibdev->num_ports; ++p)
+	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
 		mlx4_CLOSE_PORT(dev, p);
 
-	iounmap(ibdev->uar_map);
 	mlx4_uar_free(dev, &ibdev->priv_uar);
 	mlx4_pd_free(dev, ibdev->priv_pdn);
 	ib_dealloc_device(&ibdev->ib_dev);
@@ -749,17 +1111,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 static struct mlx4_interface mlx4_ib_interface = {
 	.add		= mlx4_ib_add,
 	.remove		= mlx4_ib_remove,
-	.event	= mlx4_ib_event
+	.event		= mlx4_ib_event,
+	.protocol	= MLX4_PROTOCOL_IB
 };
 
 static int __init mlx4_ib_init(void)
 {
-	return mlx4_register_interface(&mlx4_ib_interface);
+	int err;
+
+	wq = create_singlethread_workqueue("mlx4_ib");
+	if (!wq)
+		return -ENOMEM;
+
+	err = mlx4_register_interface(&mlx4_ib_interface);
+	if (err) {
+		destroy_workqueue(wq);
+		return err;
+	}
+
+	return 0;
 }
 
 static void __exit mlx4_ib_cleanup(void)
 {
 	mlx4_unregister_interface(&mlx4_ib_interface);
+	destroy_workqueue(wq);
 }
 
 module_init(mlx4_ib_init);
drivers/infiniband/hw/mlx4/mlx4_ib.h: +30 −2
@@ -112,6 +112,13 @@ enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
 };
 
+struct mlx4_ib_gid_entry {
+	struct list_head	list;
+	union ib_gid		gid;
+	int			added;
+	u8			port;
+};
+
 struct mlx4_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx4_qp		mqp;
@@ -138,6 +145,8 @@ struct mlx4_ib_qp {
 	u8			resp_depth;
 	u8			sq_no_prefetch;
 	u8			state;
+	int			mlx_type;
+	struct list_head	gid_list;
 };
 
 struct mlx4_ib_srq {
@@ -157,7 +166,14 @@ struct mlx4_ib_srq {

 struct mlx4_ib_ah {
 	struct ib_ah		ibah;
-	struct mlx4_av		av;
+	union mlx4_ext_av	av;
+};
+
+struct mlx4_ib_iboe {
+	spinlock_t		lock;
+	struct net_device      *netdevs[MLX4_MAX_PORTS];
+	struct notifier_block	nb;
+	union ib_gid		gid_table[MLX4_MAX_PORTS][128];
 };
 
 struct mlx4_ib_dev {
@@ -176,6 +192,7 @@ struct mlx4_ib_dev {

 	struct mutex		cap_mask_mutex;
 	bool			ib_active;
+	struct mlx4_ib_iboe	iboe;
 };
 
 static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -314,9 +331,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
 int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
 int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
 
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
+			u8 *mac, int *is_mcast, u8 port);
+
 static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
-	return !!(ah->av.g_slid & 0x80);
+	u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
+
+	if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
+		return 1;
+
+	return !!(ah->av.ib.g_slid & 0x80);
 }
 
+int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+		   union ib_gid *gid);
+
 #endif /* MLX4_IB_H */
+105 −25: 1 more file changed (preview size limit exceeded, changes collapsed)