Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0fe3e204 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlx4-HA-LAG-SRIOV-VF'



Or Gerlitz says:

====================
Add HA and LAG support for mlx4 SRIOV VFs

This series is built upon the code added in commit ce388fff "Merge branch
'mlx4-next'" which added HA and LAG support to mlx4 RoCE and SRIOV services.

We add HA and Link Aggregation support to single ported mlx4 Ethernet VFs.

In this case, the PF Ethernet interfaces are bonded, the VFs see single
port HW devices (already supported) -- however, now this port is highly
available. This means that all VF HW QPs (both VF Ethernet driver and VF
RoCE / RAW QPs) are subject to the V2P (Virtual-To-Physical) mapping which
is managed by the PF driver, and hence resilient across link failures and
such events.

When bonding operates in Dynamic link aggregation (802.3ad) mode, traffic
from each VF will go over the VF "base port" (the port this VF is assigned
to by the admin) as long as this port is up. When the port fails, traffic
from all VFs that are defined on this port will move to the other port, and
be back to their base-port when it recovers.

Moni and Or.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 0d76d6e8 e57968a1
Loading
Loading
Loading
Loading
+14 −3
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include <linux/gfp.h>
#include <rdma/ib_pma.h>

#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"

enum {
@@ -606,8 +607,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
			struct ib_mad *mad)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	int err;
	int slave;
	int err, other_port;
	int slave = -1;
	u8 *slave_id;
	int is_eth = 0;

@@ -625,7 +626,17 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
			mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
			return -EINVAL;
		}
		if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
		err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
		if (err && mlx4_is_mf_bonded(dev->dev)) {
			other_port = (port == 1) ? 2 : 1;
			err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
			if (!err) {
				port = other_port;
				pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
					 slave, grh->dgid.raw, port, other_port);
			}
		}
		if (err) {
			mlx4_ib_warn(ibdev, "failed matching grh\n");
			return -ENOENT;
		}
+20 −4
Original line number Diff line number Diff line
@@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
	      eqe = next_slave_event_eqe(slave_eq)) {
		slave = eqe->slave_id;

		if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE &&
		    eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN &&
		    mlx4_is_bonded(dev)) {
			struct mlx4_port_cap port_cap;

			if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state)
				goto consume;

			if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state)
				goto consume;
		}
		/* All active slaves need to receive the event */
		if (slave == ALL_SLAVES) {
			for (i = 0; i <= dev->persist->num_vfs; i++) {
@@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
				mlx4_warn(dev, "Failed to generate event for slave %d\n",
					  slave);
		}
consume:
		++slave_eq->cons;
	}
}
@@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
					break;
				for (i = 0; i < dev->persist->num_vfs + 1;
				     i++) {
					if (!test_bit(i, slaves_port.slaves))
					int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);

					if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
						continue;
					if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
						if (i == mlx4_master_func_num(dev))
@@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
							eqe->event.port_change.port =
								cpu_to_be32(
								(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
								| (mlx4_phys_to_slave_port(dev, i, port) << 28));
								| (reported_port << 28));
							mlx4_slave_event(dev, i, eqe);
						}
					} else {  /* IB port */
@@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
					for (i = 0;
					     i < dev->persist->num_vfs + 1;
					     i++) {
						if (!test_bit(i, slaves_port.slaves))
						int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);

						if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
							continue;
						if (i == mlx4_master_func_num(dev))
							continue;
@@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
							eqe->event.port_change.port =
								cpu_to_be32(
								(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
								| (mlx4_phys_to_slave_port(dev, i, port) << 28));
								| (reported_port << 28));
							mlx4_slave_event(dev, i, eqe);
						}
					}
+11 −1
Original line number Diff line number Diff line
@@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c
			goto out;

		MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
		port_cap->link_state = (field & 0x80) >> 7;
		port_cap->supported_port_types = field & 3;
		port_cap->suggested_type = (field >> 3) & 1;
		port_cap->default_sense = (field >> 4) & 1;
@@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
			port_type |= MLX4_PORT_LINK_UP_MASK;
		else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state)
			port_type &= ~MLX4_PORT_LINK_UP_MASK;
		else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) {
			int other_port = (port == 1) ? 2 : 1;
			struct mlx4_port_cap port_cap;

			err = mlx4_QUERY_PORT(dev, other_port, &port_cap);
			if (err)
				goto out;
			port_type |= (port_cap.link_state << 7);
		}

		MLX4_PUT(outbox->buf, port_type,
			 QUERY_PORT_SUPPORTED_TYPE_OFFSET);
@@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
		MLX4_PUT(outbox->buf, short_field,
			 QUERY_PORT_CUR_MAX_PKEY_OFFSET);
	}

out:
	return err;
}

+1 −0
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg {
};

struct mlx4_port_cap {
	u8  link_state;
	u8  supported_port_types;
	u8  suggested_type;
	u8  default_sense;
+97 −10
Original line number Diff line number Diff line
@@ -1221,6 +1221,76 @@ static ssize_t set_port_ib_mtu(struct device *dev,
	return err ? err : count;
}

/* bond for multi-function device */
#define MAX_MF_BOND_ALLOWED_SLAVES 63
static int mlx4_mf_bond(struct mlx4_dev *dev)
{
	int err = 0;
	struct mlx4_slaves_pport slaves_port1;
	struct mlx4_slaves_pport slaves_port2;
	DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);

	slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
	slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
	bitmap_and(slaves_port_1_2,
		   slaves_port1.slaves, slaves_port2.slaves,
		   dev->persist->num_vfs + 1);

	/* only single port vfs are allowed */
	if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
		mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
		return -EINVAL;
	}

	/* limit on maximum allowed VFs */
	if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
	    bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
	    MAX_MF_BOND_ALLOWED_SLAVES)
		return -EINVAL;

	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
		mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
		return -EINVAL;
	}

	err = mlx4_bond_mac_table(dev);
	if (err)
		return err;
	err = mlx4_bond_vlan_table(dev);
	if (err)
		goto err1;
	err = mlx4_bond_fs_rules(dev);
	if (err)
		goto err2;

	return 0;
err2:
	(void)mlx4_unbond_vlan_table(dev);
err1:
	(void)mlx4_unbond_mac_table(dev);
	return err;
}

static int mlx4_mf_unbond(struct mlx4_dev *dev)
{
	int ret, ret1;

	ret = mlx4_unbond_fs_rules(dev);
	if (ret)
		mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret);
	ret1 = mlx4_unbond_mac_table(dev);
	if (ret1) {
		mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
		ret = ret1;
	}
	ret1 = mlx4_unbond_vlan_table(dev);
	if (ret1) {
		mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
		ret = ret1;
	}
	return ret;
}

int mlx4_bond(struct mlx4_dev *dev)
{
	int ret = 0;
@@ -1228,16 +1298,23 @@ int mlx4_bond(struct mlx4_dev *dev)

	mutex_lock(&priv->bond_mutex);

	if (!mlx4_is_bonded(dev))
	if (!mlx4_is_bonded(dev)) {
		ret = mlx4_do_bond(dev, true);
	else
		ret = 0;

	mutex_unlock(&priv->bond_mutex);
		if (ret)
			mlx4_err(dev, "Failed to bond device: %d\n", ret);
	else
		if (!ret && mlx4_is_master(dev)) {
			ret = mlx4_mf_bond(dev);
			if (ret) {
				mlx4_err(dev, "bond for multifunction failed\n");
				mlx4_do_bond(dev, false);
			}
		}
	}

	mutex_unlock(&priv->bond_mutex);
	if (!ret)
		mlx4_dbg(dev, "Device is bonded\n");

	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_bond);
@@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev)

	mutex_lock(&priv->bond_mutex);

	if (mlx4_is_bonded(dev))
		ret = mlx4_do_bond(dev, false);
	if (mlx4_is_bonded(dev)) {
		int ret2 = 0;

	mutex_unlock(&priv->bond_mutex);
		ret = mlx4_do_bond(dev, false);
		if (ret)
			mlx4_err(dev, "Failed to unbond device: %d\n", ret);
	else
		if (mlx4_is_master(dev))
			ret2 = mlx4_mf_unbond(dev);
		if (ret2) {
			mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
			ret = ret2;
		}
	}

	mutex_unlock(&priv->bond_mutex);
	if (!ret)
		mlx4_dbg(dev, "Device is unbonded\n");

	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_unbond);
Loading