Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c5b8b34c authored by David S. Miller
Browse files

Merge branch 'bonding-team-offload'



Jiri Pirko says:

====================
bonding/team offload + mlxsw implementation

This patchset introduces needed infrastructure for link aggregation
offload - for both team and bonding. It also implements the offload
in mlxsw driver.

Particularly, this patchset introduces the possibility for an upper driver
(bond/team/bridge/..) to pass type-specific info down to notifier listeners.
Info is passed along with NETDEV_CHANGEUPPER/NETDEV_PRECHANGEUPPER
notifiers. Listeners (drivers of netdevs being enslaved) can react
accordingly.

Other extension is for run-time use. This patchset introduces
new netdev notifier type - NETDEV_CHANGELOWERSTATE. Along with this
notification, the upper driver (bond/team/bridge/..) can pass some
information about lower device change, particularly link-up and
TX-enabled states. Listeners (drivers of netdevs being enslaved)
can react accordingly.

The last part of the patchset is implementation of LAG offload in mlxsw,
using both previously introduced infrastructure extensions.

Note that bond-specific (and ugly) NETDEV_BONDING_INFO used by mlx4
can be removed and mlx4 can use the extensions this patchset adds.
I plan to convert it and get rid of NETDEV_BONDING_INFO in
a follow-up patchset.

v2->v3:
- one small fix in patch 1
v1->v2:
- added patch 1 and 2 per Andy's request
- couple of more or less cosmetic changes described in couple other patches
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3b195843 74581206
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -103,6 +103,7 @@ Netdevice notifier events which can be failed are:
 * NETDEV_POST_INIT
 * NETDEV_PRECHANGEMTU
 * NETDEV_PRECHANGEUPPER
 * NETDEV_CHANGEUPPER

Example: Inject netdevice mtu change error (-22 == -EINVAL)

+10 −1
Original line number Diff line number Diff line
@@ -93,7 +93,8 @@ enum ad_link_speed_type {
	AD_LINK_SPEED_10000MBPS,
	AD_LINK_SPEED_20000MBPS,
	AD_LINK_SPEED_40000MBPS,
	AD_LINK_SPEED_56000MBPS
	AD_LINK_SPEED_56000MBPS,
	AD_LINK_SPEED_100000MBPS,
};

/* compare MAC addresses */
@@ -258,6 +259,7 @@ static inline int __check_agg_selection_timer(struct port *port)
 *     %AD_LINK_SPEED_20000MBPS
 *     %AD_LINK_SPEED_40000MBPS
 *     %AD_LINK_SPEED_56000MBPS
 *     %AD_LINK_SPEED_100000MBPS
 */
static u16 __get_link_speed(struct port *port)
{
@@ -305,6 +307,10 @@ static u16 __get_link_speed(struct port *port)
			speed = AD_LINK_SPEED_56000MBPS;
			break;

		case SPEED_100000:
			speed = AD_LINK_SPEED_100000MBPS;
			break;

		default:
			/* unknown speed value from ethtool. shouldn't happen */
			speed = 0;
@@ -681,6 +687,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
		case AD_LINK_SPEED_56000MBPS:
			bandwidth = aggregator->num_of_ports * 56000;
			break;
		case AD_LINK_SPEED_100000MBPS:
			bandwidth = aggregator->num_of_ports * 100000;
			break;
		default:
			bandwidth = 0; /* to silence the compiler */
		}
+79 −32
Original line number Diff line number Diff line
@@ -830,7 +830,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
			}

			new_active->delay = 0;
			bond_set_slave_link_state(new_active, BOND_LINK_UP);
			bond_set_slave_link_state(new_active, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);

			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
@@ -1198,26 +1199,43 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
	return ret;
}

static int bond_master_upper_dev_link(struct net_device *bond_dev,
				      struct net_device *slave_dev,
				      struct slave *slave)
static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_ROUNDROBIN:
		return NETDEV_LAG_TX_TYPE_ROUNDROBIN;
	case BOND_MODE_ACTIVEBACKUP:
		return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
	case BOND_MODE_BROADCAST:
		return NETDEV_LAG_TX_TYPE_BROADCAST;
	case BOND_MODE_XOR:
	case BOND_MODE_8023AD:
		return NETDEV_LAG_TX_TYPE_HASH;
	default:
		return NETDEV_LAG_TX_TYPE_UNKNOWN;
	}
}

static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
{
	struct netdev_lag_upper_info lag_upper_info;
	int err;

	err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);
	lag_upper_info.tx_type = bond_lag_tx_type(bond);
	err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
					   &lag_upper_info);
	if (err)
		return err;
	slave_dev->flags |= IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
	slave->dev->flags |= IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
	return 0;
}

static void bond_upper_dev_unlink(struct net_device *bond_dev,
				  struct net_device *slave_dev)
static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
{
	netdev_upper_dev_unlink(slave_dev, bond_dev);
	slave_dev->flags &= ~IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
	netdev_upper_dev_unlink(slave->dev, bond->dev);
	slave->dev->flags &= ~IFF_SLAVE;
	rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
}

static struct slave *bond_alloc_slave(struct bonding *bond)
@@ -1299,6 +1317,16 @@ void bond_queue_slave_event(struct slave *slave)
	queue_delayed_work(slave->bond->wq, &nnw->work, 0);
}

void bond_lower_state_changed(struct slave *slave)
{
	struct netdev_lag_lower_state_info info;

	info.link_up = slave->link == BOND_LINK_UP ||
		       slave->link == BOND_LINK_FAIL;
	info.tx_enabled = bond_is_active_slave(slave);
	netdev_lower_state_changed(slave->dev, &info);
}

/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
{
@@ -1563,21 +1591,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
		if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
			if (bond->params.updelay) {
				bond_set_slave_link_state(new_slave,
							  BOND_LINK_BACK);
							  BOND_LINK_BACK,
							  BOND_SLAVE_NOTIFY_NOW);
				new_slave->delay = bond->params.updelay;
			} else {
				bond_set_slave_link_state(new_slave,
							  BOND_LINK_UP);
							  BOND_LINK_UP,
							  BOND_SLAVE_NOTIFY_NOW);
			}
		} else {
			bond_set_slave_link_state(new_slave, BOND_LINK_DOWN);
			bond_set_slave_link_state(new_slave, BOND_LINK_DOWN,
						  BOND_SLAVE_NOTIFY_NOW);
		}
	} else if (bond->params.arp_interval) {
		bond_set_slave_link_state(new_slave,
					  (netif_carrier_ok(slave_dev) ?
					  BOND_LINK_UP : BOND_LINK_DOWN));
					  BOND_LINK_UP : BOND_LINK_DOWN),
					  BOND_SLAVE_NOTIFY_NOW);
	} else {
		bond_set_slave_link_state(new_slave, BOND_LINK_UP);
		bond_set_slave_link_state(new_slave, BOND_LINK_UP,
					  BOND_SLAVE_NOTIFY_NOW);
	}

	if (new_slave->link != BOND_LINK_DOWN)
@@ -1662,7 +1695,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
		goto err_detach;
	}

	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave);
	res = bond_master_upper_dev_link(bond, new_slave);
	if (res) {
		netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
		goto err_unregister;
@@ -1698,7 +1731,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)

/* Undo stages on error */
err_upper_unlink:
	bond_upper_dev_unlink(bond_dev, slave_dev);
	bond_upper_dev_unlink(bond, new_slave);

err_unregister:
	netdev_rx_handler_unregister(slave_dev);
@@ -1799,12 +1832,14 @@ static int __bond_release_one(struct net_device *bond_dev,
		return -EINVAL;
	}

	bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW);

	bond_sysfs_slave_del(slave);

	/* recompute stats just before removing the slave */
	bond_get_stats(bond->dev, &bond->bond_stats);

	bond_upper_dev_unlink(bond_dev, slave_dev);
	bond_upper_dev_unlink(bond, slave);
	/* unregister rx_handler early so bond_handle_frame wouldn't be called
	 * for this slave anymore.
	 */
@@ -1996,7 +2031,8 @@ static int bond_miimon_inspect(struct bonding *bond)
			if (link_state)
				continue;

			bond_set_slave_link_state(slave, BOND_LINK_FAIL);
			bond_set_slave_link_state(slave, BOND_LINK_FAIL,
						  BOND_SLAVE_NOTIFY_LATER);
			slave->delay = bond->params.downdelay;
			if (slave->delay) {
				netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
@@ -2011,7 +2047,8 @@ static int bond_miimon_inspect(struct bonding *bond)
		case BOND_LINK_FAIL:
			if (link_state) {
				/* recovered before downdelay expired */
				bond_set_slave_link_state(slave, BOND_LINK_UP);
				bond_set_slave_link_state(slave, BOND_LINK_UP,
							  BOND_SLAVE_NOTIFY_LATER);
				slave->last_link_up = jiffies;
				netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
					    (bond->params.downdelay - slave->delay) *
@@ -2033,7 +2070,8 @@ static int bond_miimon_inspect(struct bonding *bond)
			if (!link_state)
				continue;

			bond_set_slave_link_state(slave, BOND_LINK_BACK);
			bond_set_slave_link_state(slave, BOND_LINK_BACK,
						  BOND_SLAVE_NOTIFY_LATER);
			slave->delay = bond->params.updelay;

			if (slave->delay) {
@@ -2047,7 +2085,8 @@ static int bond_miimon_inspect(struct bonding *bond)
		case BOND_LINK_BACK:
			if (!link_state) {
				bond_set_slave_link_state(slave,
							  BOND_LINK_DOWN);
							  BOND_LINK_DOWN,
							  BOND_SLAVE_NOTIFY_LATER);
				netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
					    (bond->params.updelay - slave->delay) *
					    bond->params.miimon,
@@ -2085,7 +2124,8 @@ static void bond_miimon_commit(struct bonding *bond)
			continue;

		case BOND_LINK_UP:
			bond_set_slave_link_state(slave, BOND_LINK_UP);
			bond_set_slave_link_state(slave, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);
			slave->last_link_up = jiffies;

			primary = rtnl_dereference(bond->primary_slave);
@@ -2125,7 +2165,8 @@ static void bond_miimon_commit(struct bonding *bond)
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

			bond_set_slave_link_state(slave, BOND_LINK_DOWN);
			bond_set_slave_link_state(slave, BOND_LINK_DOWN,
						  BOND_SLAVE_NOTIFY_NOW);

			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
			    BOND_MODE(bond) == BOND_MODE_8023AD)
@@ -2708,7 +2749,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
				struct slave *current_arp_slave;

				current_arp_slave = rtnl_dereference(bond->current_arp_slave);
				bond_set_slave_link_state(slave, BOND_LINK_UP);
				bond_set_slave_link_state(slave, BOND_LINK_UP,
							  BOND_SLAVE_NOTIFY_NOW);
				if (current_arp_slave) {
					bond_set_slave_inactive_flags(
						current_arp_slave,
@@ -2731,7 +2773,8 @@ static void bond_ab_arp_commit(struct bonding *bond)
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

			bond_set_slave_link_state(slave, BOND_LINK_DOWN);
			bond_set_slave_link_state(slave, BOND_LINK_DOWN,
						  BOND_SLAVE_NOTIFY_NOW);
			bond_set_slave_inactive_flags(slave,
						      BOND_SLAVE_NOTIFY_NOW);

@@ -2810,7 +2853,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
		 * up when it is actually down
		 */
		if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
			bond_set_slave_link_state(slave, BOND_LINK_DOWN);
			bond_set_slave_link_state(slave, BOND_LINK_DOWN,
						  BOND_SLAVE_NOTIFY_LATER);
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

@@ -2830,7 +2874,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
	if (!new_slave)
		goto check_state;

	bond_set_slave_link_state(new_slave, BOND_LINK_BACK);
	bond_set_slave_link_state(new_slave, BOND_LINK_BACK,
				  BOND_SLAVE_NOTIFY_LATER);
	bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
	bond_arp_send_all(bond, new_slave);
	new_slave->last_link_up = jiffies;
@@ -2838,7 +2883,7 @@ static bool bond_ab_arp_probe(struct bonding *bond)

check_state:
	bond_for_each_slave_rcu(bond, slave, iter) {
		if (slave->should_notify) {
		if (slave->should_notify || slave->should_notify_link) {
			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW;
			break;
		}
@@ -2893,8 +2938,10 @@ static void bond_activebackup_arp_mon(struct work_struct *work)
		if (should_notify_peers)
			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
						 bond->dev);
		if (should_notify_rtnl)
		if (should_notify_rtnl) {
			bond_slave_state_notify(bond);
			bond_slave_link_notify(bond);
		}

		rtnl_unlock();
	}
+76 −3
Original line number Diff line number Diff line
@@ -105,6 +105,9 @@ struct mlxsw_core {
		struct debugfs_blob_wrapper vsd_blob;
		struct debugfs_blob_wrapper psid_blob;
	} dbg;
	struct {
		u8 *mapping; /* lag_id+port_index to local_port mapping */
	} lag;
	struct mlxsw_hwmon *hwmon;
	unsigned long driver_priv[0];
	/* driver_priv has to be always the last item */
@@ -815,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
		goto err_alloc_stats;
	}

	if (mlxsw_driver->profile->used_max_lag &&
	    mlxsw_driver->profile->used_max_port_per_lag) {
		alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag *
			     mlxsw_driver->profile->max_port_per_lag;
		mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
		if (!mlxsw_core->lag.mapping) {
			err = -ENOMEM;
			goto err_alloc_lag_mapping;
		}
	}

	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile);
	if (err)
		goto err_bus_init;
@@ -847,6 +861,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
err_emad_init:
	mlxsw_bus->fini(bus_priv);
err_bus_init:
	kfree(mlxsw_core->lag.mapping);
err_alloc_lag_mapping:
	free_percpu(mlxsw_core->pcpu_stats);
err_alloc_stats:
	kfree(mlxsw_core);
@@ -865,6 +881,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
	mlxsw_hwmon_fini(mlxsw_core->hwmon);
	mlxsw_emad_fini(mlxsw_core);
	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
	kfree(mlxsw_core->lag.mapping);
	free_percpu(mlxsw_core->pcpu_stats);
	kfree(mlxsw_core);
	mlxsw_core_driver_put(device_kind);
@@ -1196,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
	struct mlxsw_rx_listener_item *rxl_item;
	const struct mlxsw_rx_listener *rxl;
	struct mlxsw_core_pcpu_stats *pcpu_stats;
	u8 local_port = rx_info->sys_port;
	u8 local_port;
	bool found = false;

	dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n",
			    __func__, rx_info->sys_port, rx_info->trap_id);
	if (rx_info->is_lag) {
		dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n",
				    __func__, rx_info->u.lag_id,
				    rx_info->trap_id);
		/* Upper layer does not care if the skb came from LAG or not,
		 * so just get the local_port for the lag port and push it up.
		 */
		local_port = mlxsw_core_lag_mapping_get(mlxsw_core,
							rx_info->u.lag_id,
							rx_info->lag_port_index);
	} else {
		local_port = rx_info->u.sys_port;
	}

	dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n",
			    __func__, local_port, rx_info->trap_id);

	if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) ||
	    (local_port >= MLXSW_PORT_MAX_PORTS))
@@ -1244,6 +1275,48 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
}
EXPORT_SYMBOL(mlxsw_core_skb_receive);

/* Compute the flat index into lag.mapping for (@lag_id, @port_index):
 * each LAG owns a run of profile->max_port_per_lag consecutive entries.
 */
static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
					u16 lag_id, u8 port_index)
{
	return port_index +
	       lag_id * mlxsw_core->driver->profile->max_port_per_lag;
}

/* Record @local_port as the port found at @port_index of LAG @lag_id. */
void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
				u16 lag_id, u8 port_index, u8 local_port)
{
	u8 *entry;

	entry = &mlxsw_core->lag.mapping[mlxsw_core_lag_mapping_index(mlxsw_core,
								      lag_id,
								      port_index)];
	*entry = local_port;
}
EXPORT_SYMBOL(mlxsw_core_lag_mapping_set);

/* Return the local port stored for @port_index of LAG @lag_id. */
u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
			      u16 lag_id, u8 port_index)
{
	const u8 *entry;

	entry = &mlxsw_core->lag.mapping[mlxsw_core_lag_mapping_index(mlxsw_core,
								      lag_id,
								      port_index)];
	return *entry;
}
EXPORT_SYMBOL(mlxsw_core_lag_mapping_get);

/* Walk every port index of LAG @lag_id and reset to 0 each mapping
 * entry that currently holds @local_port.
 */
void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
				  u16 lag_id, u8 local_port)
{
	int port_index;
	u8 *entry;

	for (port_index = 0;
	     port_index < mlxsw_core->driver->profile->max_port_per_lag;
	     port_index++) {
		entry = &mlxsw_core->lag.mapping[mlxsw_core_lag_mapping_index(mlxsw_core,
									      lag_id,
									      port_index)];
		if (*entry != local_port)
			continue;
		*entry = 0;
	}
}
EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear);

int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
		   u32 in_mod, bool out_mbox_direct,
		   char *in_mbox, size_t in_mbox_size,
+13 −1
Original line number Diff line number Diff line
@@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
		    const struct mlxsw_reg_info *reg, char *payload);

/* Receive metadata handed to mlxsw_core_skb_receive() along with the skb. */
struct mlxsw_rx_info {
	bool is_lag; /* selects the member of 'u': lag_id when set, sys_port otherwise */
	union {
		u16 sys_port; /* taken as the local port when !is_lag */
		u16 lag_id; /* with lag_port_index, resolved to a local port via the LAG mapping */
	} u;
	u8 lag_port_index; /* port index within the LAG given by u.lag_id (used when is_lag) */
	int trap_id; /* receive path rejects values >= MLXSW_TRAP_ID_MAX */
};

void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
			    struct mlxsw_rx_info *rx_info);

/* Store local_port in the LAG mapping entry for (lag_id, port_index). */
void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
				u16 lag_id, u8 port_index, u8 local_port);
/* Return the local port stored in the LAG mapping entry for
 * (lag_id, port_index).
 */
u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
			      u16 lag_id, u8 port_index);
/* Reset to 0 every mapping entry of lag_id that currently holds local_port. */
void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
				  u16 lag_id, u8 local_port);

#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8

struct mlxsw_swid_config {
Loading