Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eeb66cdb authored by Saeed Mahameed's avatar Saeed Mahameed
Browse files

net/mlx5: Separate between E-Switch and MPFS



Multi-Physical Function Switch (MPFS) is required when multi-PF
configuration is enabled, to allow passing user-configured unicast MAC
addresses to the requesting PF.

Before this patch, eswitch.c managed the HW MPFS L2 table: E-Switch
always (regardless of SRIOV) enabled vport(0) (NIC PF) vport context
updates on unicast MAC address list changes, in order to populate the PF's
MPFS L2 table accordingly.

In a downstream patch we would like to allow compiling the driver without
E-Switch functionality. For that, we move the MPFS L2 table logic out
of eswitch.c into its own file, and provide a Kconfig flag (MLX5_MPFS) to
allow compiling out MPFS for those who don't want Multi-PF support.

The NIC PF netdevice will now directly update the MPFS L2 table via the new
MPFS API. A VF netdevice has no access to the MPFS L2 table, so E-Switch
remains responsible for updating the MPFS L2 table on behalf of its VFs.

Due to this change we also no longer need to enable vport(0) (PF vport)
unicast MAC change events when SRIOV is not enabled.
This means E-Switch is now activated only on SRIOV activation, and is not
required otherwise.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: Jes Sorensen <jsorensen@fb.com>
Cc: kernel-team@fb.com
parent a9f7705f
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -34,6 +34,16 @@ config MLX5_CORE_EN
	---help---
	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.

config MLX5_MPFS
	bool "Mellanox Technologies MLX5 MPFS support"
	depends on MLX5_CORE_EN
	default y
	---help---
	  Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
	  support in ConnectX NIC. MPFS is required when multi-PF configuration
	  is enabled, to allow passing user-configured unicast MAC addresses to
	  the requesting PF.

config MLX5_CORE_EN_DCB
	bool "Data Center Bridging (DCB) Support"
	default y
+2 −0
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \
		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o

mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o

mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o

mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+16 −1
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
#include "en.h"
#include "lib/mpfs.h"

static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
				  struct mlx5e_l2_rule *ai, int type);
@@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node {
	struct hlist_node          hlist;
	u8                         action;
	struct mlx5e_l2_rule ai;
	bool   mpfs;
};

static inline int mlx5e_hash_l2(u8 *addr)
@@ -362,17 +364,30 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
				    struct mlx5e_l2_hash_node *hn)
{
	switch (hn->action) {
	u8 action = hn->action;
	int l2_err = 0;

	switch (action) {
	case MLX5E_ACTION_ADD:
		mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
		if (!is_multicast_ether_addr(hn->ai.addr)) {
			l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr);
			hn->mpfs = !l2_err;
		}
		hn->action = MLX5E_ACTION_NONE;
		break;

	case MLX5E_ACTION_DEL:
		if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs)
			l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr);
		mlx5e_del_l2_flow_rule(priv, &hn->ai);
		mlx5e_del_l2_from_hash(hn);
		break;
	}

	if (l2_err)
		netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
			    action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err);
}

static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+38 −152
Original line number Diff line number Diff line
@@ -46,19 +46,13 @@ enum {
	MLX5_ACTION_DEL  = 2,
};

/* E-Switch UC L2 table hash node */
struct esw_uc_addr {
	struct l2addr_node node; /* hash linkage + MAC; must be named "node" for the l2addr_hash_* helpers */
	u32                table_index; /* index filled in by set_l2_table_entry() for this MAC */
	u32                vport; /* vport that registered (owns) this MAC */
};

/* Vport UC/MC hash node */
struct vport_addr {
	struct l2addr_node     node;
	u8                     action;
	u32                    vport;
	/* FDB rule forwarding this MAC to the vport; NULL when none is installed */
	struct mlx5_flow_handle *flow_rule;
	bool mpfs; /* UC MAC was added to MPFs */
	/* A flag indicating that mac was added due to mc promiscuous vport */
	bool mc_promisc;
};
@@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
	return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
}

/* HW L2 Table (MPFS) management */
/*
 * Build and execute a SET_L2_TABLE_ENTRY firmware command that programs
 * @mac (and the optional VLAN) at L2 table slot @index.
 *
 * Returns the result of mlx5_cmd_exec().
 */
static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
				  u8 *mac, u8 vlan_valid, u16 vlan)
{
	u32 cmd_out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
	u32 cmd_in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]   = {0};
	u8 *mac_field;

	/* The MAC is written starting at byte offset 2 of the mac_address field */
	mac_field = MLX5_ADDR_OF(set_l2_table_entry_in, cmd_in, mac_address);
	ether_addr_copy(&mac_field[2], mac);

	MLX5_SET(set_l2_table_entry_in, cmd_in, opcode,
		 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
	MLX5_SET(set_l2_table_entry_in, cmd_in, table_index, index);
	MLX5_SET(set_l2_table_entry_in, cmd_in, vlan_valid, vlan_valid);
	MLX5_SET(set_l2_table_entry_in, cmd_in, vlan, vlan);

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}

/*
 * Build and execute a DELETE_L2_TABLE_ENTRY firmware command for L2 table
 * slot @index. Returns the result of mlx5_cmd_exec().
 */
static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
{
	u32 cmd_out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
	u32 cmd_in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]   = {0};

	MLX5_SET(delete_l2_table_entry_in, cmd_in, table_index, index);
	MLX5_SET(delete_l2_table_entry_in, cmd_in, opcode,
		 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}

/*
 * Reserve the lowest free L2 table index.
 *
 * On success *ix holds the reserved index and its bit is set in the
 * allocation bitmap. When the table is full, -ENOSPC is returned and *ix
 * holds the (out of range) value reported by find_first_zero_bit().
 */
static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
{
	*ix = find_first_zero_bit(l2_table->bitmap, l2_table->size);
	if (*ix >= l2_table->size)
		return -ENOSPC;

	__set_bit(*ix, l2_table->bitmap);
	return 0;
}

/* Release L2 table index @ix by clearing its bit in the allocation bitmap. */
static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix)
{
	__clear_bit(ix, l2_table->bitmap);
}

/*
 * Reserve an L2 table index and program @mac (with optional VLAN) into it.
 *
 * The chosen index is returned through @index. If the firmware command
 * fails, the index reservation is rolled back.
 *
 * Returns 0 on success, -ENOSPC when the table is full, or the firmware
 * command error.
 */
static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac,
			      u8 vlan_valid, u16 vlan,
			      u32 *index)
{
	struct mlx5_l2_table *table = &dev->priv.eswitch->l2_table;
	int ret;

	ret = alloc_l2_table_index(table, index);
	if (ret)
		return ret;

	ret = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan);
	if (!ret)
		return 0;

	/* Programming failed: give the reserved slot back */
	free_l2_table_index(table, *index);
	return ret;
}

/*
 * Remove the entry at @index from the L2 table and release the index.
 * The result of the delete command is not checked; the index is freed
 * unconditionally.
 */
static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
{
	struct mlx5_l2_table *table = &dev->priv.eswitch->l2_table;

	del_l2_table_entry_cmd(dev, index);
	free_l2_table_index(table, index);
}

/* E-Switch FDB */
static struct mlx5_flow_handle *
__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
@@ -455,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,

/*
 * Add a unicast MAC on behalf of a vport.
 *
 * For non-PF vports the MAC is first registered in the MPFS L2 table and
 * vaddr->mpfs is set so that esw_del_uc_addr() knows to undo it. vport 0
 * (the PF) skips that step because the PF netdev registers its own MACs.
 * When an FDB exists and the E-Switch is in SRIOV_LEGACY mode, an FDB rule
 * forwarding the MAC to the vport is installed and stored in
 * vaddr->flow_rule.
 *
 * Returns 0 on success or the error reported by mlx5_mpfs_add_mac().
 */
static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;
	int err;

	/* Skip mlx5_mpfs_add_mac for PFs,
	 * it is already done by the PF netdev in mlx5e_execute_l2_action
	 */
	if (!vport)
		goto fdb_add;

	err = mlx5_mpfs_add_mac(esw->dev, mac);
	if (err) {
		esw_warn(esw->dev,
			 "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
			 mac, vport, err);
		return err;
	}
	vaddr->mpfs = true;

fdb_add:
	/* SRIOV is enabled: Forward UC MAC to vport */
	if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
		vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);

	esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
		  vport, mac, vaddr->flow_rule);

	/* Return an explicit 0: "err" is never assigned on the PF path, which
	 * jumps straight to fdb_add.
	 */
	return 0;
}

/*
 * Remove a unicast MAC previously added with esw_add_uc_addr().
 *
 * The MPFS L2 table entry is deleted only when this E-Switch added it
 * (non-PF vport with vaddr->mpfs set). A failure to delete it is logged but
 * not propagated, so the FDB rule is always torn down and the function
 * always returns 0.
 */
static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;
	int err = 0;

	/* Skip mlx5_mpfs_del_mac for PFs,
	 * it is already done by the PF netdev in mlx5e_execute_l2_action
	 */
	if (!vport || !vaddr->mpfs)
		goto fdb_del;

	err = mlx5_mpfs_del_mac(esw->dev, mac);
	if (err)
		esw_warn(esw->dev,
			 "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
			 mac, vport, err);
	vaddr->mpfs = false;

fdb_del:
	if (vaddr->flow_rule)
		mlx5_del_flow_rules(vaddr->flow_rule);
	vaddr->flow_rule = NULL;

	return 0;
}

@@ -1635,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)

	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
	esw->mode = mode;
	esw_disable_vport(esw, 0);

	if (mode == SRIOV_LEGACY)
		err = esw_create_legacy_fdb_table(esw, nvfs + 1);
@@ -1648,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
	if (err)
		esw_warn(esw->dev, "Failed to create eswitch TSAR");

	enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
	/* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
	 * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
	 * 2. FDB/Eswitch is programmed by user space tools
	 */
	enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
	for (i = 0; i <= nvfs; i++)
		esw_enable_vport(esw, i, enabled_events);

@@ -1657,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
	return 0;

abort:
	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
	esw->mode = SRIOV_NONE;
	return err;
}
@@ -1691,30 +1607,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
		esw_offloads_cleanup(esw, nvports);

	esw->mode = SRIOV_NONE;
	/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}

/*
 * Enable vport 0 (PF) with unicast address change events, provided the
 * E-Switch is allowed on this device. Does nothing otherwise.
 */
void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
{
	/* VF Vports will be enabled when SRIOV is enabled */
	if (ESW_ALLOWED(esw))
		esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}

/*
 * Disable vport 0 (PF), provided the E-Switch is allowed on this device.
 * Does nothing otherwise.
 */
void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
{
	if (ESW_ALLOWED(esw))
		esw_disable_vport(esw, 0);
}

int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
	int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
	int total_vports = MLX5_TOTAL_VPORTS(dev);
	struct mlx5_eswitch *esw;
	int vport_num;
@@ -1724,8 +1620,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
		return 0;

	esw_info(dev,
		 "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
		 total_vports, l2_table_size,
		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
		 total_vports,
		 MLX5_MAX_UC_PER_VPORT(dev),
		 MLX5_MAX_MC_PER_VPORT(dev));

@@ -1735,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)

	esw->dev = dev;

	esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
				   sizeof(uintptr_t), GFP_KERNEL);
	if (!esw->l2_table.bitmap) {
		err = -ENOMEM;
		goto abort;
	}
	esw->l2_table.size = l2_table_size;

	esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
	if (!esw->work_queue) {
		err = -ENOMEM;
@@ -1793,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
abort:
	if (esw->work_queue)
		destroy_workqueue(esw->work_queue);
	kfree(esw->l2_table.bitmap);
	kfree(esw->vports);
	kfree(esw->offloads.vport_reps);
	kfree(esw);
@@ -1809,7 +1696,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)

	esw->dev->priv.eswitch = NULL;
	destroy_workqueue(esw->work_queue);
	kfree(esw->l2_table.bitmap);
	kfree(esw->offloads.vport_reps);
	kfree(esw->vports);
	kfree(esw);
+1 −54
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
#include "lib/mpfs.h"

#define MLX5_MAX_UC_PER_VPORT(dev) \
	(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
@@ -44,9 +45,6 @@
#define MLX5_MAX_MC_PER_VPORT(dev) \
	(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))

/* Number of buckets in an L2 address hash table */
#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
/* Bucket index for a MAC: its last byte. The argument is parenthesized so
 * that pointer expressions such as "mac + off" index correctly.
 */
#define MLX5_L2_ADDR_HASH(addr) ((addr)[5])

#define FDB_UPLINK_VPORT 0xffff

#define MLX5_MIN_BW_SHARE 1
@@ -54,48 +52,6 @@
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)

/* L2 -mac address based- hash helpers */
struct l2addr_node {
	struct hlist_node hlist; /* linkage into a hash bucket list */
	u8                addr[ETH_ALEN]; /* MAC address the node is keyed by */
};

/* Walk every node of every bucket of @hash. @hn is the cursor, @tmp the
 * scratch pointer for the deletion-safe list walk, @i the bucket index.
 */
#define for_each_l2hash_node(hn, tmp, hash, i) \
	for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
		hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)

/* Look up @mac in @hash. Evaluates to a "type *" pointing at the matching
 * entry, or NULL when there is none. "type" must embed a struct l2addr_node
 * member named "node". @hash and @mac are parenthesized at every use (mac
 * also before being handed to MLX5_L2_ADDR_HASH) so that expression
 * arguments expand correctly; note that @mac is evaluated more than once.
 */
#define l2addr_hash_find(hash, mac, type) ({                \
	int ix = MLX5_L2_ADDR_HASH((mac));                  \
	bool found = false;                                 \
	type *ptr = NULL;                                   \
							    \
	hlist_for_each_entry(ptr, &(hash)[ix], node.hlist)  \
		if (ether_addr_equal(ptr->node.addr, (mac))) {\
			found = true;                       \
			break;                              \
		}                                           \
	if (!found)                                         \
		ptr = NULL;                                 \
	ptr;                                                \
})

/* Allocate a zeroed "type" with @gfp, copy @mac into it and insert it at the
 * head of its bucket in @hash. Evaluates to the new entry, or NULL when the
 * allocation fails. No duplicate check is performed here. @hash and @mac
 * are parenthesized so that expression arguments expand correctly; @mac is
 * evaluated more than once.
 */
#define l2addr_hash_add(hash, mac, type, gfp) ({            \
	int ix = MLX5_L2_ADDR_HASH((mac));                  \
	type *ptr = NULL;                                   \
							    \
	ptr = kzalloc(sizeof(type), gfp);                   \
	if (ptr) {                                          \
		ether_addr_copy(ptr->node.addr, (mac));     \
		hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
	}                                                   \
	ptr;                                                \
})

/* Unlink @ptr from its hash bucket and free it. @ptr is parenthesized so
 * that expression arguments expand correctly; it is evaluated twice.
 */
#define l2addr_hash_del(ptr) ({                             \
	hlist_del(&(ptr)->node.hlist);                      \
	kfree(ptr);                                         \
})

struct vport_ingress {
	struct mlx5_flow_table *acl;
	struct mlx5_flow_group *allow_untagged_spoofchk_grp;
@@ -150,12 +106,6 @@ struct mlx5_vport {
	u16                     enabled_events;
};

/* Bookkeeping for the HW L2 table as tracked by the E-Switch */
struct mlx5_l2_table {
	struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; /* MAC-keyed buckets of registered UC addresses */
	u32                  size; /* number of table indexes tracked by bitmap */
	unsigned long        *bitmap; /* one bit per table index; a set bit marks the index as allocated */
};

struct mlx5_eswitch_fdb {
	void *fdb;
	union {
@@ -222,7 +172,6 @@ struct esw_mc_addr { /* SRIOV only */

struct mlx5_eswitch {
	struct mlx5_core_dev    *dev;
	struct mlx5_l2_table    l2_table;
	struct mlx5_eswitch_fdb fdb_table;
	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
	struct workqueue_struct *work_queue;
@@ -250,8 +199,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
Loading