Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53d94892 authored by David S. Miller
Browse files

Merge branch 'mlx5-bulk-flow-stats-sriov-tc-offloads'



Saeed Mahameed says:

====================
Mellanox 100G mlx5 Bulk flow statistics and SRIOV TC offloads

This series from Amir and Or deals with two enhancements for the mlx5 TC offloads.

The first two patches add bulk reading of flow counters. A few bulk counter queries are
used instead of issuing thousands of firmware commands per second to get the statistics
of all flows set to HW.

The next patches add TC based SRIOV offloading to mlx5, as a follow up for the e-switch
offloads mode and the VF representors. When the e-switch is set to the (new) "offloads"
mode, we can now offload TC/flower drop and forward rules, the forward action we offload
is TC mirred/redirect.

The above is done by having the VF representor netdevices export the setup_tc ndo, from
which we re-use and enhance the existing mlx5 TC offloads sub-module, which now
works for both the NIC and the SRIOV cases.

The series is applied on top of b38a75d2 ('mlxsw: core: Trace EMAD messages')
and it has no merge issues with the ongoing net submission ('mlx5 tx timeout watchdog fixes')

V2:
    - Fixed compilation warning.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 18017723 d957b4e3
Loading
Loading
Loading
Loading
+39 −1
Original line number Diff line number Diff line
@@ -33,9 +33,11 @@
#include <generated/utsrelease.h>
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>

#include "eswitch.h"
#include "en.h"
#include "en_tc.h"

static const char mlx5e_rep_driver_name[] = "mlx5e_rep";

@@ -201,6 +203,10 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,

	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
		mlx5e_remove_sqs_fwd_rules(priv);

	/* clean (and re-init) existing uplink offloaded TC rules */
	mlx5e_tc_cleanup(priv);
	mlx5e_tc_init(priv);
}

static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
@@ -217,6 +223,29 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
	return 0;
}

/*
 * ndo_setup_tc handler installed on the representor netdev ops.
 *
 * Only flower classifier requests whose handle belongs to the ingress
 * qdisc are accepted; they are dispatched to the mlx5e flower
 * configure/delete/stats routines according to the flower command.
 * Any other handle, setup type or flower command yields -EOPNOTSUPP.
 */
static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
				  __be16 proto, struct tc_to_netdev *tc)
{
	struct mlx5e_priv *priv = netdev_priv(dev);

	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
		return -EOPNOTSUPP;

	if (tc->type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (tc->cls_flower->command) {
	case TC_CLSFLOWER_REPLACE:
		return mlx5e_configure_flower(priv, proto, tc->cls_flower);
	case TC_CLSFLOWER_DESTROY:
		return mlx5e_delete_flower(priv, tc->cls_flower);
	case TC_CLSFLOWER_STATS:
		return mlx5e_stats_flower(priv, tc->cls_flower);
	default:
		/* unknown flower command */
		return -EOPNOTSUPP;
	}
}

/* switchdev ops table for the representor netdev; only attr_get is set. */
static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
	.switchdev_port_attr_get	= mlx5e_attr_get,
};
@@ -226,6 +255,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
	.ndo_stop                = mlx5e_close,
	.ndo_start_xmit          = mlx5e_xmit,
	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
	.ndo_get_stats64         = mlx5e_get_stats,
};

@@ -279,7 +309,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
	netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
#endif

	netdev->features	 |= NETIF_F_VLAN_CHALLENGED;
	netdev->features	 |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
	netdev->hw_features      |= NETIF_F_HW_TC;

	eth_hw_addr_random(netdev);
}
@@ -323,8 +354,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
	}
	rep->vport_rx_rule = flow_rule;

	err = mlx5e_tc_init(priv);
	if (err)
		goto err_del_flow_rule;

	return 0;

err_del_flow_rule:
	mlx5_del_flow_rule(rep->vport_rx_rule);
err_destroy_direct_tirs:
	mlx5e_destroy_direct_tirs(priv);
err_destroy_direct_rqts:
@@ -338,6 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	int i;

	mlx5e_tc_cleanup(priv);
	mlx5_del_flow_rule(rep->vport_rx_rule);
	mlx5e_destroy_direct_tirs(priv);
	for (i = 0; i < priv->params.num_channels; i++)
+96 −20
Original line number Diff line number Diff line
@@ -37,8 +37,11 @@
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"

struct mlx5e_tc_flow {
	struct rhash_head	node;
@@ -49,7 +52,7 @@ struct mlx5e_tc_flow {
#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4

static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
						    struct mlx5_flow_spec *spec,
						    u32 action, u32 flow_tag)
{
@@ -62,7 +65,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = priv->fs.vlan.ft.t;
	} else {
	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);
@@ -109,6 +112,22 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
	return rule;
}

/*
 * Install an offloaded rule in the e-switch on behalf of @priv.
 *
 * The source vport of the rule is taken from the representor attached to
 * @priv (priv->ppriv); a representor whose vport is 0 is mapped to
 * FDB_UPLINK_VPORT. Returns whatever mlx5_eswitch_add_offloaded_rule()
 * returns.
 */
static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
						    struct mlx5_flow_spec *spec,
						    u32 action, u32 dst_vport)
{
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	/* vport 0 stands for the uplink */
	u32 src_vport = rep->vport ? rep->vport : FDB_UPLINK_VPORT;

	return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5_flow_rule *rule)
{
@@ -120,7 +139,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,

	mlx5_fc_destroy(priv->mdev, counter);

	if (!mlx5e_tc_num_filters(priv)) {
	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
@@ -295,7 +314,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
	return 0;
}

static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				u32 *action, u32 *flow_tag)
{
	const struct tc_action *a;
@@ -339,16 +358,66 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
	return 0;
}

/*
 * Translate the TC actions of a flower rule into an e-switch (FDB) action.
 *
 * @priv:       private data of the netdev the rule is being added on
 * @exts:       TC actions attached to the rule
 * @action:     out - MLX5_FLOW_CONTEXT_ACTION_* flags for the rule
 * @dest_vport: out - destination vport, written only for a mirred redirect
 *
 * Exactly one action per rule is accepted: either a gact drop (offloaded as
 * drop + count) or a mirred redirect to a device that shares the switchdev
 * parent id with priv->netdev.
 *
 * Returns 0 on success, -EINVAL if there are no actions, more than one
 * action, an unsupported action, or a redirect target that does not exist
 * or is not on the same switch HW.
 */
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				u32 *action, u32 *dest_vport)
{
	const struct tc_action *a;

	if (tc_no_actions(exts))
		return -EINVAL;

	*action = 0;

	tc_for_each_action(a, exts) {
		/* Only support a single action per rule */
		if (*action)
			return -EINVAL;

		if (is_tcf_gact_shot(a)) {
			*action = MLX5_FLOW_CONTEXT_ACTION_DROP |
				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_mirred_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev;
			struct mlx5e_priv *out_priv;
			struct mlx5_eswitch_rep *out_rep;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
			/* The lookup returns NULL when no device has this
			 * ifindex; bail out before out_dev is dereferenced.
			 */
			if (!out_dev) {
				pr_err("device %s: redirect target ifindex %d not found, can't offload forwarding\n",
				       priv->netdev->name, ifindex);
				return -EINVAL;
			}

			if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}

			out_priv = netdev_priv(out_dev);
			out_rep  = out_priv->ppriv;
			/* representor vport 0 stands for the uplink */
			if (out_rep->vport == 0)
				*dest_vport = FDB_UPLINK_VPORT;
			else
				*dest_vport = out_rep->vport;
			*action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		return -EINVAL;
	}
	return 0;
}

int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err = 0;
	u32 flow_tag;
	u32 action;
	u32 flow_tag, action, dest_vport = 0;
	struct mlx5e_tc_flow *flow;
	struct mlx5_flow_spec *spec;
	struct mlx5_flow_rule *old = NULL;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
@@ -369,28 +438,35 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
	if (err < 0)
		goto err_free;

	err = parse_tc_actions(priv, f->exts, &action, &flow_tag);
	if (esw && esw->mode == SRIOV_OFFLOADS) {
		err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
		if (err < 0)
			goto err_free;

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
	}

	flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag);
	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		goto err_hash_del;
		goto err_free;
	}

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		goto err_del_rule;

	if (old)
		mlx5e_tc_del_flow(priv, old);

	goto out;

err_hash_del:
	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
err_del_rule:
	mlx5_del_flow_rule(flow->rule);

err_free:
	if (!old)
+7 −0
Original line number Diff line number Diff line
@@ -145,6 +145,7 @@ struct mlx5_eswitch_fdb {
		} legacy;

		struct offloads_fdb {
			struct mlx5_flow_table *fdb;
			struct mlx5_flow_group *send_to_vport_grp;
			struct mlx5_flow_group *miss_grp;
			struct mlx5_flow_rule  *miss_rule;
@@ -221,6 +222,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
				 int vport,
				 struct ifla_vf_stats *vf_stats);

struct mlx5_flow_spec;

struct mlx5_flow_rule *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
				struct mlx5_flow_spec *spec,
				u32 action, u32 src_vport, u32 dst_vport);
struct mlx5_flow_rule *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);

+70 −7
Original line number Diff line number Diff line
@@ -38,6 +38,54 @@
#include "mlx5_core.h"
#include "eswitch.h"

/*
 * Values passed as the second argument when the offloads FDB tables are
 * created: FDB_FAST_PATH for the auto-grouped table, FDB_SLOW_PATH for the
 * table sized by the number of vports.
 * NOTE(review): presumably this argument is the table priority within the
 * FDB namespace - confirm against the flow steering API.
 */
enum {
	FDB_FAST_PATH = 0,
	FDB_SLOW_PATH
};

/*
 * Add an offloaded flow rule to the e-switch FDB table (esw->fdb_table.fdb).
 *
 * @esw:       e-switch instance, must be in SRIOV_OFFLOADS mode
 * @spec:      match spec; its misc parameters are updated here so that the
 *             rule also matches on source_port == @src_vport
 * @action:    MLX5_FLOW_CONTEXT_ACTION_* flags
 * @src_vport: vport the matched traffic originates from
 * @dst_vport: destination vport, used only when @action has FWD_DEST set
 *
 * With FWD_DEST the rule forwards to @dst_vport and @action is reduced to
 * FWD_DEST only. Otherwise, when COUNT is set, a flow counter is created and
 * used as the rule destination.
 *
 * Returns the new rule, or an ERR_PTR: -EOPNOTSUPP when the e-switch is not
 * in offloads mode, or the error from counter creation / rule insertion.
 */
struct mlx5_flow_rule *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
				struct mlx5_flow_spec *spec,
				u32 action, u32 src_vport, u32 dst_vport)
{
	struct mlx5_flow_destination dest = { 0 };
	struct mlx5_fc *counter = NULL;
	struct mlx5_flow_rule *rule;
	void *misc_v;
	void *misc_c;

	if (esw->mode != SRIOV_OFFLOADS)
		return ERR_PTR(-EOPNOTSUPP);

	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
		dest.vport_num = dst_vport;
	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(esw->dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);
		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest.counter = counter;
	}

	/* match on the source vport in addition to the outer headers */
	misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
	misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
	MLX5_SET(fte_match_set_misc, misc_v, source_port, src_vport);
	MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, source_port);

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
				      MLX5_MATCH_MISC_PARAMETERS;

	rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
				  spec, action, 0, &dest);
	if (!IS_ERR(rule))
		return rule;

	/* insertion failed: release the counter (still NULL on the forward path) */
	mlx5_fc_destroy(esw->dev, counter);

	return rule;
}

static struct mlx5_flow_rule *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
{
@@ -149,7 +197,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport_num = 0;

	flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				       0, &dest);
	if (IS_ERR(flow_rule)) {
@@ -165,6 +213,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
}

#define MAX_PF_SQ 256
#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */
#define ESW_OFFLOADS_NUM_GROUPS  4

static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
{
@@ -190,15 +240,25 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
	esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));

	table_size = nvports + MAX_PF_SQ + 1;
	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
						  ESW_OFFLOADS_NUM_ENTRIES,
						  ESW_OFFLOADS_NUM_GROUPS, 0);
	if (IS_ERR(fdb)) {
		err = PTR_ERR(fdb);
		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
		goto fdb_err;
		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
		goto fast_fdb_err;
	}
	esw->fdb_table.fdb = fdb;

	table_size = nvports + MAX_PF_SQ + 1;
	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
	if (IS_ERR(fdb)) {
		err = PTR_ERR(fdb);
		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
		goto slow_fdb_err;
	}
	esw->fdb_table.offloads.fdb = fdb;

	/* create send-to-vport group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -247,8 +307,10 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
miss_err:
	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
	mlx5_destroy_flow_table(fdb);
fdb_err:
	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
slow_fdb_err:
	mlx5_destroy_flow_table(esw->fdb_table.fdb);
fast_fdb_err:
ns_err:
	kvfree(flow_group_in);
	return err;
@@ -264,6 +326,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);

	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
	mlx5_destroy_flow_table(esw->fdb_table.fdb);
}

+67 −0
Original line number Diff line number Diff line
@@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,

	return 0;
}

/*
 * State for one bulk flow-counter query.
 *
 * @id:     id of the first counter in the queried range
 * @num:    number of counters in the range
 * @outlen: length in bytes handed to the firmware command as the size of @out
 * @out:    command output buffer (flexible array, allocated together with
 *          the struct)
 */
struct mlx5_cmd_fc_bulk {
	u16 id;
	int num;
	int outlen;
	u32 out[];
};

/*
 * Allocate the state for a bulk query of @num flow counters starting at
 * counter id @id.
 *
 * The allocation holds the struct header followed by the command output
 * buffer. b->outlen records the size of the output buffer only (the query
 * output header plus one traffic_counter entry per counter), because it is
 * later passed to the firmware command as the length of b->out; it must not
 * include the size of the struct header.
 *
 * @dev is not used here.
 *
 * Returns the zeroed, initialised bulk object, or NULL on allocation
 * failure. Release it with mlx5_cmd_fc_bulk_free().
 */
struct mlx5_cmd_fc_bulk *
mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
{
	struct mlx5_cmd_fc_bulk *b;
	int outlen =
		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
		MLX5_ST_SZ_BYTES(traffic_counter) * num;

	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
	if (!b)
		return NULL;

	b->id = id;
	b->num = num;
	b->outlen = outlen;

	return b;
}

/* Release a bulk query object; it is freed with kfree(). */
void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
{
	kfree(b);
}

/*
 * Issue a single QUERY_FLOW_COUNTER command covering b->num counters
 * starting at counter id b->id. The command output is written to b->out,
 * with b->outlen given as the output length.
 *
 * Returns the result of mlx5_cmd_exec_check_status().
 */
int
mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
{
	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};

	MLX5_SET(query_flow_counter_in, in, opcode,
		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
	/* first counter id and how many counters to read */
	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);

	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
					  b->out, b->outlen);
}

/*
 * Extract the statistics of counter @id from a bulk query result.
 *
 * @id must lie in [b->id, b->id + b->num - 1]. When it does, *packets and
 * *bytes are filled from the matching flow_statistics entry of b->out.
 * Otherwise a warning is logged and *packets / *bytes are left untouched.
 */
void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
			  struct mlx5_cmd_fc_bulk *b, u16 id,
			  u64 *packets, u64 *bytes)
{
	/* position of @id relative to the first counter of the bulk */
	int idx = id - b->id;
	void *entry;

	if (idx >= 0 && idx < b->num) {
		entry = MLX5_ADDR_OF(query_flow_counter_out, b->out,
				     flow_statistics[idx]);
		*packets = MLX5_GET64(traffic_counter, entry, packets);
		*bytes = MLX5_GET64(traffic_counter, entry, octets);
		return;
	}

	mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
		       id, b->id, b->id + b->num - 1);
}
Loading