Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 36f87780 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'packet-sampling-offload'

Jiri Pirko says:

====================
Add support for offloading packet-sampling

Yotam says:

The first patch introduces the psample module, a netlink channel dedicated
to packet sampling implemented using generic netlink. This module provides
a generic way for kernel modules to sample packets, while not being tied
to any specific subsystem like NFLOG.

The second patch adds the sample tc action, which uses psample to randomly
sample packets that match a classifier. The user can configure the psample
group number, the sampling rate and the packet's truncation (to save
kernel-user traffic).

The last two patches add the support for offloading the matchall-sample
tc command in the mlxsw driver, for ingress qdiscs.

An example for psample usage can be found in the libpsample project at:
https://github.com/Mellanox/libpsample



v1->v2:
- Reword first patch's commit message
- Fix typo in comment in second patch
- Change order of tc_sample uapi enum to match convention
- Rename act_sample action callback tcf_sample -> tcf_sample_act
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents d36db83b 98d0f7b9
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -9957,6 +9957,13 @@ L: linuxppc-dev@lists.ozlabs.org
S:	Maintained
F:	drivers/block/ps3vram.c

PSAMPLE PACKET SAMPLING SUPPORT:
M:	Yotam Gigi <yotamg@mellanox.com>
S:	Maintained
F:	net/psample
F:	include/net/psample.h
F:	include/uapi/linux/psample.h

PSTORE FILESYSTEM
M:	Anton Vorontsov <anton@enomsg.org>
M:	Colin Cross <ccross@android.com>
+41 −0
Original line number Diff line number Diff line
@@ -4965,6 +4965,46 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
					   MLXSW_REG_MLCR_DURATION_MAX : 0);
}

/* MPSC - Monitoring Packet Sampling Configuration Register
 * --------------------------------------------------------
 * MPSC Register is used to configure the Packet Sampling mechanism.
 */
#define MLXSW_REG_MPSC_ID 0x9080
#define MLXSW_REG_MPSC_LEN 0x1C

MLXSW_REG_DEFINE(mpsc, MLXSW_REG_MPSC_ID, MLXSW_REG_MPSC_LEN);

/* reg_mpsc_local_port
 * Local port number
 * Not supported for CPU port
 * Access: Index
 */
MLXSW_ITEM32(reg, mpsc, local_port, 0x00, 16, 8);

/* reg_mpsc_e
 * Enable sampling on port local_port
 * Access: RW
 */
MLXSW_ITEM32(reg, mpsc, e, 0x04, 30, 1);

#define MLXSW_REG_MPSC_RATE_MAX 3500000000UL

/* reg_mpsc_rate
 * Sampling rate = 1 out of rate packets (with randomization around
 * the point). Valid values are: 1 to MLXSW_REG_MPSC_RATE_MAX
 * Access: RW
 */
MLXSW_ITEM32(reg, mpsc, rate, 0x08, 0, 32);

static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e,
				       u32 rate)
{
	MLXSW_REG_ZERO(mpsc, payload);
	mlxsw_reg_mpsc_local_port_set(payload, local_port);
	mlxsw_reg_mpsc_e_set(payload, e);
	mlxsw_reg_mpsc_rate_set(payload, rate);
}

/* SBPR - Shared Buffer Pools Register
 * -----------------------------------
 * The SBPR configures and retrieves the shared buffer pools and configuration.
@@ -5429,6 +5469,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
	MLXSW_REG(mpat),
	MLXSW_REG(mpar),
	MLXSW_REG(mlcr),
	MLXSW_REG(mpsc),
	MLXSW_REG(sbpr),
	MLXSW_REG(sbcm),
	MLXSW_REG(sbpm),
+111 −0
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
#include <net/netevent.h>
#include <net/tc_act/tc_sample.h>

#include "spectrum.h"
#include "pci.h"
@@ -469,6 +470,16 @@ static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from,
	mlxsw_sp_span_inspected_port_unbind(from, span_entry, type);
}

static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
				    bool enable, u32 rate)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	char mpsc_pl[MLXSW_REG_MPSC_LEN];

	mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl);
}

static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
					  bool is_up)
{
@@ -1218,6 +1229,51 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
	mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type);
}

static int
mlxsw_sp_port_add_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port,
				      struct tc_cls_matchall_offload *cls,
				      const struct tc_action *a,
				      bool ingress)
{
	int err;

	if (!mlxsw_sp_port->sample)
		return -EOPNOTSUPP;
	if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) {
		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
		return -EEXIST;
	}
	if (tcf_sample_rate(a) > MLXSW_REG_MPSC_RATE_MAX) {
		netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
		return -EOPNOTSUPP;
	}

	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
			   tcf_sample_psample_group(a));
	mlxsw_sp_port->sample->truncate = tcf_sample_truncate(a);
	mlxsw_sp_port->sample->trunc_size = tcf_sample_trunc_size(a);
	mlxsw_sp_port->sample->rate = tcf_sample_rate(a);

	err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, tcf_sample_rate(a));
	if (err)
		goto err_port_sample_set;
	return 0;

err_port_sample_set:
	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
	return err;
}

static void
mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
{
	if (!mlxsw_sp_port->sample)
		return;

	mlxsw_sp_port_sample_set(mlxsw_sp_port, false, 1);
	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
}

static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
					  __be16 protocol,
					  struct tc_cls_matchall_offload *cls,
@@ -1248,6 +1304,10 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
		mirror = &mall_tc_entry->mirror;
		err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port,
							    mirror, a, ingress);
	} else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
		mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
		err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls,
							    a, ingress);
	} else {
		err = -EOPNOTSUPP;
	}
@@ -1281,6 +1341,9 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
		mlxsw_sp_port_del_cls_matchall_mirror(mlxsw_sp_port,
						      &mall_tc_entry->mirror);
		break;
	case MLXSW_SP_PORT_MALL_SAMPLE:
		mlxsw_sp_port_del_cls_matchall_sample(mlxsw_sp_port);
		break;
	default:
		WARN_ON(1);
	}
@@ -2259,6 +2322,13 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
		goto err_alloc_stats;
	}

	mlxsw_sp_port->sample = kzalloc(sizeof(*mlxsw_sp_port->sample),
					GFP_KERNEL);
	if (!mlxsw_sp_port->sample) {
		err = -ENOMEM;
		goto err_alloc_sample;
	}

	mlxsw_sp_port->hw_stats.cache =
		kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL);

@@ -2387,6 +2457,8 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
err_port_swid_set:
	kfree(mlxsw_sp_port->hw_stats.cache);
err_alloc_hw_stats:
	kfree(mlxsw_sp_port->sample);
err_alloc_sample:
	free_percpu(mlxsw_sp_port->pcpu_stats);
err_alloc_stats:
	kfree(mlxsw_sp_port->untagged_vlans);
@@ -2433,6 +2505,7 @@ static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
	mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
	kfree(mlxsw_sp_port->hw_stats.cache);
	kfree(mlxsw_sp_port->sample);
	free_percpu(mlxsw_sp_port->pcpu_stats);
	kfree(mlxsw_sp_port->untagged_vlans);
	kfree(mlxsw_sp_port->active_vlans);
@@ -2734,6 +2807,41 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
}

static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
					     void *priv)
{
	struct mlxsw_sp *mlxsw_sp = priv;
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
	struct psample_group *psample_group;
	u32 size;

	if (unlikely(!mlxsw_sp_port)) {
		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n",
				     local_port);
		goto out;
	}
	if (unlikely(!mlxsw_sp_port->sample)) {
		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received on unsupported port\n",
				     local_port);
		goto out;
	}

	size = mlxsw_sp_port->sample->truncate ?
		  mlxsw_sp_port->sample->trunc_size : skb->len;

	rcu_read_lock();
	psample_group = rcu_dereference(mlxsw_sp_port->sample->psample_group);
	if (!psample_group)
		goto out_unlock;
	psample_sample_packet(psample_group, skb, size,
			      mlxsw_sp_port->dev->ifindex, 0,
			      mlxsw_sp_port->sample->rate);
out_unlock:
	rcu_read_unlock();
out:
	consume_skb(skb);
}

#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
		  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -2769,6 +2877,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
	MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
	MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
	MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
	/* PKT Sample trap */
	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
		  false, SP_IP2ME, DISCARD)
};

static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
+10 −0
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@
#include <linux/dcbnl.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/psample.h>

#include "port.h"
#include "core.h"
@@ -229,6 +230,7 @@ struct mlxsw_sp_span_entry {

enum mlxsw_sp_port_mall_action_type {
	MLXSW_SP_PORT_MALL_MIRROR,
	MLXSW_SP_PORT_MALL_SAMPLE,
};

struct mlxsw_sp_port_mall_mirror_tc_entry {
@@ -315,6 +317,13 @@ struct mlxsw_sp_port_pcpu_stats {
	u32			tx_dropped;
};

struct mlxsw_sp_port_sample {
	struct psample_group __rcu *psample_group;
	u32 trunc_size;
	u32 rate;
	bool truncate;
};

struct mlxsw_sp_port {
	struct net_device *dev;
	struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
@@ -361,6 +370,7 @@ struct mlxsw_sp_port {
		struct rtnl_link_stats64 *cache;
		struct delayed_work update_dw;
	} hw_stats;
	struct mlxsw_sp_port_sample *sample;
};

struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
+1 −0
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ enum {
	MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
	MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33,
	MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
	MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
	MLXSW_TRAP_ID_ARPBC = 0x50,
	MLXSW_TRAP_ID_ARPUC = 0x51,
	MLXSW_TRAP_ID_MTUERROR = 0x52,
Loading