Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 92470808 authored by John Fastabend's avatar John Fastabend Committed by Jeff Kirsher
Browse files

ixgbe: add XDP support for pass and drop actions



Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
programs instead of RCU primitives as suggested by Daniel Borkmann and
Alex Duyck.

v2: fix the build issues seen w/ XDP when page sizes are larger than 4K
    and made minor fixes based on feedback from Jakub Kicinski

Signed-off-by: default avatarJohn Fastabend <john.r.fastabend@intel.com>
Acked-by: default avatarAlexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 6133406b
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -318,6 +318,7 @@ struct ixgbe_ring {
	struct ixgbe_ring *next;	/* pointer to next ring in q_vector */
	struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
	struct net_device *netdev;	/* netdev ring belongs to */
	struct bpf_prog *xdp_prog;
	struct device *dev;		/* device for DMA mapping */
	struct ixgbe_fwd_adapter *l2_accel_priv;
	void *desc;			/* descriptor ring memory */
@@ -555,6 +556,7 @@ struct ixgbe_adapter {
	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
	/* OS defined structs */
	struct net_device *netdev;
	struct bpf_prog *xdp_prog;
	struct pci_dev *pdev;

	unsigned long state;
@@ -835,7 +837,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter);
void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
void ixgbe_reset(struct ixgbe_adapter *adapter);
void ixgbe_set_ethtool_ops(struct net_device *netdev);
int ixgbe_setup_rx_resources(struct ixgbe_ring *);
int ixgbe_setup_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
int ixgbe_setup_tx_resources(struct ixgbe_ring *);
void ixgbe_free_rx_resources(struct ixgbe_ring *);
void ixgbe_free_tx_resources(struct ixgbe_ring *);
+2 −2
Original line number Diff line number Diff line
@@ -1128,7 +1128,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
			       sizeof(struct ixgbe_ring));

			temp_ring[i].count = new_rx_count;
			err = ixgbe_setup_rx_resources(&temp_ring[i]);
			err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
			if (err) {
				while (i) {
					i--;
@@ -1761,7 +1761,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
	rx_ring->netdev = adapter->netdev;
	rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;

	err = ixgbe_setup_rx_resources(rx_ring);
	err = ixgbe_setup_rx_resources(adapter, rx_ring);
	if (err) {
		ret_val = 4;
		goto err_nomem;
+143 −26
Original line number Diff line number Diff line
@@ -49,6 +49,9 @@
#include <linux/if_macvlan.h>
#include <linux/if_bridge.h>
#include <linux/prefetch.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/atomic.h>
#include <scsi/fc/fc_fcoe.h>
#include <net/udp_tunnel.h>
#include <net/pkt_cls.h>
@@ -1855,6 +1858,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Check if the skb is valid in the XDP case it will be an error pointer.
 * Return true in this case to abort processing and advance to next
 * descriptor.
 *
 * Check for corrupted packet headers caused by senders on the local L2
 * embedded NIC switch not setting up their Tx Descriptors right.  These
 * should be very rare.
@@ -1873,6 +1880,10 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
{
	struct net_device *netdev = rx_ring->netdev;

	/* XDP packets use error pointer so abort at this point */
	if (IS_ERR(skb))
		return true;

	/* verify that the packet does not have any known errors */
	if (unlikely(ixgbe_test_staterr(rx_desc,
					IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
@@ -2048,7 +2059,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
		/* hand second half of page back to the ring */
		ixgbe_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
		if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
			/* the page has been released from the ring */
			IXGBE_CB(skb)->page_released = true;
		} else {
@@ -2069,21 +2080,22 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,

static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
					   struct ixgbe_rx_buffer *rx_buffer,
					   union ixgbe_adv_rx_desc *rx_desc,
					   unsigned int size)
					   struct xdp_buff *xdp,
					   union ixgbe_adv_rx_desc *rx_desc)
{
	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
	unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else
	unsigned int truesize = SKB_DATA_ALIGN(size);
	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
					       xdp->data_hard_start);
#endif
	struct sk_buff *skb;

	/* prefetch first cache line of first page */
	prefetch(va);
	prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
	prefetch(va + L1_CACHE_BYTES);
	prefetch(xdp->data + L1_CACHE_BYTES);
#endif

	/* allocate a skb to store the frags */
@@ -2096,7 +2108,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
			IXGBE_CB(skb)->dma = rx_buffer->dma;

		skb_add_rx_frag(skb, 0, rx_buffer->page,
				rx_buffer->page_offset,
				xdp->data - page_address(rx_buffer->page),
				size, truesize);
#if (PAGE_SIZE < 8192)
		rx_buffer->page_offset ^= truesize;
@@ -2104,7 +2116,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
		rx_buffer->page_offset += truesize;
#endif
	} else {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
		memcpy(__skb_put(skb, size),
		       xdp->data, ALIGN(size, sizeof(long)));
		rx_buffer->pagecnt_bias++;
	}

@@ -2113,32 +2126,32 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,

static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
				       struct ixgbe_rx_buffer *rx_buffer,
				       union ixgbe_adv_rx_desc *rx_desc,
				       unsigned int size)
				       struct xdp_buff *xdp,
				       union ixgbe_adv_rx_desc *rx_desc)
{
	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else
	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
				SKB_DATA_ALIGN(IXGBE_SKB_PAD + size);
				SKB_DATA_ALIGN(xdp->data_end -
					       xdp->data_hard_start);
#endif
	struct sk_buff *skb;

	/* prefetch first cache line of first page */
	prefetch(va);
	prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
	prefetch(va + L1_CACHE_BYTES);
	prefetch(xdp->data + L1_CACHE_BYTES);
#endif

	/* build an skb around the page buffer */
	skb = build_skb(va - IXGBE_SKB_PAD, truesize);
	/* build an skb to around the page buffer */
	skb = build_skb(xdp->data_hard_start, truesize);
	if (unlikely(!skb))
		return NULL;

	/* update pointers within the skb to store the data */
	skb_reserve(skb, IXGBE_SKB_PAD);
	__skb_put(skb, size);
	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	__skb_put(skb, xdp->data_end - xdp->data);

	/* record DMA address if this is the start of a chain of buffers */
	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
@@ -2154,6 +2167,41 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
	return skb;
}

#define IXGBE_XDP_PASS 0
#define IXGBE_XDP_CONSUMED 1

static struct sk_buff *ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
				     struct xdp_buff *xdp)
{
	int result = IXGBE_XDP_PASS;
	struct bpf_prog *xdp_prog;
	u32 act;

	rcu_read_lock();
	xdp_prog = READ_ONCE(rx_ring->xdp_prog);

	if (!xdp_prog)
		goto xdp_out;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	switch (act) {
	case XDP_PASS:
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
	case XDP_TX:
	case XDP_ABORTED:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
		/* fallthrough -- handle aborts by dropping packet */
	case XDP_DROP:
		result = IXGBE_XDP_CONSUMED;
		break;
	}
xdp_out:
	rcu_read_unlock();
	return ERR_PTR(-result);
}

/**
 * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @q_vector: structure containing interrupt and ring information
@@ -2183,6 +2231,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
		union ixgbe_adv_rx_desc *rx_desc;
		struct ixgbe_rx_buffer *rx_buffer;
		struct sk_buff *skb;
		struct xdp_buff xdp;
		unsigned int size;

		/* return some buffers to hardware, one at a time is too slow */
@@ -2205,14 +2254,29 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);

		/* retrieve a buffer from the ring */
		if (skb)
		if (!skb) {
			xdp.data = page_address(rx_buffer->page) +
				   rx_buffer->page_offset;
			xdp.data_hard_start = xdp.data -
					      ixgbe_rx_offset(rx_ring);
			xdp.data_end = xdp.data + size;

			skb = ixgbe_run_xdp(rx_ring, &xdp);
		}

		if (IS_ERR(skb)) {
			total_rx_packets++;
			total_rx_bytes += size;
			rx_buffer->pagecnt_bias++;
		} else if (skb) {
			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
		else if (ring_uses_build_skb(rx_ring))
		} else if (ring_uses_build_skb(rx_ring)) {
			skb = ixgbe_build_skb(rx_ring, rx_buffer,
					      rx_desc, size);
		else
					      &xdp, rx_desc);
		} else {
			skb = ixgbe_construct_skb(rx_ring, rx_buffer,
						  rx_desc, size);
						  &xdp, rx_desc);
		}

		/* exit if we failed to retrieve a buffer */
		if (!skb) {
@@ -6073,7 +6137,8 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
 *
 * Returns 0 on success, negative on failure
 **/
int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
			     struct ixgbe_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int orig_node = dev_to_node(dev);
@@ -6112,6 +6177,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	rx_ring->xdp_prog = adapter->xdp_prog;

	return 0;
err:
	vfree(rx_ring->rx_buffer_info);
@@ -6135,7 +6202,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
	int i, err = 0;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
		err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
		if (!err)
			continue;

@@ -6203,6 +6270,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
{
	ixgbe_clean_rx_ring(rx_ring);

	rx_ring->xdp_prog = NULL;
	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;

@@ -9468,6 +9536,54 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
	return features;
}

static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct bpf_prog *old_prog;

	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
		return -EINVAL;

	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
		return -EINVAL;

	/* verify ixgbe ring attributes are sufficient for XDP */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct ixgbe_ring *ring = adapter->rx_ring[i];

		if (ring_is_rsc_enabled(ring))
			return -EINVAL;

		if (frame_size > ixgbe_rx_bufsz(ring))
			return -EINVAL;
	}

	old_prog = xchg(&adapter->xdp_prog, prog);
	for (i = 0; i < adapter->num_rx_queues; i++)
		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);

	if (old_prog)
		bpf_prog_put(old_prog);

	return 0;
}

static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return ixgbe_xdp_setup(dev, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!(adapter->xdp_prog);
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct net_device_ops ixgbe_netdev_ops = {
	.ndo_open		= ixgbe_open,
	.ndo_stop		= ixgbe_close,
@@ -9513,6 +9629,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
	.ndo_udp_tunnel_add	= ixgbe_add_udp_tunnel_port,
	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
	.ndo_features_check	= ixgbe_features_check,
	.ndo_xdp		= ixgbe_xdp,
};

/**