Commit 684009d4 authored by David S. Miller

Merge branch 'XDP-redirect-memory-return-API'

Jesper Dangaard Brouer says:

====================
XDP redirect memory return API

Submitted against net-next, as it contains NIC driver changes.

This patchset works towards supporting different XDP RX-ring memory
allocators, as this will be needed by the AF_XDP zero-copy mode.
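
For orientation: each RX memory model is keyed by a small enum plus
per-ring mem info.  A condensed sketch of the types this series adds to
include/net/xdp.h (later kernels extend the list):

enum xdp_mem_type {
	MEM_TYPE_PAGE_SHARED,	/* Split-page refcnt based model */
	MEM_TYPE_PAGE_ORDER0,	/* Orig XDP full page model */
	MEM_TYPE_PAGE_POOL,	/* Recycle via page_pool */
	MEM_TYPE_MAX,
};

struct xdp_mem_info {
	u32 type;		/* enum xdp_mem_type, but known size */
	u32 id;			/* id that resolves to the allocator */
};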

The patchset uses mlx5 as the sample driver, which gets XDP_REDIRECT
RX-mode implemented, but not ndo_xdp_xmit (as this API is subject to
change throughout the patchset).

A new struct xdp_frame is introduced (modeled after the cpumap
xdp_pkt), and both ndo_xdp_xmit and the new xdp_return_frame end up
using it.
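
The frame metadata is stored in the top of the packet's own headroom,
so no extra allocation is needed.  A condensed sketch of the struct as
it lands in include/net/xdp.h:

struct xdp_frame {
	void *data;
	u16 len;
	u16 headroom;
	u16 metasize;
	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
	 * while mem info is valid on remote CPU.
	 */
	struct xdp_mem_info mem;
	struct net_device *dev_rx; /* used by cpumap */
};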

Support for a driver-supplied allocator is implemented, and a
refurbished version of page_pool is the first return-allocator type
introduced.  This will be an integration point for AF_XDP zero-copy.
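
Roughly, a driver creates one pool per RX ring and registers it as that
ring's memory model; a sketch modeled on the mlx5 changes in this
series (ring_size, numa_node and rxq_info are placeholder names, error
handling trimmed):

#include <net/page_pool.h>

	struct page_pool_params pp_params = { 0 };
	struct page_pool *pool;
	int err;

	pp_params.order     = 0;		/* order-0 pages */
	pp_params.pool_size = ring_size;
	pp_params.nid       = numa_node;
	pp_params.dev       = dev;		/* device doing DMA mapping */
	pp_params.dma_dir   = DMA_FROM_DEVICE;

	pool = page_pool_create(&pp_params);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* xdp_return_frame() on this ring's frames recycles into the pool */
	err = xdp_rxq_info_reg_mem_model(&rxq_info, MEM_TYPE_PAGE_POOL, pool);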

The mlx5 driver evolves into using the page_pool, and sees a
performance increase (with ndo_xdp_xmit out the ixgbe driver) from
6 Mpps to 12 Mpps.

The patchset stops at 16 patches (one over the limit), but more API
changes are planned, specifically extending the ndo_xdp_xmit and
xdp_return_frame APIs to support bulking, as this will address some
known limits.
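
As the i40e and ixgbe diffs below show, the XDP_TX path now converts
the xdp_buff into an xdp_frame before transmit, and TX cleanup frees it
via xdp_return_frame().  A condensed sketch of the conversion helper
from include/net/xdp.h as merged here:

static inline
struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
{
	struct xdp_frame *xdp_frame;
	int metasize;
	int headroom;

	/* Assure headroom is large enough to hold the xdp_frame */
	headroom = xdp->data - xdp->data_hard_start;
	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
		return NULL;

	/* Store frame info in the top of the packet's headroom */
	xdp_frame = xdp->data_hard_start;

	xdp_frame->data = xdp->data;
	xdp_frame->len  = xdp->data_end - xdp->data;
	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
	xdp_frame->metasize = metasize;

	/* rxq is only valid during NAPI; stash the xdp_mem_info instead */
	xdp_frame->mem = xdp->rxq->mem;

	return xdp_frame;
}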

V2: Updated according to Tariq's feedback
V3: Updated based on feedback from Jason Wang and Alex Duyck
V4: Updated based on feedback from Tariq and Jason
V5: Fix SPDX license, add Tariq's reviews, improve patch desc for perf test
V6: Updated based on feedback from Eric Dumazet and Alex Duyck
V7: Adapt to i40e that got XDP_REDIRECT support in-between
V8:
 Updated based on feedback from the kbuild test robot, and adjusted for mlx5 changes
 page_pool is only compiled into the kernel when a driver's Kconfig 'select's the feature
V9:
 Remove some inline statements, let compiler decide what to inline
 Fix return value in virtio_net driver
 Adjust for mlx5 changes in-between submissions
V10:
 Minor adjustments for mlx5 requested by Tariq
 Resubmit against net-next
V11: avoid leaking info stored in frame data on page reuse
====================

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 897ddc24 6dfb970d
drivers/net/ethernet/intel/i40e/i40e_txrx.c (+23 −10)
@@ -638,7 +638,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
 			kfree(tx_buffer->raw_buf);
 		else if (ring_is_xdp(ring))
-			page_frag_free(tx_buffer->raw_buf);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
@@ -841,7 +841,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 
 		/* free the skb/XDP data */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buf->raw_buf);
+			xdp_return_frame(tx_buf->xdpf);
 		else
 			napi_consume_skb(tx_buf->skb, napi_budget);
 
@@ -2203,9 +2203,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
 #define I40E_XDP_CONSUMED 1
 #define I40E_XDP_TX 2
 
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring);
 
+static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
+				 struct i40e_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return I40E_XDP_CONSUMED;
+
+	return i40e_xmit_xdp_ring(xdpf, xdp_ring);
+}
+
 /**
  * i40e_run_xdp - run an XDP program
  * @rx_ring: Rx ring being processed
@@ -2225,13 +2236,15 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 	if (!xdp_prog)
 		goto xdp_out;
 
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-		result = i40e_xmit_xdp_ring(xdp, xdp_ring);
+		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -3478,13 +3491,13 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * @xdp: data to transmit
  * @xdp_ring: XDP Tx ring
  **/
-static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring)
 {
-	u32 size = xdp->data_end - xdp->data;
 	u16 i = xdp_ring->next_to_use;
 	struct i40e_tx_buffer *tx_bi;
 	struct i40e_tx_desc *tx_desc;
+	u32 size = xdpf->len;
 	dma_addr_t dma;
 
 	if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
@@ -3492,14 +3505,14 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
 		return I40E_XDP_CONSUMED;
 	}
 
-	dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
+	dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
 	if (dma_mapping_error(xdp_ring->dev, dma))
 		return I40E_XDP_CONSUMED;
 
 	tx_bi = &xdp_ring->tx_bi[i];
 	tx_bi->bytecount = size;
 	tx_bi->gso_segs = 1;
-	tx_bi->raw_buf = xdp->data;
+	tx_bi->xdpf = xdpf;
 
 	/* record length, and DMA address */
 	dma_unmap_len_set(tx_bi, len, size);
@@ -3675,7 +3688,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  *
  * Returns Zero if sent, else an error code
  **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
@@ -3688,7 +3701,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;
 
-	err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
+	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
 	if (err != I40E_XDP_TX)
 		return -ENOSPC;
 
drivers/net/ethernet/intel/i40e/i40e_txrx.h (+2 −1)
@@ -306,6 +306,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
 struct i40e_tx_buffer {
 	struct i40e_tx_desc *next_to_watch;
 	union {
+		struct xdp_frame *xdpf;
 		struct sk_buff *skb;
 		void *raw_buf;
 	};
@@ -510,7 +511,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
drivers/net/ethernet/intel/ixgbe/ixgbe.h (+1 −2)
@@ -241,8 +241,7 @@ struct ixgbe_tx_buffer {
 	unsigned long time_stamp;
 	union {
 		struct sk_buff *skb;
-		/* XDP uses address ptr on irq_clean */
-		void *data;
+		struct xdp_frame *xdpf;
 	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c (+27 −11)
@@ -1216,7 +1216,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buffer->data);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			napi_consume_skb(tx_buffer->skb, napi_budget);
 
@@ -2262,7 +2262,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 #define IXGBE_XDP_TX 2
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_buff *xdp);
+			       struct xdp_frame *xdpf);
 
 static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 				     struct ixgbe_ring *rx_ring,
@@ -2270,6 +2270,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 {
 	int err, result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
+	struct xdp_frame *xdpf;
 	u32 act;
 
 	rcu_read_lock();
@@ -2278,12 +2279,19 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	if (!xdp_prog)
 		goto xdp_out;
 
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		result = ixgbe_xmit_xdp_ring(adapter, xdp);
+		xdpf = convert_to_xdp_frame(xdp);
+		if (unlikely(!xdpf)) {
+			result = IXGBE_XDP_CONSUMED;
+			break;
+		}
+		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
@@ -5797,7 +5805,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 
 		/* Free all the Tx ring sk_buffs */
 		if (ring_is_xdp(tx_ring))
-			page_frag_free(tx_buffer->data);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 
@@ -6370,7 +6378,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
 	int ring_node = -1;
-	int size;
+	int size, err;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
 
@@ -6407,6 +6415,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 			     rx_ring->queue_index) < 0)
 		goto err;
 
+	err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
+					 MEM_TYPE_PAGE_SHARED, NULL);
+	if (err) {
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+		goto err;
+	}
+
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
@@ -8336,7 +8351,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 }
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
-			       struct xdp_buff *xdp)
+			       struct xdp_frame *xdpf)
 {
 	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 	struct ixgbe_tx_buffer *tx_buffer;
@@ -8345,12 +8360,12 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 	dma_addr_t dma;
 	u16 i;
 
-	len = xdp->data_end - xdp->data;
+	len = xdpf->len;
 
 	if (unlikely(!ixgbe_desc_unused(ring)))
 		return IXGBE_XDP_CONSUMED;
 
-	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
 	if (dma_mapping_error(ring->dev, dma))
 		return IXGBE_XDP_CONSUMED;
 
@@ -8365,7 +8380,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 
 	dma_unmap_len_set(tx_buffer, len, len);
 	dma_unmap_addr_set(tx_buffer, dma, dma);
-	tx_buffer->data = xdp->data;
+	tx_buffer->xdpf = xdpf;
+
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
 	/* put descriptor type bits */
@@ -9996,7 +10012,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
@@ -10012,7 +10028,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
 	if (unlikely(!ring))
 		return -ENXIO;
 
-	err = ixgbe_xmit_xdp_ring(adapter, xdp);
+	err = ixgbe_xmit_xdp_ring(adapter, xdpf);
 	if (err != IXGBE_XDP_TX)
 		return -ENOSPC;
 
drivers/net/ethernet/mellanox/mlx5/core/Kconfig (+1 −0)
@@ -30,6 +30,7 @@ config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	depends on IPV6=y || IPV6=n || MLX5_CORE=m
+	select PAGE_POOL
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.