
Commit 03993094 authored by Jesper Dangaard Brouer, committed by David S. Miller

xdp: transition into using xdp_frame for return API



Changing the xdp_return_frame() API to take a struct xdp_frame as its
argument seems like a natural choice, but there are some subtle
performance details here that need extra care; the trade-off described
below is a deliberate choice.
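For reference, this is the shape of the change (old signature as removed
by this patch, new one as introduced; both are visible in the call sites
in the diffs below):

	/* Old API: caller passes the data pointer and memory info
	 * separately, so a driver could stash both and never touch
	 * the xdp_frame itself.
	 */
	void xdp_return_frame(void *data, struct xdp_mem_info *mem);

	/* New API: the xdp_frame carries its own xdp_mem_info, so the
	 * return path has to dereference the frame.
	 */
	void xdp_return_frame(struct xdp_frame *xdpf);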

Dereferencing the xdp_frame on a remote CPU during DMA-TX completion
results in its cache line changing to the "Shared" state. Later, when
the page is reused for RX, this xdp_frame cache line is written again,
which changes the state to "Modified" and requires invalidating the
remote CPU's copy.
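A minimal timeline of the state transitions involved (MESI-style state
names; the CPU labels are purely illustrative):

	/* CPU A (RX):  writes xdp_frame into page headroom -> line Modified on A  */
	/* CPU B (DMA-TX completion):                                              */
	/*              reads xdpf->data and xdpf->mem      -> line Shared on A+B  */
	/* CPU A (RX, page recycled):                                              */
	/*              writes a fresh xdp_frame            -> B's copy invalidated,
	 *                                                     line Modified on A  */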

This situation already happens (naturally) for virtio_net, tun and
cpumap, as the xdp_frame pointer is the queued object.  In tun and
cpumap, a ptr_ring is used for efficiently transferring cache lines
(with pointers) between CPUs; thus, dereferencing the xdp_frame is the
only option.
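A minimal sketch of that pattern (not the actual tun/cpumap code; the
helper names are made up for illustration):

	#include <linux/ptr_ring.h>
	#include <net/xdp.h>

	/* Producer CPU: the queued object is the xdp_frame pointer itself. */
	static int queue_xdp_frame(struct ptr_ring *ring, struct xdp_frame *xdpf)
	{
		return ptr_ring_produce(ring, xdpf);
	}

	/* Consumer CPU: nothing but the pointer crosses the ring, so the
	 * frame must be dereferenced here to find its xdp_mem_info -- this
	 * read is what pulls the remote cache line into "Shared" state.
	 */
	static void drain_xdp_frame(struct ptr_ring *ring)
	{
		struct xdp_frame *xdpf = ptr_ring_consume(ring);

		if (xdpf)
			xdp_return_frame(xdpf);
	}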

Only the ixgbe driver had an optimization that could avoid dereferencing
the xdp_frame.  The driver already has a TX-ring queue, which (in the
case of remote DMA-TX completion) has to be transferred between CPUs
anyhow.  In this data area we stored a struct xdp_mem_info and a data
pointer, which allowed us to avoid dereferencing the xdp_frame.
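Schematically (field subset only; the _before/_after struct names are
illustrative, the real definitions are in the ixgbe.h hunk below):

	/* Before: TX ring stored the raw pointer plus a copy of the
	 * memory info, so completion never read the xdp_frame cache line.
	 */
	struct tx_buffer_before {
		void *data;			/* xdp->data at xmit time */
		struct xdp_mem_info xdp_mem;	/* copy of xdp->rxq->mem */
	};

	/* After: only the frame pointer is kept; completion dereferences it. */
	struct tx_buffer_after {
		struct xdp_frame *xdpf;
	};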

To compensate for losing this optimization, a prefetchw is used to tell
the cache-coherency protocol about our write access pattern.  My
benchmarks show that this prefetchw is enough to compensate in the
ixgbe driver.
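A minimal sketch of that compensation (the helper name is made up;
ixgbe simply open-codes the call, as the ixgbe_main.c hunk below shows):

	#include <linux/prefetch.h>
	#include <net/xdp.h>

	/* Hint the cache-coherency protocol that the headroom, where
	 * convert_to_xdp_frame() will later write the struct xdp_frame,
	 * is about to be written, so the line is acquired for ownership
	 * up front instead of being fetched "Shared" and upgraded later.
	 */
	static inline void hint_xdp_frame_write(struct xdp_buff *xdp)
	{
		prefetchw(xdp->data_hard_start);	/* xdp_frame write */
	}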

V7: Adjust for commit d9314c47 ("i40e: add support for XDP_REDIRECT")
V8: Adjust for commit bd658dda ("net/mlx5e: Separate dma base address
and offset in dma_sync call")

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 60bbf7ee
drivers/net/ethernet/intel/i40e/i40e_txrx.c (+2 −3)
@@ -638,8 +638,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
 			kfree(tx_buffer->raw_buf);
 		else if (ring_is_xdp(ring))
-			xdp_return_frame(tx_buffer->xdpf->data,
-					 &tx_buffer->xdpf->mem);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
@@ -842,7 +841,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 
 		/* free the skb/XDP data */
 		if (ring_is_xdp(tx_ring))
-			xdp_return_frame(tx_buf->xdpf->data, &tx_buf->xdpf->mem);
+			xdp_return_frame(tx_buf->xdpf);
 		else
 			napi_consume_skb(tx_buf->skb, napi_budget);
drivers/net/ethernet/intel/ixgbe/ixgbe.h (+1 −3)
@@ -241,8 +241,7 @@ struct ixgbe_tx_buffer {
 	unsigned long time_stamp;
 	union {
 		struct sk_buff *skb;
-		/* XDP uses address ptr on irq_clean */
-		void *data;
+		struct xdp_frame *xdpf;
 	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
@@ -250,7 +249,6 @@ struct ixgbe_tx_buffer {
 	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
 	u32 tx_flags;
-	struct xdp_mem_info xdp_mem;
 };
 
 struct ixgbe_rx_buffer {
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c (+11 −6)
@@ -1216,7 +1216,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
-			xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			napi_consume_skb(tx_buffer->skb, napi_budget);
 
@@ -2386,6 +2386,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 			xdp.data_hard_start = xdp.data -
 					      ixgbe_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
+			prefetchw(xdp.data_hard_start); /* xdp_frame write */
 
 			skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
 		}
@@ -5797,7 +5798,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 
 		/* Free all the Tx ring sk_buffs */
 		if (ring_is_xdp(tx_ring))
-			xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem);
+			xdp_return_frame(tx_buffer->xdpf);
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 
@@ -8348,16 +8349,21 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 	struct ixgbe_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
+	struct xdp_frame *xdpf;
 	u32 len, cmd_type;
 	dma_addr_t dma;
 	u16 i;
 
-	len = xdp->data_end - xdp->data;
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	len = xdpf->len;
 
 	if (unlikely(!ixgbe_desc_unused(ring)))
 		return IXGBE_XDP_CONSUMED;
 
-	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
 	if (dma_mapping_error(ring->dev, dma))
 		return IXGBE_XDP_CONSUMED;
 
@@ -8372,8 +8378,7 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 
 	dma_unmap_len_set(tx_buffer, len, len);
 	dma_unmap_addr_set(tx_buffer, dma, dma);
-	tx_buffer->data = xdp->data;
-	tx_buffer->xdp_mem = xdp->rxq->mem;
+	tx_buffer->xdpf = xdpf;
 
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c (+1 −0)
@@ -890,6 +890,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
 				      frag_size, DMA_FROM_DEVICE);
+	prefetchw(va); /* xdp_frame data area */
 	prefetch(data);
 	wi->offset += frag_size;
 
drivers/net/tun.c (+2 −2)
@@ -663,7 +663,7 @@ void tun_ptr_free(void *ptr)
 	if (tun_is_xdp_frame(ptr)) {
 		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
-		xdp_return_frame(xdpf->data, &xdpf->mem);
+		xdp_return_frame(xdpf);
 	} else {
 		__skb_array_destroy_skb(ptr);
 	}
@@ -2196,7 +2196,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 		struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
 
 		ret = tun_put_user_xdp(tun, tfile, xdpf, to);
-		xdp_return_frame(xdpf->data, &xdpf->mem);
+		xdp_return_frame(xdpf);
 	} else {
 		struct sk_buff *skb = ptr;
 