Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 735fc405 authored by Jesper Dangaard Brouer's avatar Jesper Dangaard Brouer Committed by Alexei Starovoitov
Browse files

xdp: change ndo_xdp_xmit API to support bulking



This patch changes the API for ndo_xdp_xmit to support bulking
xdp_frames.

When the kernel is compiled with CONFIG_RETPOLINE, XDP sees a huge slowdown.
Most of the slowdown is caused by DMA API indirect function calls, but
also the net_device->ndo_xdp_xmit() call.

Benchmarked patch with CONFIG_RETPOLINE, using xdp_redirect_map with
single flow/core test (CPU E5-1650 v4 @ 3.60GHz), showed
performance improved:
 for driver ixgbe: 6,042,682 pps -> 6,853,768 pps = +811,086 pps
 for driver i40e : 6,187,169 pps -> 6,724,519 pps = +537,350 pps

With frames available as a bulk inside the driver ndo_xdp_xmit call,
further optimizations are possible, like bulk DMA-mapping for TX.

Testing without CONFIG_RETPOLINE shows the same performance for
physical NIC drivers.

The virtual NIC driver tun sees a huge performance boost, as it can
avoid doing per frame producer locking, but instead amortize the
locking cost over the bulk.

V2: Fix compile errors reported by kbuild test robot <lkp@intel.com>
V4: Isolated ndo, driver changes and callers.

Signed-off-by: default avatarJesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 389ab7f0
Loading
Loading
Loading
Loading
+19 −7
Original line number Original line Diff line number Diff line
@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 * @dev: netdev
 * @dev: netdev
 * @xdp: XDP buffer
 * @xdp: XDP buffer
 *
 *
 * Returns Zero if sent, else an error code
 * Returns number of frames successfully sent. Frames that fail are
 * free'ed via XDP return API.
 *
 * For error cases, a negative errno code is returned and no-frames
 * are transmitted (caller must handle freeing frames).
 **/
 **/
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
{
	struct i40e_netdev_priv *np = netdev_priv(dev);
	struct i40e_netdev_priv *np = netdev_priv(dev);
	unsigned int queue_index = smp_processor_id();
	unsigned int queue_index = smp_processor_id();
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_vsi *vsi = np->vsi;
	int err;
	int drops = 0;
	int i;


	if (test_bit(__I40E_VSI_DOWN, vsi->state))
	if (test_bit(__I40E_VSI_DOWN, vsi->state))
		return -ENETDOWN;
		return -ENETDOWN;
@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
		return -ENXIO;
		return -ENXIO;


	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		int err;

		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
	if (err != I40E_XDP_TX)
		if (err != I40E_XDP_TX) {
		return -ENOSPC;
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}


	return 0;
	return n - drops;
}
}


/**
/**
+1 −1
Original line number Original line Diff line number Diff line
@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev);
void i40e_xdp_flush(struct net_device *dev);


/**
/**
+15 −6
Original line number Original line Diff line number Diff line
@@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
	}
	}
}
}


static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
			  struct xdp_frame **frames)
{
{
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct ixgbe_ring *ring;
	struct ixgbe_ring *ring;
	int err;
	int drops = 0;
	int i;


	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
		return -ENETDOWN;
		return -ENETDOWN;
@@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
	if (unlikely(!ring))
	if (unlikely(!ring))
		return -ENXIO;
		return -ENXIO;


	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		int err;

		err = ixgbe_xmit_xdp_ring(adapter, xdpf);
		err = ixgbe_xmit_xdp_ring(adapter, xdpf);
	if (err != IXGBE_XDP_TX)
		if (err != IXGBE_XDP_TX) {
		return -ENOSPC;
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}


	return 0;
	return n - drops;
}
}


static void ixgbe_xdp_flush(struct net_device *dev)
static void ixgbe_xdp_flush(struct net_device *dev)
+24 −13
Original line number Original line Diff line number Diff line
@@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
#include <linux/skb_array.h>
@@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
	.ndo_get_stats64	= tun_net_get_stats64,
	.ndo_get_stats64	= tun_net_get_stats64,
};
};


static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
{
	struct tun_struct *tun = netdev_priv(dev);
	struct tun_struct *tun = netdev_priv(dev);
	struct tun_file *tfile;
	struct tun_file *tfile;
	u32 numqueues;
	u32 numqueues;
	int ret = 0;
	int drops = 0;
	int cnt = n;
	int i;


	rcu_read_lock();
	rcu_read_lock();


	numqueues = READ_ONCE(tun->numqueues);
	numqueues = READ_ONCE(tun->numqueues);
	if (!numqueues) {
	if (!numqueues) {
		ret = -ENOSPC;
		rcu_read_unlock();
		goto out;
		return -ENXIO; /* Caller will free/return all frames */
	}
	}


	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
					    numqueues]);
					    numqueues]);

	spin_lock(&tfile->tx_ring.producer_lock);
	for (i = 0; i < n; i++) {
		struct xdp_frame *xdp = frames[i];
		/* Encode the XDP flag into lowest bit for consumer to differ
		/* Encode the XDP flag into lowest bit for consumer to differ
		 * XDP buffer from sk_buff.
		 * XDP buffer from sk_buff.
		 */
		 */
	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
		void *frame = tun_xdp_to_ptr(xdp);

		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
			this_cpu_inc(tun->pcpu_stats->tx_dropped);
			this_cpu_inc(tun->pcpu_stats->tx_dropped);
		ret = -ENOSPC;
			xdp_return_frame_rx_napi(xdp);
			drops++;
		}
	}
	}
	spin_unlock(&tfile->tx_ring.producer_lock);


out:
	rcu_read_unlock();
	rcu_read_unlock();
	return ret;
	return cnt - drops;
}
}


static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
	if (unlikely(!frame))
	if (unlikely(!frame))
		return -EOVERFLOW;
		return -EOVERFLOW;


	return tun_xdp_xmit(dev, frame);
	return tun_xdp_xmit(dev, 1, &frame);
}
}


static void tun_xdp_flush(struct net_device *dev)
static void tun_xdp_flush(struct net_device *dev)
+49 −17
Original line number Original line Diff line number Diff line
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
	virtqueue_kick(sq->vq);
	virtqueue_kick(sq->vq);
}
}


static int __virtnet_xdp_xmit(struct virtnet_info *vi,
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
				   struct send_queue *sq,
				   struct xdp_frame *xdpf)
				   struct xdp_frame *xdpf)
{
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	struct xdp_frame *xdpf_sent;
	struct send_queue *sq;
	unsigned int len;
	unsigned int qp;
	int err;
	int err;


	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
	sq = &vi->sq[qp];

	/* Free up any pending old buffers before queueing new ones. */
	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
		xdp_return_frame(xdpf_sent);

	/* virtqueue want to use data area in-front of packet */
	/* virtqueue want to use data area in-front of packet */
	if (unlikely(xdpf->metasize > 0))
	if (unlikely(xdpf->metasize > 0))
		return -EOPNOTSUPP;
		return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
	return 0;
	return 0;
}
}


static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
				   struct xdp_frame *xdpf)
{
	struct xdp_frame *xdpf_sent;
	struct send_queue *sq;
	unsigned int len;
	unsigned int qp;

	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
	sq = &vi->sq[qp];

	/* Free up any pending old buffers before queueing new ones. */
	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
		xdp_return_frame(xdpf_sent);

	return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}

static int virtnet_xdp_xmit(struct net_device *dev,
			    int n, struct xdp_frame **frames)
{
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtnet_info *vi = netdev_priv(dev);
	struct receive_queue *rq = vi->rq;
	struct receive_queue *rq = vi->rq;
	struct xdp_frame *xdpf_sent;
	struct bpf_prog *xdp_prog;
	struct bpf_prog *xdp_prog;
	struct send_queue *sq;
	unsigned int len;
	unsigned int qp;
	int drops = 0;
	int err;
	int i;

	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
	sq = &vi->sq[qp];


	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
	 * indicate XDP resources have been successfully allocated.
	 * indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
	if (!xdp_prog)
	if (!xdp_prog)
		return -ENXIO;
		return -ENXIO;


	return __virtnet_xdp_xmit(vi, xdpf);
	/* Free up any pending old buffers before queueing new ones. */
	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
		xdp_return_frame(xdpf_sent);

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
		if (err) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}
	return n - drops;
}
}


static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
			xdpf = convert_to_xdp_frame(&xdp);
			xdpf = convert_to_xdp_frame(&xdp);
			if (unlikely(!xdpf))
			if (unlikely(!xdpf))
				goto err_xdp;
				goto err_xdp;
			err = __virtnet_xdp_xmit(vi, xdpf);
			err = __virtnet_xdp_tx_xmit(vi, xdpf);
			if (unlikely(err)) {
			if (unlikely(err)) {
				trace_xdp_exception(vi->dev, xdp_prog, act);
				trace_xdp_exception(vi->dev, xdp_prog, act);
				goto err_xdp;
				goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
			xdpf = convert_to_xdp_frame(&xdp);
			xdpf = convert_to_xdp_frame(&xdp);
			if (unlikely(!xdpf))
			if (unlikely(!xdpf))
				goto err_xdp;
				goto err_xdp;
			err = __virtnet_xdp_xmit(vi, xdpf);
			err = __virtnet_xdp_tx_xmit(vi, xdpf);
			if (unlikely(err)) {
			if (unlikely(err)) {
				trace_xdp_exception(vi->dev, xdp_prog, act);
				trace_xdp_exception(vi->dev, xdp_prog, act);
				if (unlikely(xdp_page != page))
				if (unlikely(xdp_page != page))
Loading