Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d1396004 authored by Toshiaki Makita, committed by Daniel Borkmann
Browse files

veth: Add XDP TX and REDIRECT



This allows further redirection of xdp_frames like

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP program, which redirects packets from the NIC to the
other veth, reuses the NIC's xdp_mem_info so that the NIC's page recycling
still works in the destination veth's XDP.
As a consequence, returning a frame is not fully guarded by NAPI, since
another NAPI handler on another CPU may use the same xdp_mem_info
concurrently. Thus napi_direct is disabled with
xdp_set_return_frame_no_direct() while in the NAPI context.

v8:
- Don't use xdp_frame pointer address for data_hard_start of xdp_buff.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in
  xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 2539650f
Loading
Loading
Loading
Loading
+110 −9
Original line number Diff line number Diff line
@@ -32,6 +32,10 @@
#define VETH_RING_SIZE		256
#define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

/* Flags distinguishing the two types of XDP xmit.
 *
 * The receive helpers OR these into the xdp_xmit bitmask passed down from
 * veth_poll() after a successful XDP_TX or XDP_REDIRECT. veth_poll() then
 * checks the bits once per poll: VETH_XDP_TX triggers veth_xdp_flush() and
 * VETH_XDP_REDIR triggers xdp_do_flush_map().
 */
#define VETH_XDP_TX		BIT(0)
#define VETH_XDP_REDIR		BIT(1)

struct pcpu_vstats {
	u64			packets;
	u64			bytes;
@@ -45,6 +49,7 @@ struct veth_priv {
	struct bpf_prog		*_xdp_prog;
	struct net_device __rcu	*peer;
	atomic64_t		dropped;
	struct xdp_mem_info	xdp_mem;
	unsigned		requested_headroom;
	bool			rx_notify_masked;
	struct ptr_ring		xdp_ring;
@@ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
	return n - drops;
}

/* Flush on behalf of @dev's peer.
 *
 * Under the RCU read lock, look up the peer device and, if it exists and
 * has an XDP program attached, call __veth_xdp_flush() on the peer's
 * private data. Nothing is done when there is no peer or the peer has no
 * XDP program.
 */
static void veth_xdp_flush(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct veth_priv *peer_priv;
	struct net_device *peer;

	rcu_read_lock();

	peer = rcu_dereference(priv->peer);
	if (likely(peer)) {
		peer_priv = netdev_priv(peer);

		/* An attached XDP program is used as the indication that
		 * the receive side has its xdp_ring initialized.
		 */
		if (likely(rcu_access_pointer(peer_priv->xdp_prog)))
			__veth_xdp_flush(peer_priv);
	}

	rcu_read_unlock();
}

/* Transmit a single xdp_buff through @dev's XDP xmit path.
 *
 * The buffer is first converted to an xdp_frame. If the conversion fails,
 * -EOVERFLOW is returned. Otherwise the frame is handed to veth_xdp_xmit()
 * as a batch of one with flags 0, and that function's return value is
 * passed back to the caller.
 */
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
{
	struct xdp_frame *xdpf;

	xdpf = convert_to_xdp_frame(xdp);
	if (likely(xdpf))
		return veth_xdp_xmit(dev, 1, &xdpf, 0);

	return -EOVERFLOW;
}

static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
					struct xdp_frame *frame)
					struct xdp_frame *frame,
					unsigned int *xdp_xmit)
{
	void *hard_start = frame->data - frame->headroom;
	void *head = hard_start - sizeof(struct xdp_frame);
	int len = frame->len, delta = 0;
	struct xdp_frame orig_frame;
	struct bpf_prog *xdp_prog;
	unsigned int headroom;
	struct sk_buff *skb;
@@ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
			delta = frame->data - xdp.data;
			len = xdp.data_end - xdp.data;
			break;
		case XDP_TX:
			orig_frame = *frame;
			xdp.data_hard_start = head;
			xdp.rxq->mem = frame->mem;
			if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
				trace_xdp_exception(priv->dev, xdp_prog, act);
				frame = &orig_frame;
				goto err_xdp;
			}
			*xdp_xmit |= VETH_XDP_TX;
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			orig_frame = *frame;
			xdp.data_hard_start = head;
			xdp.rxq->mem = frame->mem;
			if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
				frame = &orig_frame;
				goto err_xdp;
			}
			*xdp_xmit |= VETH_XDP_REDIR;
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(act);
		case XDP_ABORTED:
@@ -370,12 +430,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
err_xdp:
	rcu_read_unlock();
	xdp_return_frame(frame);

xdp_xmit:
	return NULL;
}

static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
					struct sk_buff *skb)
					struct sk_buff *skb,
					unsigned int *xdp_xmit)
{
	u32 pktlen, headroom, act, metalen;
	void *orig_data, *orig_data_end;
@@ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = priv->xdp_mem;
		if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
			trace_xdp_exception(priv->dev, xdp_prog, act);
			goto err_xdp;
		}
		*xdp_xmit |= VETH_XDP_TX;
		rcu_read_unlock();
		goto xdp_xmit;
	case XDP_REDIRECT:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = priv->xdp_mem;
		if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
			goto err_xdp;
		*xdp_xmit |= VETH_XDP_REDIR;
		rcu_read_unlock();
		goto xdp_xmit;
	default:
		bpf_warn_invalid_xdp_action(act);
	case XDP_ABORTED:
@@ -477,9 +558,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
err_xdp:
	rcu_read_unlock();
	page_frag_free(xdp.data);
xdp_xmit:
	return NULL;
}

static int veth_xdp_rcv(struct veth_priv *priv, int budget)
static int veth_xdp_rcv(struct veth_priv *priv, int budget,
			unsigned int *xdp_xmit)
{
	int i, done = 0;

@@ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
		if (!ptr)
			break;

		if (veth_is_xdp_frame(ptr))
			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr));
		else
			skb = veth_xdp_rcv_skb(priv, ptr);
		if (veth_is_xdp_frame(ptr)) {
			skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
					       xdp_xmit);
		} else {
			skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit);
		}

		if (skb)
			napi_gro_receive(&priv->xdp_napi, skb);
@@ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
{
	struct veth_priv *priv =
		container_of(napi, struct veth_priv, xdp_napi);
	unsigned int xdp_xmit = 0;
	int done;

	done = veth_xdp_rcv(priv, budget);
	xdp_set_return_frame_no_direct();
	done = veth_xdp_rcv(priv, budget, &xdp_xmit);

	if (done < budget && napi_complete_done(napi, done)) {
		/* Write rx_notify_masked before reading ptr_ring */
@@ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
		}
	}

	if (xdp_xmit & VETH_XDP_TX)
		veth_xdp_flush(priv->dev);
	if (xdp_xmit & VETH_XDP_REDIR)
		xdp_do_flush_map();
	xdp_clear_return_frame_no_direct();

	return done;
}

@@ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev)
		err = veth_napi_add(dev);
		if (err)
			goto err;

		/* Save original mem info as it can be overwritten */
		priv->xdp_mem = priv->xdp_rxq.mem;
	}

	rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);
@@ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev)

	rcu_assign_pointer(priv->xdp_prog, NULL);
	veth_napi_del(dev);
	priv->xdp_rxq.mem = priv->xdp_mem;
	xdp_rxq_info_unreg(&priv->xdp_rxq);
}