
Commit a10cc847 authored by David S. Miller

Merge branch 'virtio_net-Fix-problems-around-XDP-tx-and-napi_tx'



Toshiaki Makita says:

====================
virtio_net: Fix problems around XDP tx and napi_tx

While looking into how to account standard tx counters on XDP tx
processing, I found several bugs around XDP tx and napi_tx.

Patch1: Fix oops on error path. Patch2 depends on this.
Patch2: Fix memory corruption on freeing xdp_frames with napi_tx enabled.
Patch3: Minor fix that patch5 depends on.
Patch4: Fix memory corruption on processing xdp_frames when XDP is disabled.
  Also patch5 depends on this.
Patch5: Fix memory corruption on processing xdp_frames while XDP is being
  disabled.
Patch6: Minor fix that patch7 depends on.
Patch7: Fix memory corruption on freeing sk_buff or xdp_frames when a normal
  queue is reused for XDP and vice versa.

v2:
- patch5: Make rcu_assign_pointer/synchronize_net conditional instead of
          _virtnet_set_queues.
- patch7: Use napi_consume_skb() instead of dev_consume_skb_any()
====================

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 41ef81be 5050471d
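
The central trick in this series (patch 7) is that sk_buffs and xdp_frames now share the same send virtqueue, so the driver tags each queued xdp_frame pointer in its low bit (VIRTIO_XDP_FLAG) and inspects that bit on completion to pick the right free routine. Below is a minimal standalone sketch of the tagging scheme, assuming only that allocations are at least word-aligned so bit 0 of a valid pointer is free to carry the tag; the helper names mirror the diff, while the stub struct xdp_frame and the main() harness are illustrative stand-ins, not kernel code.

/*
 * Standalone sketch (not kernel code) of the pointer-tagging scheme
 * introduced by patch 7.  Assumes allocations are at least word-aligned,
 * so bit 0 of a valid pointer is always zero and can carry the type tag.
 * struct xdp_frame and main() are illustrative stand-ins.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define VIRTIO_XDP_FLAG	1UL	/* BIT(0) in the patch */

struct xdp_frame { unsigned int len; };

static int is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}

static void *xdp_to_ptr(struct xdp_frame *frame)
{
	return (void *)((unsigned long)frame | VIRTIO_XDP_FLAG);
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
}

int main(void)
{
	struct xdp_frame *frame = malloc(sizeof(*frame));	/* malloc is word-aligned */
	void *tagged = xdp_to_ptr(frame);	/* tag before queueing, as in __virtnet_xdp_xmit_one() */

	assert(is_xdp_frame(tagged));		/* completion path takes the XDP branch */
	assert(ptr_to_xdp(tagged) == frame);	/* and recovers the original pointer */
	assert(!is_xdp_frame(frame));		/* untagged pointers look like sk_buffs */

	printf("tagged %p -> frame %p\n", tagged, (void *)frame);
	free(frame);
	return 0;
}

An untagged pointer coming back from the virtqueue is thus treated as an sk_buff, which is exactly the dispatch virtnet_xdp_xmit() and free_old_xmit_skbs() perform in the diff below.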
drivers/net/virtio_net.c +112 −47
@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
 #define VIRTIO_XDP_TX		BIT(0)
 #define VIRTIO_XDP_REDIR	BIT(1)
 
+#define VIRTIO_XDP_FLAG	BIT(0)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -252,6 +254,21 @@ struct padded_vnet_hdr {
 	char padding[4];
 };
 
+static bool is_xdp_frame(void *ptr)
+{
+	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
 /* Converting between virtqueue no. and kernel tx/rx queue no.
  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  */
@@ -462,7 +479,8 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 
 	sg_init_one(sq->sg, xdpf->data, xdpf->len);
 
-	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
+				   GFP_ATOMIC);
 	if (unlikely(err))
 		return -ENOSPC; /* Caller handle free/refcnt */
 
@@ -482,36 +500,37 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
-	struct xdp_frame *xdpf_sent;
 	struct bpf_prog *xdp_prog;
 	struct send_queue *sq;
 	unsigned int len;
 	int drops = 0;
 	int kicks = 0;
 	int ret, err;
+	void *ptr;
 	int i;
 
-	sq = virtnet_xdp_sq(vi);
-
-	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
-		ret = -EINVAL;
-		drops = n;
-		goto out;
-	}
-
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.
 	 */
 	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (!xdp_prog) {
-		ret = -ENXIO;
+	if (!xdp_prog)
+		return -ENXIO;
+
+	sq = virtnet_xdp_sq(vi);
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+		ret = -EINVAL;
 		drops = n;
 		goto out;
 	}
 
 	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent);
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(is_xdp_frame(ptr)))
+			xdp_return_frame(ptr_to_xdp(ptr));
+		else
+			napi_consume_skb(ptr, false);
+	}
 
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
@@ -1332,18 +1351,26 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
 
 static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
 {
-	struct sk_buff *skb;
 	unsigned int len;
 	unsigned int packets = 0;
 	unsigned int bytes = 0;
+	void *ptr;
 
-	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		pr_debug("Sent skb %p\n", skb);
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(!is_xdp_frame(ptr))) {
+			struct sk_buff *skb = ptr;
 
-		bytes += skb->len;
-		packets++;
+			pr_debug("Sent skb %p\n", skb);
 
-		napi_consume_skb(skb, in_napi);
+			bytes += skb->len;
+			napi_consume_skb(skb, in_napi);
+		} else {
+			struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+			bytes += frame->len;
+			xdp_return_frame(frame);
+		}
+		packets++;
 	}
 
 	/* Avoid overhead when no packets have been processed
@@ -1358,6 +1385,16 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
 	u64_stats_update_end(&sq->stats.syncp);
 }
 
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static void virtnet_poll_cleantx(struct receive_queue *rq)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1365,7 +1402,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
 	struct send_queue *sq = &vi->sq[index];
 	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
 
-	if (!sq->napi.weight)
+	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
 		return;
 
 	if (__netif_tx_trylock(txq)) {
@@ -1442,8 +1479,16 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
 {
 	struct send_queue *sq = container_of(napi, struct send_queue, napi);
 	struct virtnet_info *vi = sq->vq->vdev->priv;
-	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+	unsigned int index = vq2txq(sq->vq);
+	struct netdev_queue *txq;
 
+	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
+		/* We don't need to enable cb for XDP */
+		napi_complete_done(napi, 0);
+		return 0;
+	}
+
+	txq = netdev_get_tx_queue(vi->dev, index);
 	__netif_tx_lock(txq, raw_smp_processor_id());
 	free_old_xmit_skbs(sq, true);
 	__netif_tx_unlock(txq);
@@ -2395,6 +2440,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 		return -ENOMEM;
 	}
 
+	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
+	if (!prog && !old_prog)
+		return 0;
+
 	if (prog) {
 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
 		if (IS_ERR(prog))
@@ -2402,36 +2451,62 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	}
 
 	/* Make sure NAPI is not using any XDP TX queues for RX. */
-	if (netif_running(dev))
-		for (i = 0; i < vi->max_queue_pairs; i++)
+	if (netif_running(dev)) {
+		for (i = 0; i < vi->max_queue_pairs; i++) {
 			napi_disable(&vi->rq[i].napi);
+			virtnet_napi_tx_disable(&vi->sq[i].napi);
+		}
+	}
+
+	if (!prog) {
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+			if (i == 0)
+				virtnet_restore_guest_offloads(vi);
+		}
+		synchronize_net();
+	}
 
-	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 	err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
 	if (err)
 		goto err;
+	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 	vi->xdp_queue_pairs = xdp_qp;
 
-	for (i = 0; i < vi->max_queue_pairs; i++) {
-		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
-		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
-		if (i == 0) {
-			if (!old_prog)
+	if (prog) {
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
-			if (!prog)
-				virtnet_restore_guest_offloads(vi);
 		}
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		if (old_prog)
 			bpf_prog_put(old_prog);
-		if (netif_running(dev))
+		if (netif_running(dev)) {
 			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+					       &vi->sq[i].napi);
+		}
 	}
 
 	return 0;
 
 err:
-	for (i = 0; i < vi->max_queue_pairs; i++)
-		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+	if (!prog) {
+		virtnet_clear_guest_offloads(vi);
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
+	}
+
+	if (netif_running(dev)) {
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+			virtnet_napi_tx_enable(vi, vi->sq[i].vq,
+					       &vi->sq[i].napi);
+		}
+	}
 	if (prog)
 		bpf_prog_sub(prog, vi->max_queue_pairs - 1);
 	return err;
@@ -2613,16 +2688,6 @@ static void free_receive_page_frags(struct virtnet_info *vi)
 			put_page(vi->rq[i].alloc_frag.page);
 }
 
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
-	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
-		return false;
-	else if (q < vi->curr_queue_pairs)
-		return true;
-	else
-		return false;
-}
-
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -2631,10 +2696,10 @@ static void free_unused_bufs(struct virtnet_info *vi)
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (!is_xdp_raw_buffer_queue(vi, i))
+			if (!is_xdp_frame(buf))
 				dev_kfree_skb(buf);
 			else
-				put_page(virt_to_head_page(buf));
+				xdp_return_frame(ptr_to_xdp(buf));
 		}
 	}
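
For reference, is_xdp_raw_buffer_queue() (moved earlier in the file by this series) classifies a queue index by splitting the active range: of the curr_queue_pairs active pairs, the last xdp_queue_pairs are dedicated to XDP_TX and never carry stack sk_buffs. A standalone sketch of that classification, where struct vnet and the sample values are illustrative assumptions rather than driver code:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the two fields the helper reads. */
struct vnet { int curr_queue_pairs, xdp_queue_pairs; };

static bool is_xdp_raw_buffer_queue(const struct vnet *vi, int q)
{
	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
		return false;	/* normal tx queue, stack-owned sk_buffs */
	else if (q < vi->curr_queue_pairs)
		return true;	/* tail queue dedicated to XDP_TX */
	else
		return false;	/* beyond the active range */
}

int main(void)
{
	struct vnet vi = { .curr_queue_pairs = 4, .xdp_queue_pairs = 2 };

	for (int q = 0; q < 5; q++)	/* prints: normal normal XDP XDP normal */
		printf("queue %d: %s\n", q,
		       is_xdp_raw_buffer_queue(&vi, q) ? "XDP" : "normal");
	return 0;
}

Note that patch 7 stops relying on this per-queue test on the free path (free_unused_bufs() above now checks the per-buffer tag instead), since a queue can hold both buffer types while XDP is being enabled or disabled; the per-queue test remains on the napi_tx paths, where it tells virtnet_poll_cleantx() and virtnet_poll_tx() to leave XDP queues alone.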