Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 043d222f authored by Jason Wang's avatar Jason Wang Committed by David S. Miller
Browse files

tuntap: accept an array of XDP buffs through sendmsg()



This patch implement TUN_MSG_PTR msg_control type. This type allows
the caller to pass an array of XDP buffs to tuntap through ptr field
of the tun_msg_control. If an XDP program is attached, tuntap can run
XDP program directly. If not, tuntap will build skb and do a fast
receiving since part of the work has been done by vhost_net.

This will avoid lots of indirect calls thus improves the icache
utilization and allows to do XDP batched flushing when doing XDP
redirection.

Signed-off-by: default avatarJason Wang <jasowang@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent fe8dd45b
Loading
Loading
Loading
Loading
+114 −3
Original line number Diff line number Diff line
@@ -2426,22 +2426,133 @@ static void tun_sock_write_space(struct sock *sk)
	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
}

static int tun_xdp_one(struct tun_struct *tun,
		       struct tun_file *tfile,
		       struct xdp_buff *xdp, int *flush)
{
	struct tun_xdp_hdr *hdr = xdp->data_hard_start;
	struct virtio_net_hdr *gso = &hdr->gso;
	struct tun_pcpu_stats *stats;
	struct bpf_prog *xdp_prog;
	struct sk_buff *skb = NULL;
	u32 rxhash = 0, act;
	int buflen = hdr->buflen;
	int err = 0;
	bool skb_xdp = false;

	xdp_prog = rcu_dereference(tun->xdp_prog);
	if (xdp_prog) {
		if (gso->gso_type) {
			skb_xdp = true;
			goto build;
		}
		xdp_set_data_meta_invalid(xdp);
		xdp->rxq = &tfile->xdp_rxq;

		act = bpf_prog_run_xdp(xdp_prog, xdp);
		err = tun_xdp_act(tun, xdp_prog, xdp, act);
		if (err < 0) {
			put_page(virt_to_head_page(xdp->data));
			return err;
		}

		switch (err) {
		case XDP_REDIRECT:
			*flush = true;
			/* fall through */
		case XDP_TX:
			return 0;
		case XDP_PASS:
			break;
		default:
			put_page(virt_to_head_page(xdp->data));
			return 0;
		}
	}

build:
	skb = build_skb(xdp->data_hard_start, buflen);
	if (!skb) {
		err = -ENOMEM;
		goto out;
	}

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	skb_put(skb, xdp->data_end - xdp->data);

	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
		this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
		kfree_skb(skb);
		err = -EINVAL;
		goto out;
	}

	skb->protocol = eth_type_trans(skb, tun->dev);
	skb_reset_network_header(skb);
	skb_probe_transport_header(skb, 0);

	if (skb_xdp) {
		err = do_xdp_generic(xdp_prog, skb);
		if (err != XDP_PASS)
			goto out;
	}

	if (!rcu_dereference(tun->steering_prog))
		rxhash = __skb_get_hash_symmetric(skb);

	netif_receive_skb(skb);

	stats = get_cpu_ptr(tun->pcpu_stats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);
	put_cpu_ptr(stats);

	if (rxhash)
		tun_flow_update(tun, rxhash, tfile);

out:
	return err;
}

static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	int ret;
	int ret, i;
	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
	struct tun_struct *tun = tun_get(tfile);
	struct tun_msg_ctl *ctl = m->msg_control;
	struct xdp_buff *xdp;

	if (!tun)
		return -EBADFD;

	if (ctl && ctl->type != TUN_MSG_UBUF)
		return -EINVAL;
	if (ctl && (ctl->type == TUN_MSG_PTR)) {
		int n = ctl->num;
		int flush = 0;

		local_bh_disable();
		rcu_read_lock();

		for (i = 0; i < n; i++) {
			xdp = &((struct xdp_buff *)ctl->ptr)[i];
			tun_xdp_one(tun, tfile, xdp, &flush);
		}

		if (flush)
			xdp_do_flush_map();

		rcu_read_unlock();
		local_bh_enable();

		ret = total_len;
		goto out;
	}

	ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
			   m->msg_flags & MSG_DONTWAIT,
			   m->msg_flags & MSG_MORE);
out:
	tun_put(tun);
	return ret;
}