Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 34f79502 authored by John Fastabend's avatar John Fastabend Committed by David S. Miller
Browse files

bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region



SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.

Signed-off-by: default avatarJohn Fastabend <john.fastabend@gmail.com>
Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Acked-by: default avatarAlexei Starovoitov <ast@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 435bf0d3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);

struct sock *do_sk_redirect_map(void);
struct sock *do_sk_redirect_map(struct sk_buff *skb);

#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
+5 −0
Original line number Diff line number Diff line
@@ -840,6 +840,11 @@ struct tcp_skb_cb {
			struct inet6_skb_parm	h6;
#endif
		} header;	/* For incoming skbs */
		struct {
			__u32 key;
			__u32 flags;
			struct bpf_map *map;
		} bpf;
	};
};

+9 −10
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
#include <net/tcp.h>

struct bpf_stab {
	struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
		return SK_DROP;

	skb_orphan(skb);
	/* We need to ensure that BPF metadata for maps is also cleared
	 * when we orphan the skb so that we don't have the possibility
	 * to reference a stale map.
	 */
	TCP_SKB_CB(skb)->bpf.map = NULL;
	skb->sk = psock->sock;
	bpf_compute_data_end(skb);
	preempt_disable();
	rc = (*prog->bpf_func)(skb, prog->insnsi);
	preempt_enable();
	skb->sk = NULL;

	return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
	struct sock *sk;
	int rc;

	/* Because we use per cpu values to feed input from sock redirect
	 * in BPF program to do_sk_redirect_map() call we need to ensure we
	 * are not preempted. RCU read lock is not sufficient in this case
	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
	 */
	preempt_disable();
	rc = smap_verdict_func(psock, skb);
	switch (rc) {
	case SK_REDIRECT:
		sk = do_sk_redirect_map();
		preempt_enable();
		sk = do_sk_redirect_map(skb);
		if (likely(sk)) {
			struct smap_psock *peer = smap_psock_sk(sk);

@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
	/* Fall through and free skb otherwise */
	case SK_DROP:
	default:
		if (rc != SK_REDIRECT)
			preempt_enable();
		kfree_skb(skb);
	}
}
+15 −14
Original line number Diff line number Diff line
@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
	.arg2_type      = ARG_ANYTHING,
};

BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
	   struct bpf_map *, map, u32, key, u64, flags)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	if (unlikely(flags))
		return SK_ABORTED;

	ri->ifindex = key;
	ri->flags = flags;
	ri->map = map;
	tcb->bpf.key = key;
	tcb->bpf.flags = flags;
	tcb->bpf.map = map;

	return SK_REDIRECT;
}

struct sock *do_sk_redirect_map(void)
struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
	struct sock *sk = NULL;

	if (ri->map) {
		sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
	if (tcb->bpf.map) {
		sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);

		ri->ifindex = 0;
		ri->map = NULL;
		/* we do not clear flags for future lookup */
		tcb->bpf.key = 0;
		tcb->bpf.map = NULL;
	}

	return sk;
@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
	.func           = bpf_sk_redirect_map,
	.gpl_only       = false,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_CONST_MAP_PTR,
	.arg2_type      = ARG_ANYTHING,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type      = ARG_CONST_MAP_PTR,
	.arg3_type      = ARG_ANYTHING,
	.arg4_type      = ARG_ANYTHING,
};

BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
+1 −1
Original line number Diff line number Diff line
@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
		ret = 1;

	bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
	return bpf_sk_redirect_map(&sock_map, ret, 0);
	return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
}

SEC("sockops")
Loading