Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d375e344 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'cg_skb_direct_pkt_access'



Song Liu says:

====================
Changes v7 -> v8:
1. Dynamically allocate the dummy sk to avoid race conditions.

Changes v6 -> v7:
1. Make dummy sk a global variable (test_run_sk).

Changes v5 -> v6:
1. Fixed dummy sk in bpf_prog_test_run_skb() as suggested by Eric Dumazet.

Changes v4 -> v5:
1. Replaced bpf_compute_and_save_data_pointers() with
   bpf_compute_and_save_data_end();
   Replaced bpf_restore_data_pointers() with bpf_restore_data_end().
2. Fixed indentation in test_verifier.c

Changes v3 -> v4:
1. Fixed crash issue reported by Alexei.

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 2929ad29 2cb494a3
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
	cb->data_end  = skb->data + skb_headlen(skb);
}

/* Similar to bpf_compute_data_pointers(), except that save orginal
 * data in cb->data and cb->meta_data for restore.
 */
static inline void bpf_compute_and_save_data_end(
	struct sk_buff *skb, void **saved_data_end)
{
	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

	*saved_data_end = cb->data_end;
	cb->data_end  = skb->data + skb_headlen(skb);
}

/* Restore data saved by bpf_compute_data_pointers(). */
static inline void bpf_restore_data_end(
	struct sk_buff *skb, void *saved_data_end)
{
	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

	cb->data_end = saved_data_end;
}

static inline u8 *bpf_skb_cb(struct sk_buff *skb)
{
	/* eBPF programs may read/write skb->cb[] area to transfer meta
+6 −0
Original line number Diff line number Diff line
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
{
	unsigned int offset = skb->data - skb_network_header(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				 bpf_prog_run_save_cb);
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;
	return ret == 1 ? 0 : -EPERM;
+15 −0
Original line number Diff line number Diff line
@@ -10,6 +10,8 @@
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/sched/signal.h>
#include <net/sock.h>
#include <net/tcp.h>

static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
	u32 retval, duration;
	int hh_len = ETH_HLEN;
	struct sk_buff *skb;
	struct sock *sk;
	void *data;
	int ret;

@@ -137,11 +140,21 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
		break;
	}

	sk = kzalloc(sizeof(struct sock), GFP_USER);
	if (!sk) {
		kfree(data);
		return -ENOMEM;
	}
	sock_net_set(sk, current->nsproxy->net_ns);
	sock_init_data(NULL, sk);

	skb = build_skb(data, 0);
	if (!skb) {
		kfree(data);
		kfree(sk);
		return -ENOMEM;
	}
	skb->sk = sk;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	__skb_put(skb, size);
@@ -159,6 +172,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,

			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
				kfree_skb(skb);
				kfree(sk);
				return -ENOMEM;
			}
		}
@@ -171,6 +185,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
		size = skb_headlen(skb);
	ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
	kfree_skb(skb);
	kfree(sk);
	return ret;
}

+35 −1
Original line number Diff line number Diff line
@@ -5352,6 +5352,40 @@ static bool sk_filter_is_valid_access(int off, int size,
	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static bool cg_skb_is_valid_access(int off, int size,
				   enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	switch (off) {
	case bpf_ctx_range(struct __sk_buff, tc_classid):
	case bpf_ctx_range(struct __sk_buff, data_meta):
	case bpf_ctx_range(struct __sk_buff, flow_keys):
		return false;
	}
	if (type == BPF_WRITE) {
		switch (off) {
		case bpf_ctx_range(struct __sk_buff, mark):
		case bpf_ctx_range(struct __sk_buff, priority):
		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
			break;
		default:
			return false;
		}
	}

	switch (off) {
	case bpf_ctx_range(struct __sk_buff, data):
		info->reg_type = PTR_TO_PACKET;
		break;
	case bpf_ctx_range(struct __sk_buff, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		break;
	}

	return bpf_skb_is_valid_access(off, size, type, prog, info);
}

static bool lwt_is_valid_access(int off, int size,
				enum bpf_access_type type,
				const struct bpf_prog *prog,
@@ -7044,7 +7078,7 @@ const struct bpf_prog_ops xdp_prog_ops = {

const struct bpf_verifier_ops cg_skb_verifier_ops = {
	.get_func_proto		= cg_skb_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.is_valid_access	= cg_skb_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

+171 −0
Original line number Diff line number Diff line
@@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = {
		.result = REJECT,
		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
	},
	{
		"direct packet read test#1 for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
				    offsetof(struct __sk_buff, data)),
			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
				    offsetof(struct __sk_buff, data_end)),
			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
				    offsetof(struct __sk_buff, len)),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, pkt_type)),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, mark)),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
				    offsetof(struct __sk_buff, mark)),
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct __sk_buff, queue_mapping)),
			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
				    offsetof(struct __sk_buff, protocol)),
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
				    offsetof(struct __sk_buff, vlan_present)),
			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"direct packet read test#2 for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
				    offsetof(struct __sk_buff, vlan_tci)),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, vlan_proto)),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, priority)),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
				    offsetof(struct __sk_buff, priority)),
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct __sk_buff,
					     ingress_ifindex)),
			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
				    offsetof(struct __sk_buff, tc_index)),
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
				    offsetof(struct __sk_buff, hash)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"direct packet read test#3 for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
				    offsetof(struct __sk_buff, cb[0])),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, cb[1])),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, cb[2])),
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct __sk_buff, cb[3])),
			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
				    offsetof(struct __sk_buff, cb[4])),
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
				    offsetof(struct __sk_buff, napi_id)),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
				    offsetof(struct __sk_buff, cb[0])),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
				    offsetof(struct __sk_buff, cb[1])),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
				    offsetof(struct __sk_buff, cb[2])),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
				    offsetof(struct __sk_buff, cb[3])),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
				    offsetof(struct __sk_buff, cb[4])),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"direct packet read test#4 for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
				    offsetof(struct __sk_buff, family)),
			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_ip4)),
			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
				    offsetof(struct __sk_buff, local_ip4)),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_ip6[0])),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_ip6[1])),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_ip6[2])),
			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_ip6[3])),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, local_ip6[0])),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, local_ip6[1])),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, local_ip6[2])),
			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
				    offsetof(struct __sk_buff, local_ip6[3])),
			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
				    offsetof(struct __sk_buff, remote_port)),
			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
				    offsetof(struct __sk_buff, local_port)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"invalid access of tc_classid for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct __sk_buff, tc_classid)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = REJECT,
		.errstr = "invalid bpf_context access",
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"invalid access of data_meta for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct __sk_buff, data_meta)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = REJECT,
		.errstr = "invalid bpf_context access",
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"invalid access of flow_keys for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct __sk_buff, flow_keys)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = REJECT,
		.errstr = "invalid bpf_context access",
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"invalid write access to napi_id for CGROUP_SKB",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
				    offsetof(struct __sk_buff, napi_id)),
			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
				    offsetof(struct __sk_buff, napi_id)),
			BPF_MOV64_IMM(BPF_REG_0, 0),
			BPF_EXIT_INSN(),
		},
		.result = REJECT,
		.errstr = "invalid bpf_context access",
		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
	},
	{
		"valid cgroup storage access",
		.insns = {