Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef70f9a2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bpf-sockmap'

John Fastabend says:

====================
BPF: sockmap and sk redirect support

This series implements a sockmap and socket redirect helper for BPF
using a model similar to XDP netdev redirect. A sockmap is a BPF map
type that holds references to sock structs. Then with a new sk
redirect bpf helper BPF programs can use the map to redirect skbs
between sockets,

      bpf_sk_redirect_map(map, key, flags)

Finally, we need a call site to attach our BPF logic to do socket
redirects. We added hooks to recv_sock using the existing strparser
infrastructure to do this. The call site is added via the BPF attach
map call. To enable users to use this infrastructure a new BPF program
BPF_PROG_TYPE_SK_SKB is created that allows users to reference sock
details, such as port and ip address fields, to build useful socket
layer program. The sockmap datapath is as follows,

     recv -> strparser -> verdict/action

where this series implements the drop and redirect actions.
Additional, actions can be added as needed.

A sample program is provided to illustrate how a sockmap can
be integrated with cgroups and used to add/delete sockets in
a sockmap. The program is simple but should show many of the
key ideas.

To test this work test_maps in selftests/bpf was leveraged.
We added a set of tests to add sockets and do send/recv ops
on the sockets to ensure correct behavior. Additionally, the
selftests tests a series of negative test cases. We can expand
on this in the future.

I also have a basic test program I use with iperf/netperf
clients that could be sent as an additional sample if folks
want this. It needs a bit of cleanup to send to the list and
wasn't included in this series.

For people who prefer git over pulling patches out of their mail
editor I've posted the code here,

https://github.com/jrfastab/linux-kernel-xdp/tree/sockmap

For some background information on the genesis of this work
it might be helpful to review these slides from netconf 2017
by Thomas Graf,

http://vger.kernel.org/netconf2017.html
https://docs.google.com/a/covalent.io/presentation/d/1dwSKSBGpUHD3WO5xxzZWj8awV_-xL-oYhvqQMOBhhtk/edit?usp=sharing



Thanks to Daniel Borkmann for reviewing and providing initial
feedback.
====================

Acked-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents d369bcaf 6f6d33f3
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h>

struct perf_event;
struct bpf_prog;
struct bpf_map;

/* map is generic key/value storage optionally accesible by eBPF programs */
@@ -37,6 +38,8 @@ struct bpf_map_ops {
	void (*map_fd_put_ptr)(void *ptr);
	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	u32 (*map_fd_sys_lookup_elem)(void *ptr);
	int (*map_attach)(struct bpf_map *map,
			  struct bpf_prog *p1, struct bpf_prog *p2);
};

struct bpf_map {
@@ -138,8 +141,6 @@ enum bpf_reg_type {
	PTR_TO_PACKET_END,	 /* skb->data + headlen */
};

struct bpf_prog;

/* The information passed from prog-specific *_is_valid_access
 * back to the verifier.
 */
@@ -252,6 +253,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
void bpf_prog_sub(struct bpf_prog *prog, int i);
struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
@@ -311,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);

/* Map specifics */
struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);

@@ -344,6 +347,12 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
	return ERR_PTR(-EOPNOTSUPP);
}

static inline struct bpf_prog *__must_check
bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	return 0;
@@ -384,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;

/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
+2 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
@@ -37,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
+2 −0
Original line number Diff line number Diff line
@@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex);

struct sock *do_sk_redirect_map(void);

#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
extern int bpf_jit_harden;
+42 −1
Original line number Diff line number Diff line
@@ -110,6 +110,7 @@ enum bpf_map_type {
	BPF_MAP_TYPE_ARRAY_OF_MAPS,
	BPF_MAP_TYPE_HASH_OF_MAPS,
	BPF_MAP_TYPE_DEVMAP,
	BPF_MAP_TYPE_SOCKMAP,
};

enum bpf_prog_type {
@@ -127,6 +128,7 @@ enum bpf_prog_type {
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
	BPF_PROG_TYPE_SOCK_OPS,
	BPF_PROG_TYPE_SK_SKB,
};

enum bpf_attach_type {
@@ -134,11 +136,15 @@ enum bpf_attach_type {
	BPF_CGROUP_INET_EGRESS,
	BPF_CGROUP_INET_SOCK_CREATE,
	BPF_CGROUP_SOCK_OPS,
	BPF_CGROUP_SMAP_INGRESS,
	__MAX_BPF_ATTACH_TYPE
};

#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE

/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
#define BPF_SOCKMAP_STRPARSER	(1U << 0)

/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
 * to the given target_fd cgroup the descendent cgroup will be able to
 * override effective bpf program that was inherited from this cgroup
@@ -210,6 +216,7 @@ union bpf_attr {
		__u32		attach_bpf_fd;	/* eBPF program to attach */
		__u32		attach_type;
		__u32		attach_flags;
		__u32		attach_bpf_fd2;
	};

	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -556,6 +563,23 @@ union bpf_attr {
 *     @mode: operation mode (enum bpf_adj_room_mode)
 *     @flags: reserved for future use
 *     Return: 0 on success or negative error code
 *
 * int bpf_sk_redirect_map(map, key, flags)
 *     Redirect skb to a sock in map using key as a lookup key for the
 *     sock in map.
 *     @map: pointer to sockmap
 *     @key: key to lookup sock in map
 *     @flags: reserved for future use
 *     Return: SK_REDIRECT
 *
 * int bpf_sock_map_update(skops, map, key, flags, map_flags)
 *	@skops: pointer to bpf_sock_ops
 *	@map: pointer to sockmap to update
 *	@key: key to insert/update sock in map
 *	@flags: same flags as map update elem
 *	@map_flags: sock map specific flags
 *	   bit 1: Enable strparser
 *	   other bits: reserved
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -609,7 +633,9 @@ union bpf_attr {
	FN(set_hash),			\
	FN(setsockopt),			\
	FN(skb_adjust_room),		\
	FN(redirect_map),
	FN(redirect_map),		\
	FN(sk_redirect_map),		\
	FN(sock_map_update),		\

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -686,6 +712,15 @@ struct __sk_buff {
	__u32 data;
	__u32 data_end;
	__u32 napi_id;

	/* accessed by BPF_PROG_TYPE_sk_skb types */
	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
};

struct bpf_tunnel_key {
@@ -746,6 +781,12 @@ struct xdp_md {
	__u32 data_end;
};

enum sk_action {
	SK_ABORTED = 0,
	SK_DROP,
	SK_REDIRECT,
};

#define BPF_TAG_SIZE	8

struct bpf_prog_info {
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += devmap.o sockmap.o
endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
Loading