Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b5b5eca9 authored by David S. Miller
Browse files

Merge branch 'bpf-support-for-sockets'



David Ahern says:

====================
net: Add bpf support for sockets

The recently added VRF support in Linux leverages the bind-to-device
API for programs to specify an L3 domain for a socket. While
SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
program has support for it. Even for those programs that do support it,
the API requires processes to be started as root (CAP_NET_RAW) which
is not desirable from a general security perspective.

This patch set leverages Daniel Mack's work to attach bpf programs to
a cgroup to provide a capability to set sk_bound_dev_if for all
AF_INET{6} sockets opened by a process in a cgroup when the sockets
are allocated.

For example:
 1. configure vrf (e.g., using ifupdown2)
        auto eth0
        iface eth0 inet dhcp
            vrf mgmt

        auto mgmt
        iface mgmt
            vrf-table auto

 2. configure cgroup
        mount -t cgroup2 none /tmp/cgroupv2
        mkdir /tmp/cgroupv2/mgmt
        test_cgrp2_sock /tmp/cgroupv2/mgmt 15

 3. set shell into cgroup (e.g., can be done at login using pam)
        echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs

At this point all commands run in the shell (e.g., apt) have sockets
automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'),
including processes not running as root.

This capability enables running any program in a VRF context and is key
to deploying Management VRF, a fundamental configuration for networking
gear, with any Linux OS installation.

This patchset also exports the socket family, type and protocol as
read-only allowing bpf filters to deny a process in a cgroup the ability
to open specific types of AF_INET or AF_INET6 sockets.

v7
- comments from Alexei

v6
- add export of socket family, type and protocol
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7f7bf160 554ae6e7
Loading
Loading
Loading
Loading
+37 −23
Original line number Diff line number Diff line
@@ -36,16 +36,19 @@ void cgroup_bpf_update(struct cgroup *cgrp,
		       struct bpf_prog *prog,
		       enum bpf_attach_type type);

int __cgroup_bpf_run_filter(struct sock *sk,
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum bpf_attach_type type);

/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum bpf_attach_type type);

/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
({									      \
	int __ret = 0;							      \
	if (cgroup_bpf_enabled)						      \
		__ret = __cgroup_bpf_run_filter(sk, skb,		\
		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
						    BPF_CGROUP_INET_INGRESS); \
									      \
	__ret;								      \
@@ -57,12 +60,22 @@ int __cgroup_bpf_run_filter(struct sock *sk,
	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		       \
		typeof(sk) __sk = sk_to_full_sk(sk);			       \
		if (sk_fullsock(__sk))					       \
			__ret = __cgroup_bpf_run_filter(__sk, skb,	\
			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
						      BPF_CGROUP_INET_EGRESS); \
	}								       \
	__ret;								       \
})

/*
 * Run the BPF_CGROUP_INET_SOCK_CREATE program for @sk, guarded by
 * cgroup_bpf_enabled. Evaluates to 0 when cgroup bpf is not enabled or
 * @sk is NULL, otherwise to the result of __cgroup_bpf_run_filter_sk().
 *
 * @sk is captured once in a local so that an argument expression with
 * side effects is not expanded (and evaluated) twice.
 */
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled) {					       \
		typeof(sk) __sk = (sk);					       \
		if (__sk)						       \
			__ret = __cgroup_bpf_run_filter_sk(__sk,	       \
						 BPF_CGROUP_INET_SOCK_CREATE); \
	}								       \
	__ret;								       \
})

#else

struct cgroup_bpf {};
@@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,

/*
 * CONFIG_CGROUP_BPF is disabled: each run-program hook collapses to a
 * constant 0 and its arguments are never evaluated.
 */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })

#endif /* CONFIG_CGROUP_BPF */

+15 −0
Original line number Diff line number Diff line
@@ -389,6 +389,21 @@ struct sock {
	 * Because of non atomicity rules, all
	 * changes are protected by socket lock.
	 */
	/* Zero-length marker placed immediately before the flags bitfield.
	 * The SK_FL_* shift/mask pairs below locate the protocol and type
	 * values inside a 32-bit word, with one set of constants per
	 * bitfield endianness.
	 * NOTE(review): the protocol/type bitfield members themselves are
	 * outside this view - confirm the masks against their declared
	 * widths and order.
	 */
	unsigned int		__sk_flags_offset[0];
#ifdef __BIG_ENDIAN_BITFIELD
#define SK_FL_PROTO_SHIFT  16
#define SK_FL_PROTO_MASK   0x00ff0000

#define SK_FL_TYPE_SHIFT   0
#define SK_FL_TYPE_MASK    0x0000ffff
#else
#define SK_FL_PROTO_SHIFT  8
#define SK_FL_PROTO_MASK   0x0000ff00

#define SK_FL_TYPE_SHIFT   16
#define SK_FL_TYPE_MASK    0xffff0000
#endif

	kmemcheck_bitfield_begin(flags);
	unsigned int		sk_padding : 2,
				sk_no_check_tx : 1,
+9 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ enum bpf_prog_type {
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_PERF_EVENT,
	BPF_PROG_TYPE_CGROUP_SKB,
	BPF_PROG_TYPE_CGROUP_SOCK,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
@@ -109,6 +110,7 @@ enum bpf_prog_type {
enum bpf_attach_type {
	BPF_CGROUP_INET_INGRESS,
	BPF_CGROUP_INET_EGRESS,
	BPF_CGROUP_INET_SOCK_CREATE,
	__MAX_BPF_ATTACH_TYPE
};

@@ -567,6 +569,13 @@ enum bpf_ret_code {
	/* >127 are reserved for prog type specific return codes */
};

/* Socket fields exposed to bpf programs that run on a sock.
 * bound_dev_if is intended to be settable by the program; family, type
 * and protocol are exported read-only.
 * NOTE(review): the access checks enforcing this are not visible here.
 */
struct bpf_sock {
	__u32 bound_dev_if;
	__u32 family;
	__u32 type;
	__u32 protocol;
};

/* User return codes for XDP prog type.
 * A valid XDP program must return one of these defined values. All other
 * return codes are reserved for future use. Unknown return codes will result
+38 −5
Original line number Diff line number Diff line
@@ -118,7 +118,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
}

/**
 * __cgroup_bpf_run_filter() - Run a program for packet filtering
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @type: The type of program to be executed
@@ -132,7 +132,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter(struct sock *sk,
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum bpf_attach_type type)
{
@@ -164,4 +164,37 @@ int __cgroup_bpf_run_filter(struct sock *sk,

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter);
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);

/**
 * __cgroup_bpf_run_filter_sk() - Run the cgroup's program against a sock
 * @sk: sock structure handed to the program as its context
 * @type: attach point whose effective program should be executed
 *
 * The cgroup is taken from @sk's cgroup data, and the program found in
 * that cgroup's effective[] slot for @type, if any, is run on @sk while
 * holding the RCU read lock.
 *
 * The socket passed in is expected to be of type INET or INET6, and
 * @type must be suitable for sock filtering. Neither is checked here.
 *
 * Return: 0 if no program is attached or the attached program returned 1,
 * %-EPERM if an attached program returned anything else.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_prog *attached;
	int verdict = 0;

	rcu_read_lock();

	attached = rcu_dereference(cgrp->bpf.effective[type]);
	/* Anything other than a return value of 1 denies the operation. */
	if (attached && BPF_PROG_RUN(attached, sk) != 1)
		verdict = -EPERM;

	rcu_read_unlock();

	return verdict;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
+19 −14
Original line number Diff line number Diff line
@@ -856,6 +856,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	enum bpf_prog_type ptype;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
@@ -866,8 +867,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		prog = bpf_prog_get_type(attr->attach_bpf_fd,
					 BPF_PROG_TYPE_CGROUP_SKB);
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

@@ -879,11 +888,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)

	cgroup_bpf_update(cgrp, prog, attr->attach_type);
	cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
@@ -903,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);
Loading