Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2798b80b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'eBPF-based-device-cgroup-controller'

Roman Gushchin says:

====================
eBPF-based device cgroup controller

This patchset introduces an eBPF-based device controller for cgroup v2.

Patches (1) and (2) are a preparational work required to share some code
  with the existing device controller implementation.
Patch (3) is the main patch, which introduces a new bpf prog type
  and all necessary infrastructure.
Patch (4) moves cgroup_helpers.c/h to use them by patch (4).
Patch (5) implements an example of eBPF program which controls access
  to device files and corresponding userspace test.

v3:
  Renamed constants introduced by patch (3) to BPF_DEVCG_*

v2:
  Added patch (1).

v1:
  https://lkml.org/lkml/2017/11/1/363


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 488e5b30 37f1ba09
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum bpf_attach_type type);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type);

/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
({									      \
@@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
	}								       \
	__ret;								       \
})

#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access)	      \
({									      \
	int __ret = 0;							      \
	if (cgroup_bpf_enabled)						      \
		__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
							  access,	      \
							  BPF_CGROUP_DEVICE); \
									      \
	__ret;								      \
})
#else

struct cgroup_bpf {};
@@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })

#endif /* CONFIG_CGROUP_BPF */

+3 −0
Original line number Diff line number Diff line
@@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
#endif

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
+63 −4
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/fs.h>
#include <linux/bpf-cgroup.h>

#define DEVCG_ACC_MKNOD 1
#define DEVCG_ACC_READ  2
#define DEVCG_ACC_WRITE 4
#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE)

#define DEVCG_DEV_BLOCK 1
#define DEVCG_DEV_CHAR  2
#define DEVCG_DEV_ALL   4  /* this represents all devices */

#ifdef CONFIG_CGROUP_DEVICE
extern int __devcgroup_inode_permission(struct inode *inode, int mask);
extern int devcgroup_inode_mknod(int mode, dev_t dev);
extern int __devcgroup_check_permission(short type, u32 major, u32 minor,
					short access);
#else
static inline int __devcgroup_check_permission(short type, u32 major, u32 minor,
					       short access)
{ return 0; }
#endif

#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
static inline int devcgroup_check_permission(short type, u32 major, u32 minor,
					     short access)
{
	int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access);

	if (rc)
		return -EPERM;

	return __devcgroup_check_permission(type, major, minor, access);
}

static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{
	short type, access = 0;

	if (likely(!inode->i_rdev))
		return 0;
	if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))

	if (S_ISBLK(inode->i_mode))
		type = DEVCG_DEV_BLOCK;
	else if (S_ISCHR(inode->i_mode))
		type = DEVCG_DEV_CHAR;
	else
		return 0;

	if (mask & MAY_WRITE)
		access |= DEVCG_ACC_WRITE;
	if (mask & MAY_READ)
		access |= DEVCG_ACC_READ;

	return devcgroup_check_permission(type, imajor(inode), iminor(inode),
					  access);
}

static inline int devcgroup_inode_mknod(int mode, dev_t dev)
{
	short type;

	if (!S_ISBLK(mode) && !S_ISCHR(mode))
		return 0;
	return __devcgroup_inode_permission(inode, mask);

	if (S_ISBLK(mode))
		type = DEVCG_DEV_BLOCK;
	else
		type = DEVCG_DEV_CHAR;

	return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
					  DEVCG_ACC_MKNOD);
}

#else
static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{ return 0; }
+15 −0
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ enum bpf_prog_type {
	BPF_PROG_TYPE_LWT_XMIT,
	BPF_PROG_TYPE_SOCK_OPS,
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_CGROUP_DEVICE,
};

enum bpf_attach_type {
@@ -141,6 +142,7 @@ enum bpf_attach_type {
	BPF_CGROUP_SOCK_OPS,
	BPF_SK_SKB_STREAM_PARSER,
	BPF_SK_SKB_STREAM_VERDICT,
	BPF_CGROUP_DEVICE,
	__MAX_BPF_ATTACH_TYPE
};

@@ -991,4 +993,17 @@ struct bpf_perf_event_value {
	__u64 running;
};

#define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
#define BPF_DEVCG_ACC_READ	(1ULL << 1)
#define BPF_DEVCG_ACC_WRITE	(1ULL << 2)

#define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
#define BPF_DEVCG_DEV_CHAR	(1ULL << 1)

struct bpf_cgroup_dev_ctx {
	__u32 access_type; /* (access << 16) | type */
	__u32 major;
	__u32 minor;
};

#endif /* _UAPI__LINUX_BPF_H__ */
+67 −0
Original line number Diff line number Diff line
@@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int allow = 1;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	return !allow;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);

static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
	default:
		return NULL;
	}
}

static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       struct bpf_insn_access_aux *info)
{
	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;
	if (size != sizeof(__u32))
		return false;

	return true;
}

const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto		= cgroup_dev_func_proto,
	.is_valid_access	= cgroup_dev_is_valid_access,
};
Loading