Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7b146ceb authored by Andrey Ignatov's avatar Andrey Ignatov Committed by Alexei Starovoitov
Browse files

bpf: Sysctl hook



Containerized applications may run as root and it may create problems
for whole host. Specifically such applications may change a sysctl and
affect applications in other containers.

Furthermore in existing infrastructure it may not be possible to just
completely disable writing to sysctl, instead such a process should be
gradual with ability to log what sysctl are being changed by a
container, investigate, limit the set of writable sysctl to currently
used ones (so that new ones can not be changed) and eventually reduce
this set to zero.

The patch introduces new program type BPF_PROG_TYPE_CGROUP_SYSCTL and
attach type BPF_CGROUP_SYSCTL to solve these problems on cgroup basis.

New program type has access to following minimal context:
	struct bpf_sysctl {
		__u32	write;
	};

Where @write indicates whether sysctl is being read (= 0) or written (=
1).

Helpers to access sysctl name and value will be introduced separately.

BPF_CGROUP_SYSCTL attach point is added to sysctl code right before
passing control to ctl_table->proc_handler so that BPF program can
either allow or deny access to sysctl.

Suggested-by: default avatarRoman Gushchin <guro@fb.com>
Signed-off-by: default avatarAndrey Ignatov <rdna@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent b1cd609d
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include "internal.h"

static const struct dentry_operations proc_sys_dentry_operations;
@@ -588,6 +589,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
	if (!table->proc_handler)
		goto out;

	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write);
	if (error)
		goto out;

	/* careful: calling conventions are nasty here */
	res = count;
	error = table->proc_handler(table, write, buf, &res, ppos);
+18 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@ struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;

#ifdef CONFIG_CGROUP_BPF

@@ -109,6 +111,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type);

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   enum bpf_attach_type type);

static inline enum bpf_cgroup_storage_type cgroup_storage_type(
	struct bpf_map *map)
{
@@ -253,6 +259,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
									      \
	__ret;								      \
})


#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write)			       \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled)						       \
		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
						       BPF_CGROUP_SYSCTL);     \
	__ret;								       \
})

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +338,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
+8 −0
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@ struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
struct ctl_table;
struct ctl_table_header;

/* ArgX, context and stack frame pointer register positions. Note,
 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -1177,4 +1179,10 @@ struct bpf_sock_ops_kern {
					 */
};

struct bpf_sysctl_kern {
	struct ctl_table_header *head;
	struct ctl_table *table;
	int write;
};

#endif /* __LINUX_FILTER_H__ */
+9 −0
Original line number Diff line number Diff line
@@ -167,6 +167,7 @@ enum bpf_prog_type {
	BPF_PROG_TYPE_LIRC_MODE2,
	BPF_PROG_TYPE_SK_REUSEPORT,
	BPF_PROG_TYPE_FLOW_DISSECTOR,
	BPF_PROG_TYPE_CGROUP_SYSCTL,
};

enum bpf_attach_type {
@@ -188,6 +189,7 @@ enum bpf_attach_type {
	BPF_CGROUP_UDP6_SENDMSG,
	BPF_LIRC_MODE2,
	BPF_FLOW_DISSECTOR,
	BPF_CGROUP_SYSCTL,
	__MAX_BPF_ATTACH_TYPE
};

@@ -3308,4 +3310,11 @@ struct bpf_line_info {
struct bpf_spin_lock {
	__u32	val;
};

struct bpf_sysctl {
	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
				 * Allows 1,2,4-byte read, but no write.
				 */
};

#endif /* _UAPI__LINUX_BPF_H__ */
Loading