cgroup: add support for eBPF programs (30070984) · Commits · e / devices / android_kernel_teracube_emerald

include/linux/bpf-cgroup.h

0 → 100644

+79 −0

Original line number	Diff line number	Diff line
		#ifndef _BPF_CGROUP_H
		#define _BPF_CGROUP_H

		#include <linux/bpf.h>
		#include <linux/jump_label.h>
		#include <uapi/linux/bpf.h>

		struct sock;
		struct cgroup;
		struct sk_buff;

		#ifdef CONFIG_CGROUP_BPF

		extern struct static_key_false cgroup_bpf_enabled_key;
		#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)

		struct cgroup_bpf {
		/*
		* Store two sets of bpf_prog pointers, one for programs that are
		* pinned directly to this cgroup, and one for those that are effective
		* when this cgroup is accessed.
		*/
		struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
		struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
		};

		void cgroup_bpf_put(struct cgroup *cgrp);
		void cgroup_bpf_inherit(struct cgroup cgrp, struct cgroup parent);

		void __cgroup_bpf_update(struct cgroup *cgrp,
		struct cgroup *parent,
		struct bpf_prog *prog,
		enum bpf_attach_type type);

		/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
		void cgroup_bpf_update(struct cgroup *cgrp,
		struct bpf_prog *prog,
		enum bpf_attach_type type);

		int __cgroup_bpf_run_filter(struct sock *sk,
		struct sk_buff *skb,
		enum bpf_attach_type type);

		/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
		#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
		({ \
		int __ret = 0; \
		if (cgroup_bpf_enabled) \
		__ret = __cgroup_bpf_run_filter(sk, skb, \
		BPF_CGROUP_INET_INGRESS); \
		\
		__ret; \
		})

		#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \
		({ \
		int __ret = 0; \
		if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
		typeof(sk) __sk = sk_to_full_sk(sk); \
		if (sk_fullsock(__sk)) \
		__ret = __cgroup_bpf_run_filter(__sk, skb, \
		BPF_CGROUP_INET_EGRESS); \
		} \
		__ret; \
		})

		#else

		struct cgroup_bpf {};
		static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
		static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
		struct cgroup *parent) {}

		#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
		#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })

		#endif /* CONFIG_CGROUP_BPF */

		#endif /* _BPF_CGROUP_H */

include/linux/cgroup-defs.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -16,6 +16,7 @@
		#include <linux/percpu-refcount.h>
		#include <linux/percpu-rwsem.h>
		#include <linux/workqueue.h>
		#include <linux/bpf-cgroup.h>

		#ifdef CONFIG_CGROUPS

		@@ -300,6 +301,9 @@ struct cgroup {
		/* used to schedule release agent */
		struct work_struct release_agent_work;

		/* used to store eBPF programs */
		struct cgroup_bpf bpf;

		/* ids of the ancestors at each level including self */
		int ancestor_ids[];
		};

init/Kconfig

+12 −0

Original line number	Diff line number	Diff line
		@@ -1154,6 +1154,18 @@ config CGROUP_PERF

		Say N if unsure.

		config CGROUP_BPF
		bool "Support for eBPF programs attached to cgroups"
		depends on BPF_SYSCALL && SOCK_CGROUP_DATA
		help
		Allow attaching eBPF programs to a cgroup using the bpf(2)
		syscall command BPF_PROG_ATTACH.

		In which context these programs are accessed depends on the type
		of attachment. For instance, programs that are attached using
		BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
		inet sockets.

		config CGROUP_DEBUG
		bool "Example controller"
		default n

kernel/bpf/Makefile

+1 −0

Original line number	Diff line number	Diff line
		@@ -5,3 +5,4 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
		ifeq ($(CONFIG_PERF_EVENTS),y)
		obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
		endif
		obj-$(CONFIG_CGROUP_BPF) += cgroup.o

kernel/bpf/cgroup.c

0 → 100644

+167 −0

Original line number	Diff line number	Diff line
		/*
		* Functions to manage eBPF programs attached to cgroups
		*
		* Copyright (c) 2016 Daniel Mack
		*
		* This file is subject to the terms and conditions of version 2 of the GNU
		* General Public License. See the file COPYING in the main directory of the
		* Linux distribution for more details.
		*/

		#include <linux/kernel.h>
		#include <linux/atomic.h>
		#include <linux/cgroup.h>
		#include <linux/slab.h>
		#include <linux/bpf.h>
		#include <linux/bpf-cgroup.h>
		#include <net/sock.h>

		DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
		EXPORT_SYMBOL(cgroup_bpf_enabled_key);

		/**
		* cgroup_bpf_put() - put references of all bpf programs
		* @cgrp: the cgroup to modify
		*/
		void cgroup_bpf_put(struct cgroup *cgrp)
		{
		unsigned int type;

		for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
		struct bpf_prog *prog = cgrp->bpf.prog[type];

		if (prog) {
		bpf_prog_put(prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
		}
		}
		}

		/**
		* cgroup_bpf_inherit() - inherit effective programs from parent
		* @cgrp: the cgroup to modify
		* @parent: the parent to inherit from
		*/
		void cgroup_bpf_inherit(struct cgroup cgrp, struct cgroup parent)
		{
		unsigned int type;

		for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
		struct bpf_prog *e;

		e = rcu_dereference_protected(parent->bpf.effective[type],
		lockdep_is_held(&cgroup_mutex));
		rcu_assign_pointer(cgrp->bpf.effective[type], e);
		}
		}

		/**
		* __cgroup_bpf_update() - Update the pinned program of a cgroup, and
		* propagate the change to descendants
		* @cgrp: The cgroup which descendants to traverse
		* @parent: The parent of @cgrp, or %NULL if @cgrp is the root
		* @prog: A new program to pin
		* @type: Type of pinning operation (ingress/egress)
		*
		* Each cgroup has a set of two pointers for bpf programs; one for eBPF
		* programs it owns, and which is effective for execution.
		*
		* If @prog is %NULL, this function attaches a new program to the cgroup and
		* releases the one that is currently attached, if any. @prog is then made
		* the effective program of type @type in that cgroup.
		*
		* If @prog is %NULL, the currently attached program of type @type is released,
		* and the effective program of the parent cgroup (if any) is inherited to
		* @cgrp.
		*
		* Then, the descendants of @cgrp are walked and the effective program for
		* each of them is set to the effective program of @cgrp unless the
		* descendant has its own program attached, in which case the subbranch is
		* skipped. This ensures that delegated subcgroups with own programs are left
		* untouched.
		*
		* Must be called with cgroup_mutex held.
		*/
		void __cgroup_bpf_update(struct cgroup *cgrp,
		struct cgroup *parent,
		struct bpf_prog *prog,
		enum bpf_attach_type type)
		{
		struct bpf_prog old_prog, effective;
		struct cgroup_subsys_state *pos;

		old_prog = xchg(cgrp->bpf.prog + type, prog);

		effective = (!prog && parent) ?
		rcu_dereference_protected(parent->bpf.effective[type],
		lockdep_is_held(&cgroup_mutex)) :
		prog;

		css_for_each_descendant_pre(pos, &cgrp->self) {
		struct cgroup *desc = container_of(pos, struct cgroup, self);

		/* skip the subtree if the descendant has its own program */
		if (desc->bpf.prog[type] && desc != cgrp)
		pos = css_rightmost_descendant(pos);
		else
		rcu_assign_pointer(desc->bpf.effective[type],
		effective);
		}

		if (prog)
		static_branch_inc(&cgroup_bpf_enabled_key);

		if (old_prog) {
		bpf_prog_put(old_prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
		}
		}

		/**
		* __cgroup_bpf_run_filter() - Run a program for packet filtering
		* @sk: The socken sending or receiving traffic
		* @skb: The skb that is being sent or received
		* @type: The type of program to be exectuted
		*
		* If no socket is passed, or the socket is not of type INET or INET6,
		* this function does nothing and returns 0.
		*
		* The program type passed in via @type must be suitable for network
		* filtering. No further check is performed to assert that.
		*
		* This function will return %-EPERM if any if an attached program was found
		* and if it returned != 1 during execution. In all other cases, 0 is returned.
		*/
		int __cgroup_bpf_run_filter(struct sock *sk,
		struct sk_buff *skb,
		enum bpf_attach_type type)
		{
		struct bpf_prog *prog;
		struct cgroup *cgrp;
		int ret = 0;

		if (!sk \|\| !sk_fullsock(sk))
		return 0;

		if (sk->sk_family != AF_INET &&
		sk->sk_family != AF_INET6)
		return 0;

		cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

		rcu_read_lock();

		prog = rcu_dereference(cgrp->bpf.effective[type]);
		if (prog) {
		unsigned int offset = skb->data - skb_network_header(skb);

		__skb_push(skb, offset);
		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
		__skb_pull(skb, offset);
		}

		rcu_read_unlock();

		return ret;
		}
		EXPORT_SYMBOL(__cgroup_bpf_run_filter);