Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a3f74617 authored by Martin KaFai Lau's avatar Martin KaFai Lau Committed by David S. Miller
Browse files

cgroup: bpf: Add an example to do cgroup checking in BPF



test_cgrp2_array_pin.c:
A userland program that creates a bpf_map (BPF_MAP_TYPE_CGROUP_ARRAY),
populates/updates it with a cgroup2-backed fd and pins it to a
bpf-fs's file.  The pinned file can be loaded by tc and then used
by the bpf prog later.  This program can also update an existing pinned
array and it could be useful for debugging/testing purpose.

test_cgrp2_tc_kern.c:
A bpf prog which should be loaded by tc.  It is to demonstrate
the usage of bpf_skb_in_cgroup.

test_cgrp2_tc.sh:
A script that glues the test_cgrp2_array_pin.c and
test_cgrp2_tc_kern.c together.  The idea is like:
1. Load the test_cgrp2_tc_kern.o by tc
2. Use test_cgrp2_array_pin.c to populate a BPF_MAP_TYPE_CGROUP_ARRAY
   with a cgroup fd
3. Do a 'ping -6 ff02::1%ve' to ensure the packet has been
   dropped because of a match on the cgroup

Most of the lines in test_cgrp2_tc.sh are boilerplate
to set up the cgroup/bpf-fs/net-devices/netns...etc.  It is
not bulletproof on errors but should work well enough and
give enough debug info if things did not go well.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4a482f34
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ hostprogs-y += offwaketime
hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin

test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -61,6 +63,7 @@ always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include

+2 −0
Original line number Diff line number Diff line
@@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
	(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
	(void *) BPF_FUNC_l4_csum_replace;
/*
 * Stub for the skb_in_cgroup BPF helper: a function pointer whose value is
 * the helper id BPF_FUNC_skb_in_cgroup, following the same pattern as the
 * other helper stubs in this header.
 *
 * Takes the program context, a pointer to a map and an index into that map.
 * NOTE(review): the sample program in this commit treats a return value of
 * exactly 1 as "skb belongs to the cgroup stored at map[index]"; confirm the
 * remaining return values against the kernel helper documentation.
 */
static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
	(void *) BPF_FUNC_skb_in_cgroup;

#if defined(__x86_64__)

+109 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/unistd.h>
#include <linux/bpf.h>

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

#include "libbpf.h"

/* Write the command-line synopsis and the option summary to stdout. */
static void usage(void)
{
	static const char help_text[] =
		"Usage: test_cgrp2_array_pin [...]\n"
		"       -F <file>   File to pin an BPF cgroup array\n"
		"       -U <file>   Update an already pinned BPF cgroup array\n"
		"       -v <value>  Full path of the cgroup2\n"
		"       -h          Display this help\n";

	fputs(help_text, stdout);
}

/*
 * Store a cgroup2 file descriptor in slot 0 of a BPF cgroup array.
 *
 * Options:
 *   -F <file>  create a new one-entry BPF_MAP_TYPE_CGROUP_ARRAY, fill it,
 *              then pin it at <file>
 *   -U <file>  open the array already pinned at <file> and update it
 *   -v <path>  cgroup2 directory whose fd is stored in the array
 *   -h         print the usage text and exit successfully
 *
 * Both -v and one of -F/-U are required.
 *
 * Returns 0 on success (or after -h).  Returns -1 on a usage error or when
 * opening the cgroup, creating the map or fetching the pinned map fails.
 * When bpf_update_elem() or bpf_obj_pin() fails, their non-zero return
 * value is passed through.
 */
int main(int argc, char **argv)
{
	const char *pinned_file = NULL, *cg2 = NULL;
	int create_array = 1;
	int array_key = 0;
	int array_fd = -1;
	int cg2_fd = -1;
	int ret = -1;
	int opt;

	/*
	 * 'h' must be listed here: without it getopt() reports "-h" as an
	 * invalid option even though usage() documents it.
	 */
	while ((opt = getopt(argc, argv, "F:U:v:h")) != -1) {
		switch (opt) {
		/* General args */
		case 'F':
			pinned_file = optarg;
			break;
		case 'U':
			pinned_file = optarg;
			create_array = 0;
			break;
		case 'v':
			cg2 = optarg;
			break;
		case 'h':
			/* Explicit help request is not an error. */
			usage();
			ret = 0;
			goto out;
		default:
			usage();
			goto out;
		}
	}

	if (!cg2 || !pinned_file) {
		usage();
		goto out;
	}

	cg2_fd = open(cg2, O_RDONLY);
	if (cg2_fd < 0) {
		fprintf(stderr, "open(%s,...): %s(%d)\n",
			cg2, strerror(errno), errno);
		goto out;
	}

	if (create_array) {
		/* One slot; key and value are both 32 bits wide. */
		array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
					  sizeof(uint32_t), sizeof(uint32_t),
					  1, 0);
		if (array_fd < 0) {
			fprintf(stderr,
				"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
				strerror(errno), errno);
			goto out;
		}
	} else {
		array_fd = bpf_obj_get(pinned_file);
		if (array_fd < 0) {
			fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
				pinned_file, strerror(errno), errno);
			goto out;
		}
	}

	/* The value written into the map is the cgroup's file descriptor. */
	ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0);
	if (ret) {
		perror("bpf_update_elem");
		goto out;
	}

	/* Only a freshly created array needs pinning; -U reuses the pin. */
	if (create_array) {
		ret = bpf_obj_pin(array_fd, pinned_file);
		if (ret) {
			fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
				pinned_file, strerror(errno), errno);
			goto out;
		}
	}

out:
	if (array_fd != -1)
		close(array_fd);
	if (cg2_fd != -1)
		close(cg2_fd);
	return ret;
}
+184 −0
Original line number Diff line number Diff line
#!/bin/bash

# Directory holding this script; the BPF object is looked up relative to it.
MY_DIR=$(dirname $0)

# Details on the bpf prog
BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
BPF_SECTION='filter'
BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"

# The tc and ip commands may be overridden through the environment;
# an unset or empty value falls back to the plain command name.
TC=${TC:-tc}
IP=${IP:-ip}

# Names of the veth interface, net namespace...etc.
HOST_IFC='ve'
NS_IFC='vens'
NS='ns'

# Print the mount point (second field) of the first /proc/mounts entry whose
# filesystem type (third field) equals $1.  An empty line is printed when no
# such filesystem is mounted.
find_mnt() {
    # Pass the type as an awk variable instead of splicing it into the
    # program text, and let awk read the file directly.
    awk -v fstype="$1" \
	'$3 == fstype && mnt == "" { mnt = $2 } END { print mnt }' /proc/mounts
}

# Init cgroup2 vars
# Sets CGRP2_ROOT to the existing cgroup2 mount point.  When none is mounted
# it falls back to /mnt/cgroup2 and flags (MOUNT_CGRP2=yes) that the mount
# still has to be done.  CGRP2_TC and CGRP2_TC_LEAF are derived from the root.
init_cgrp2_vars() {
    CGRP2_ROOT=$(find_mnt cgroup2)
    [ -n "$CGRP2_ROOT" ] || {
	CGRP2_ROOT='/mnt/cgroup2'
	MOUNT_CGRP2="yes"
    }
    CGRP2_TC="$CGRP2_ROOT/tc"
    CGRP2_TC_LEAF="$CGRP2_TC/leaf"
}

# Init bpf fs vars
# Sets BPF_FS_TC_SHARE to <bpf mount point>/tc/globals.
# Returns 255 (leaving BPF_FS_TC_SHARE untouched) when no bpf filesystem is
# mounted.
init_bpf_fs_vars() {
    local bpf_fs_root
    bpf_fs_root=$(find_mnt bpf)
    # Older bash releases reject a negative argument to 'return'; 255 is the
    # status that 'return -1' yields where it is accepted.
    [ -n "$bpf_fs_root" ] || return 255
    BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
}

# Create (argument "start") or remove (any other argument) the cgroup2
# directories used by the test.  The cgroup2 filesystem itself is mounted and
# unmounted only when MOUNT_CGRP2 is "yes".
setup_cgrp2() {
    if [ "$1" != 'start' ]
    then
	# Teardown: the leaf has to go before its parent.
	rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
	[ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
	return
    fi

    if [ "$MOUNT_CGRP2" == 'yes' ]
    then
	if [ ! -d $CGRP2_ROOT ]
	then
	    mkdir -p $CGRP2_ROOT
	fi
	mount -t cgroup2 none $CGRP2_ROOT || return $?
    fi
    mkdir -p $CGRP2_TC_LEAF
}

# With "start": store the $CGRP2_TC cgroup in the already pinned cgroup array
# (update mode, -U).  With any other argument: delete the pinned array file,
# provided the bpf-fs share directory exists.
setup_bpf_cgrp2_array() {
    local pinned_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
    if [ "$1" = 'start' ]
    then
	$MY_DIR/test_cgrp2_array_pin -U $pinned_array -v $CGRP2_TC
    else
	[ -d "$BPF_FS_TC_SHARE" ] && rm -f $pinned_array
    fi
}

# Build (argument "start") or tear down (any other argument) the test network.
#
# start: create the veth pair $HOST_IFC <-> $NS_IFC, move $NS_IFC into the
#        network namespace $NS, bring both ends up, disable accept_dad on
#        both, then attach the BPF program as a clsact egress filter on
#        $HOST_IFC.  Returns the failing command's status on the first error
#        (the two sysctl calls are not checked).
# stop:  delete the namespace and the host-side interface.
setup_net() {
    case $1 in
	start)
	    $IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
	    $IP link set dev $HOST_IFC up || return $?
	    sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0

	    # Use $NS throughout so the namespace name is defined in one place.
	    $IP netns add $NS || return $?
	    $IP link set dev $NS_IFC netns $NS || return $?
	    $IP -n $NS link set dev $NS_IFC up || return $?
	    $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
	    $TC qdisc add dev $HOST_IFC clsact || return $?
	    $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
	    ;;
	*)
	    $IP netns del $NS
	    $IP link del $HOST_IFC
	    ;;
    esac
}

# Run command line $2 inside cgroup $1.
run_in_cgrp() {
    # A child bash writes its own pid into the cgroup's cgroup.procs and then
    # exec()s the command, so this script itself never enters the cgroup.
    # That makes the cgroup cleanup easier at the end of the test.
    cmd="echo \$\$ >  $1/cgroup.procs; exec $2"
    bash -c "$cmd"
}

# Ping the all-nodes multicast address from inside the leaf cgroup, then read
# the drop counter of $HOST_IFC's qdisc statistics.
# Prints "FAIL" and returns 1 when the counter is 0 (or could not be parsed);
# otherwise reports the number of filtered packets and returns 0.
do_test() {
    run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
    # Field 7 of the matching line is taken to be "<count>,"; keep everything
    # before the comma.  awk string positions start at 1: a start of 0 is not
    # handled the same way by every awk and can yield one character too few.
    local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
			   awk '/drop/{print substr($7, 1, index($7, ",")-1)}')
    if [[ $dropped -eq 0 ]]
    then
	echo "FAIL"
	return 1
    else
	echo "Successfully filtered $dropped packets"
	return 0
    fi
}

# Exit handler.
# When DEBUG is "yes" (and the script is not in cleanup-only mode) it dumps
# the cgroup2/bpf mounts, the leaf cgroup, the bpf-fs share directory and the
# link/IPv6-address state of both ends of the veth pair.
# Unless MODE is "nocleanup" it then tears down the network, the pinned array
# and the cgroup2 directories.
do_exit() {
    if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
    then
	echo "------ DEBUG ------"
	# grep -E replaces the obsolescent egrep.
	echo "mount: "; mount | grep -E '(cgroup2|bpf)'; echo
	echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
	if [ -d "$BPF_FS_TC_SHARE" ]
	then
	    echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
	fi
	echo "Host net:"
	$IP netns
	$IP link show dev $HOST_IFC
	$IP -6 a show dev $HOST_IFC
	$TC -s qdisc show dev $HOST_IFC
	echo
	echo "$NS net:"
	$IP -n $NS link show dev $NS_IFC
	# Show the IPv6 addresses, mirroring the host-side dump above, instead
	# of printing the link information a second time.
	$IP -n $NS -6 a show dev $NS_IFC
	echo "------ DEBUG ------"
	echo
    fi

    if [ "$MODE" != 'nocleanup' ]
    then
	setup_net stop
	setup_bpf_cgrp2_array stop
	setup_cgrp2 stop
    fi
}

# Work out the cgroup2 and bpf-fs locations before anything else so that the
# exit handler knows what to clean up even in cleanup-only mode.
init_cgrp2_vars
init_bpf_fs_vars

# Each argument is a bare word; anything unrecognised prints the help text
# and aborts.
while (( $# >= 1 ))
do
    a="$1"
    case $a in
	debug)
	    DEBUG='yes'
	    ;;
	cleanup-only)
	    MODE='cleanuponly'
	    ;;
	no-cleanup)
	    MODE='nocleanup'
	    ;;
	*)
	    printf '%s\n' \
		"test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" \
		"  debug: Print cgrp and network setup details at the end of the test" \
		"  cleanup-only: Try to cleanup things from last test.  No test will be run" \
		"  no-cleanup: Run the test but don't do cleanup at the end" \
		"[Note: If no arg is given, it will run the test and do cleanup at the end]" \
		""
	    exit -1
	    ;;
    esac
    shift 1
done

# From here on, do_exit runs whenever the script exits.
trap do_exit 0

# Cleanup-only mode: exiting now lets the exit handler do all the work.
if [ "$MODE" == 'cleanuponly' ]
then
    exit
fi

setup_cgrp2 start || exit $?
setup_net start || exit $?
# Look the bpf-fs up again after tc has loaded the program; abort if it is
# still not mounted.
init_bpf_fs_vars || exit $?
setup_bpf_cgrp2_array start || exit $?
do_test
echo
+69 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/in6.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/pkt_cls.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/*
 * copy of 'struct ethhdr' without __packed
 *
 * Overlaid on the start of the packet data by handle_egress(); the IPv6
 * header is expected directly after it (data + sizeof(struct eth_hdr)).
 */
struct eth_hdr {
	/* ETH_ALEN-byte destination address */
	unsigned char   h_dest[ETH_ALEN];
	/* ETH_ALEN-byte source address */
	unsigned char   h_source[ETH_ALEN];
	/* protocol id; compared against htons(ETH_P_IPV6) by handle_egress() */
	unsigned short  h_proto;
};

/* Value stored in bpf_elf_map.pinning by the map definition in this file. */
#define PIN_GLOBAL_NS		2
/*
 * Map description placed in the "maps" ELF section.
 * NOTE(review): this layout presumably has to match what the tc (iproute2)
 * ELF loader expects, since the object is loaded by tc - confirm against the
 * iproute2 headers before changing any field.
 */
struct bpf_elf_map {
	/* BPF_MAP_TYPE_* */
	__u32 type;
	/* key size in bytes */
	__u32 size_key;
	/* value size in bytes */
	__u32 size_value;
	/* number of entries */
	__u32 max_elem;
	__u32 flags;
	__u32 id;
	/* pinning mode, e.g. PIN_GLOBAL_NS */
	__u32 pinning;
};

/*
 * Single-entry cgroup array consulted by handle_egress().  It is marked
 * PIN_GLOBAL_NS; user space fills slot 0 through the pinned file.
 */
struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
	.type		= BPF_MAP_TYPE_CGROUP_ARRAY,
	.size_key	= sizeof(uint32_t),
	.size_value	= sizeof(uint32_t),
	.max_elem	= 1,
	.pinning	= PIN_GLOBAL_NS,
};

/*
 * Egress classifier.
 *
 * Packets too short to hold an Ethernet plus IPv6 header, and packets that
 * are not ICMPv6 over IPv6, are let through (TC_ACT_OK).  An ICMPv6 packet
 * is dropped (TC_ACT_SHOT) only when bpf_skb_in_cgroup() returns 1 for
 * slot 0 of test_cgrp2_array_pin; any other result lets it through.
 * Each decision past the length check is logged with bpf_trace_printk().
 */
SEC("filter")
int handle_egress(struct __sk_buff *skb)
{
	char dont_care_msg[] = "dont care %04x %d\n";
	char pass_msg[] = "pass\n";
	char reject_msg[] = "reject\n";
	void *pkt_start = (void *)(long)skb->data;
	void *pkt_end = (void *)(long)skb->data_end;
	struct eth_hdr *eth = pkt_start;
	struct ipv6hdr *ip6h = pkt_start + sizeof(*eth);

	/* One bounds check covers both headers read below. */
	if (pkt_start + sizeof(*eth) + sizeof(*ip6h) > pkt_end)
		return TC_ACT_OK;

	/* Not ICMPv6 over IPv6: nothing for this filter to decide. */
	if (eth->h_proto != htons(ETH_P_IPV6) ||
	    ip6h->nexthdr != IPPROTO_ICMPV6) {
		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
				 eth->h_proto, ip6h->nexthdr);
		return TC_ACT_OK;
	}

	if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) == 1) {
		bpf_trace_printk(reject_msg, sizeof(reject_msg));
		return TC_ACT_SHOT;
	}

	bpf_trace_printk(pass_msg, sizeof(pass_msg));
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";