Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 36e04a2d authored by Jesper Dangaard Brouer's avatar Jesper Dangaard Brouer Committed by Daniel Borkmann
Browse files

samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB



Create a BPF sample that shows how to use the XDP 'data_meta'
infrastructure, created by Daniel Borkmann.  Very few drivers support
this feature, but I wanted a functional sample to begin with, when
working on adding driver support.

XDP data_meta is about creating a communication channel between BPF
programs.  This can be XDP tail-progs, but also other SKB based BPF
hooks, like in this case the TC clsact hook. In this sample I show
that XDP can store info named "mark", and TC/clsact chooses to use
this info and store it into the skb->mark.

It is a bit annoying that the XDP and TC samples use different tools/libs
when attaching their BPF hooks.  As the XDP and TC programs need to
cooperate and agree on a struct-layout, it is best/easiest if the two
programs can be contained within the same BPF restricted-C file.

As the bpf-loader, I chose not to use bpf_load.c (or libbpf), but
instead wrote a bash shell script named xdp2skb_meta.sh, which
demonstrates how to use the iproute2 cmdline tools 'tc' and 'ip' for
loading BPF programs.  To make it easy for first time users, the shell
script has command line parsing, and supports --verbose and --dry-run
mode, if you just want to see/learn the tc+ip command syntax:

 # ./xdp2skb_meta.sh --dev ixgbe2 --dry-run
 # Dry-run mode: enable VERBOSE and don't call TC+IP
 tc qdisc del dev ixgbe2 clsact
 tc qdisc add dev ixgbe2 clsact
 tc filter add dev ixgbe2 ingress prio 1 handle 1 bpf da obj ./xdp2skb_meta_kern.o sec tc_mark
 # Flush XDP on device: ixgbe2
 ip link set dev ixgbe2 xdp off
 ip link set dev ixgbe2 xdp obj ./xdp2skb_meta_kern.o sec xdp_mark

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 632130ed
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
+220 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
#
# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
# eBPF programs, both for XDP and clsbpf.  Shell script function
# wrappers and even long options parsing are illustrated, for ease of
# use.
#
# Related to samples/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
# that need to collaborate between XDP and TC hooks.  Thus, it is
# convenient that the same tool loads both programs that need to work
# together.
#
BPF_FILE=xdp2skb_meta_kern.o
# Directory of this script ("$0" is quoted so a path with whitespace
# is not word-split before dirname sees it)
DIR=$(dirname "$0")

# iproute2 tools used for loading/attaching the BPF programs
export TC=/usr/sbin/tc
export IP=/usr/sbin/ip

# Print the command line help text (framed by blank lines) on stdout.
usage() {
    printf '%s\n' \
	"" \
	"Usage: $0 [-vfh] --dev ethX" \
	"  -d | --dev     :             Network device (required)" \
	"  --flush        :             Cleanup flush TC and XDP progs" \
	"  --list         : (\$LIST)     List TC and XDP progs" \
	"  -v | --verbose : (\$VERBOSE)  Verbose" \
	"  --dry-run      : (\$DRYRUN)   Dry-run only (echo commands)" \
	""
}

## -- General shell logging cmds --
# Report a fatal error on stderr and terminate the script.
#   $1    - exit code to terminate with
#   $2... - message words, printed space-separated after "ERROR: "
err() {
    local rc=$1
    shift
    printf '%s\n' "ERROR: $*" >&2
    exit $rc
}

# Print an informational line prefixed with "# " on stdout, but only
# when $VERBOSE is non-empty; otherwise do nothing.
info() {
    [[ -z "$VERBOSE" ]] && return 0
    printf '%s\n' "# $*"
}

## -- Helper function calls --

# Wrapper call for TC and IP
# - Will display the offending command on failure
#
# Arguments:
#   $1    - path of the command to run
#   $2    - empty string: a failing command aborts the script via err
#           (exit code 2); non-empty: a failing command is tolerated
#   $3... - arguments handed to the command unchanged
# Globals (read):
#   VERBOSE - non-empty: first echo "<basename of $1> <args>" on stdout
#   DRYRUN  - non-empty: return without executing the command
function _call_cmd() {
    local cmd="$1"
    local allow_fail="$2"
    shift 2
    if [[ -n "$VERBOSE" ]]; then
	# "$cmd" is quoted: unquoted, a path containing whitespace was
	# split and basename took the second word as a suffix to strip
	echo "$(basename "$cmd") $*"
    fi
    if [[ -n "$DRYRUN" ]]; then
	return
    fi
    "$cmd" "$@"
    local status=$?
    if (( status != 0 )) && [[ -z "$allow_fail" ]]; then
	err 2 "Exec error($status) occurred cmd: \"$cmd $*\""
    fi
}
# Run $TC with the given arguments through _call_cmd, passing an empty
# allow_fail flag (a failing command is treated as fatal there).
call_tc() {
    _call_cmd "$TC" "" "$@"
}
# Run $TC with the given arguments through _call_cmd, with the
# allow_fail flag set so that a failing command is tolerated.
call_tc_allow_fail() {
    _call_cmd "$TC" "allow_fail" "$@"
}
# Run $IP with the given arguments through _call_cmd, passing an empty
# allow_fail flag (a failing command is treated as fatal there).
call_ip() {
    _call_cmd "$IP" "" "$@"
}

##  --- Parse command line arguments / parameters ---
# Using external program "getopt" to get --long-options
if ! OPTIONS=$(getopt -o vfhd: \
    --long verbose,flush,help,list,dev:,dry-run -- "$@"); then
    err 4 "Error calling getopt"
fi
# Replace the positional parameters with getopt's output
eval set -- "$OPTIONS"

# DEV and FLUSH always start unset; VERBOSE, DRYRUN and LIST are not
# reset here, so values already present in the environment stay active.
unset DEV FLUSH
while :; do
    case "$1" in
	-h | --help )
	    usage
	    exit 0
	    ;;
	-d | --dev )
	    # Network device; the value is the next word
	    DEV=$2
	    info "Device set to: DEV=$DEV" >&2
	    shift 2
	    ;;
	-v | --verbose )
	    VERBOSE=yes
	    shift
	    ;;
	--dry-run )
	    # Dry-run implies verbose, so the commands get echoed
	    DRYRUN=yes
	    VERBOSE=yes
	    info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
	    shift
	    ;;
	-f | --flush )
	    FLUSH=yes
	    shift
	    ;;
	--list )
	    LIST=yes
	    shift
	    ;;
	-- )
	    # End-of-options marker: stop parsing
	    shift
	    break
	    ;;
	* )
	    # Anything unexpected also ends the option loop
	    shift
	    break
	    ;;
    esac
done

# The BPF object is looked up in the directory given by $DIR
FILE="$DIR/$BPF_FILE"

# Only attaching needs the object file.  The --flush and --list modes
# hand nothing but the device to tc/ip, so they must keep working when
# the object has not been built (or was removed).
if [[ -z "$FLUSH" && -z "$LIST" && ! -e "$FILE" ]]; then
    err 3 "Missing BPF object file ($FILE)"
fi

# Every mode operates on a device
if [[ -z "$DEV" ]]; then
    usage
    err 2 "Please specify network device -- required option --dev"
fi

## -- Function calls --

# Show the TC filters attached at the ingress hook of a device.
#   $1 - network device name
function list_tc()
{
    local device="$1"

    info "Listing current TC ingress rules"
    # Quoted, so the device name always reaches tc as a single word
    call_tc filter show dev "$device" ingress
}

# Show the lines of "ip link show" that mention xdp for a device.
#   $1 - network device name
# Returns grep's status (non-zero when no line mentions xdp).
# Terminates the script when the ip command itself fails.
function list_xdp()
{
    local device="$1"
    local link_info

    info "Listing current XDP device($device) setting"

    # In dry-run mode call_ip only echoes the command line; show that
    # line instead of feeding it to grep, which would filter it away.
    if [[ -n "$DRYRUN" ]]; then
	call_ip link show dev "$device"
	return
    fi

    # Capture instead of piping: in a pipeline call_ip runs in a
    # subshell, so the exit done by err on failure never reached the
    # script.  Propagate the failure explicitly.
    # NOTE: with --verbose the echoed command line is part of the
    # captured text and is still filtered out by grep.
    link_info=$(call_ip link show dev "$device") || exit $?
    grep --color=auto xdp <<< "$link_info"
}

# Delete the ingress filters and the clsact qdisc of a device.
# Both deletions go through call_tc_allow_fail.
#   $1 - network device name
function flush_tc()
{
    local device="$1"

    info "Flush TC on device: $device"
    # Quoted, so the device name always reaches tc as a single word
    call_tc_allow_fail filter del dev "$device" ingress
    call_tc_allow_fail qdisc del dev "$device" clsact
}

# Turn XDP off on a device ("ip link set dev <device> xdp off").
#   $1 - network device name
function flush_xdp()
{
    local device="$1"

    info "Flush XDP on device: $device"
    # Quoted, so the device name always reaches ip as a single word
    call_ip link set dev "$device" xdp off
}

# Attach section "tc_mark" of a BPF object as ingress filter on a
# device, after re-creating the clsact qdisc.
#   $1 - network device name
#   $2 - path of the BPF object file
function attach_tc_mark()
{
    local device="$1"
    local file="$2"
    local prog="tc_mark"

    # Re-attach clsact to clear/flush existing rules.  The delete may
    # fail (and its error output is discarded), the add must succeed.
    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
    call_tc            qdisc add dev "$device" clsact

    # Attach BPF prog; device and file are quoted so a path containing
    # whitespace is handed to tc as one word
    call_tc filter add dev "$device" ingress \
	    prio 1 handle 1 bpf da obj "$file" sec "$prog"
}

# Attach section "xdp_mark" of a BPF object as XDP program on a device.
#   $1 - network device name
#   $2 - path of the BPF object file
function attach_xdp_mark()
{
    local device="$1"
    local file="$2"
    local prog="xdp_mark"

    # Remove XDP prog in-case it's already loaded
    # TODO: Need ip-link option to override/replace existing XDP prog
    flush_xdp "$device"

    # Attach XDP/BPF prog; device and file are quoted so a path
    # containing whitespace is handed to ip as one word
    call_ip link set dev "$device" xdp obj "$file" sec "$prog"
}

# --flush: remove the TC and XDP programs from the device, then stop
if [[ -n "$FLUSH" ]]; then
    flush_tc  "$DEV"
    flush_xdp "$DEV"
    exit 0
fi

# --list: show the current TC and XDP state of the device, then stop
if [[ -n "$LIST" ]]; then
    list_tc  "$DEV"
    list_xdp "$DEV"
    exit 0
fi

# Default action: attach both cooperating programs.  $DEV and $FILE
# are quoted so they arrive as exactly one argument each, even when
# the path of the object file contains whitespace.
attach_tc_mark  "$DEV" "$FILE"
attach_xdp_mark "$DEV" "$FILE"
+103 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0
 * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
 *
 * Example howto transfer info from XDP to SKB, e.g. skb->mark
 * -----------------------------------------------------------
 * This uses the XDP data_meta infrastructure, and is a cooperation
 * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
 *
 * Notice: This example does not use the BPF C-loader (bpf_load.c),
 * but instead rely on the iproute2 TC tool for loading BPF-objects.
 */
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>

#include "bpf_helpers.h"

/*
 * This struct is stored in the XDP 'data_meta' area, which is located
 * just in-front-of the raw packet payload data.  The meaning is
 * specific to these two BPF programs that use it as a communication
 * channel.  XDP adjust/increase the area via a bpf-helper, and TC use
 * boundary checks to see if data have been provided.
 *
 * The struct must be 4 byte aligned, which here is enforced by the
 * struct __attribute__((aligned(4))).
 */
struct meta_info {
	/* Value written by the XDP program and copied into the
	 * __sk_buff 'mark' field by the TC program.
	 */
	__u32 mark;
} __attribute__((aligned(4)));

/* XDP program: reserve room for a struct meta_info in the data_meta
 * area in-front of the packet data and store the fixed mark 42 in it.
 *
 * Returns XDP_PASS on success, or XDP_ABORTED when the meta area
 * cannot be adjusted or does not have room for the struct.
 */
SEC("xdp_mark")
int _xdp_mark(struct xdp_md *ctx)
{
	struct meta_info *meta;
	void *data;
	int ret;

	/* Reserve space in-front data pointer for our meta info.
	 * (Notice drivers not supporting data_meta will fail here!)
	 */
	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
	if (ret < 0)
		return XDP_ABORTED;

	/* Notice: the verifier requires that the ctx packet pointers
	 * are loaded _after_ bpf_xdp_adjust_meta().  The helper may
	 * change the underlying packet buffer, so every packet pointer
	 * obtained before the call is invalidated.
	 */
	data = (void *)(unsigned long)ctx->data;

	/* Check data_meta have room for meta_info struct */
	meta = (void *)(unsigned long)ctx->data_meta;
	if (meta + 1 > data)
		return XDP_ABORTED;

	meta->mark = 42;

	return XDP_PASS;
}

/* TC (clsact ingress) program: copy the mark stored in the data_meta
 * area into the skb mark.  When the area does not hold a full struct
 * meta_info, the mark is set to 41 instead.
 *
 * Always returns TC_ACT_OK.
 */
SEC("tc_mark")
int _tc_mark(struct __sk_buff *ctx)
{
	void *data      = (void *)(unsigned long)ctx->data;
	void *data_meta = (void *)(unsigned long)ctx->data_meta;
	struct meta_info *meta = data_meta;

	/* Check XDP gave us some data_meta */
	if (meta + 1 > data) {
		/* No room for meta_info in data_meta: use a distinct
		 * mark (41), so this case can be told apart from the
		 * mark 42 written by the XDP program.
		 */
		ctx->mark = 41;
		return TC_ACT_OK;
	}

	/* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
	ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */

	return TC_ACT_OK;
}

/* Manually attaching these programs:
export DEV=ixgbe2
export FILE=xdp2skb_meta_kern.o

# via TC command
tc qdisc del dev $DEV clsact 2> /dev/null
tc qdisc add dev $DEV clsact
tc filter  add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
tc filter show dev $DEV ingress

# XDP via IP command:
ip link set dev $DEV xdp off
ip link set dev $DEV xdp obj $FILE sec xdp_mark

# Use iptables to "see" if SKBs are marked
iptables -I INPUT -p icmp -m mark --mark 41  # == 0x29
iptables -I INPUT -p icmp -m mark --mark 42  # == 0x2a

# Hint: catch XDP_ABORTED errors via
perf record -e xdp:*
perf script

*/