Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 25d4dae1 authored by David S. Miller
Browse files

Merge branch 'XDP-redirect-tracepoints'



Jesper Dangaard Brouer says:

====================
XDP redirect tracepoints

I feel this is as far as I can take the tracepoint infrastructure to
assist XDP monitoring.

Tracepoints come with a base overhead of 25 nanosec for an attached
bpf_prog, and 48 nanosec for using a full perf record. This is
problematic for the XDP use-case, but it is very convenient to use the
existing perf infrastructure.

From a performance perspective, the real solution would be to attach
another bpf_prog (that understands xdp_buff), but I'm not sure we want
to introduce yet another bpf attach API for this.

One thing left is to standardize the possible err return codes, to a
limited set, to allow easier (and faster) mapping into a bpf map.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents d0fcece7 3ffab546
Loading
Loading
Loading
Loading
+82 −18
Original line number Diff line number Diff line
@@ -31,55 +31,119 @@ TRACE_EVENT(xdp_exception,
	TP_ARGS(dev, xdp, act),

	TP_STRUCT__entry(
		__array(u8, prog_tag, 8)
		__field(int, prog_id)
		__field(u32, act)
		__field(int, ifindex)
	),

	TP_fast_assign(
		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
		memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
		__entry->prog_id	= xdp->aux->id;
		__entry->act		= act;
		__entry->ifindex	= dev->ifindex;
	),

	TP_printk("prog=%s action=%s ifindex=%d",
		  __print_hex_str(__entry->prog_tag, 8),
	TP_printk("prog_id=%d action=%s ifindex=%d",
		  __entry->prog_id,
		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
		  __entry->ifindex)
);

TRACE_EVENT(xdp_redirect,
DECLARE_EVENT_CLASS(xdp_redirect_template,

	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp, u32 act,
		 int to_index, int err),
		 const struct bpf_prog *xdp,
		 int to_ifindex, int err,
		 const struct bpf_map *map, u32 map_index),

	TP_ARGS(dev, xdp, act, to_index, err),
	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),

	TP_STRUCT__entry(
		__array(u8, prog_tag, 8)
		__field(int, prog_id)
		__field(u32, act)
		__field(int, ifindex)
		__field(int, to_index)
		__field(int, err)
		__field(int, to_ifindex)
		__field(u32, map_id)
		__field(int, map_index)
	),

	TP_fast_assign(
		BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
		memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
		__entry->act		= act;
		__entry->prog_id	= xdp->aux->id;
		__entry->act		= XDP_REDIRECT;
		__entry->ifindex	= dev->ifindex;
		__entry->to_index	= to_index;
		__entry->err		= err;
		__entry->to_ifindex	= to_ifindex;
		__entry->map_id		= map ? map->id : 0;
		__entry->map_index	= map_index;
	),

	TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d",
		  __print_hex_str(__entry->prog_tag, 8),
	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
		  __entry->prog_id,
		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
		  __entry->ifindex, __entry->to_index,
		  __entry->ifindex, __entry->to_ifindex,
		  __entry->err)
);

/* Event "xdp_redirect", instantiated from the xdp_redirect_template class
 * with the same prototype and arguments as the class.
 */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 int to_ifindex, int err,
		 const struct bpf_map *map, u32 map_index),
	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);

/* Event "xdp_redirect_err", instantiated from the xdp_redirect_template
 * class; a separate event name so error cases can be traced on their own.
 */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 int to_ifindex, int err,
		 const struct bpf_map *map, u32 map_index),
	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
);

/* Convenience wrappers for the non-map redirect tracepoints.  They supply
 * the arguments that do not apply without a map (map = NULL, map_index = 0)
 * and, for the success event, err = 0.
 *
 * The expansions intentionally carry no trailing semicolon, so an invocation
 * followed by ';' is exactly one statement and stays safe inside an unbraced
 * if/else.
 */
#define _trace_xdp_redirect(dev, xdp, to)		\
	 trace_xdp_redirect(dev, xdp, to, 0, NULL, 0)

#define _trace_xdp_redirect_err(dev, xdp, to, err)	\
	 trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0)

/* Event "xdp_redirect_map": same record as xdp_redirect_template, but with
 * its own print format that additionally shows map_id and map_index.
 */
DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 int to_ifindex, int err,
		 const struct bpf_map *map, u32 map_index),
	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
		  " map_id=%d map_index=%d",
		  __entry->prog_id,
		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
		  __entry->ifindex, __entry->to_ifindex,
		  __entry->err,
		  __entry->map_id, __entry->map_index)
);

/* Event "xdp_redirect_map_err": error counterpart of the map redirect event;
 * uses the template record with a print format that also shows map_id and
 * map_index.
 */
DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 int to_ifindex, int err,
		 const struct bpf_map *map, u32 map_index),
	TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
		  " map_id=%d map_index=%d",
		  __entry->prog_id,
		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
		  __entry->ifindex, __entry->to_ifindex,
		  __entry->err,
		  __entry->map_id, __entry->map_index)
);

/* Convenience wrappers for the map-based redirect tracepoints.  The target
 * ifindex is taken from @fwd, or reported as 0 when @fwd is NULL (e.g. the
 * map lookup failed).  The success variant passes err = 0.
 *
 * @fwd is parenthesized because it is used as an operand, and it is
 * evaluated twice, so it must not have side effects.  The expansions carry
 * no trailing semicolon so an invocation followed by ';' is one statement.
 */
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
	 trace_xdp_redirect_map(dev, xdp, (fwd) ? (fwd)->ifindex : 0,	\
				0, map, idx)

#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)	\
	 trace_xdp_redirect_map_err(dev, xdp, (fwd) ? (fwd)->ifindex : 0,	\
				    err, map, idx)

#endif /* _TRACE_XDP_H */

#include <trace/define_trace.h>
+24 −13
Original line number Diff line number Diff line
@@ -2515,16 +2515,20 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
	fwd = __dev_map_lookup_elem(map, index);
	if (!fwd) {
		err = -EINVAL;
		goto out;
		goto err;
	}
	if (ri->map_to_flush && ri->map_to_flush != map)
		xdp_do_flush_map();

	err = __bpf_tx_xdp(fwd, map, xdp, index);
	if (likely(!err))
	if (unlikely(err))
		goto err;

	ri->map_to_flush = map;
out:
	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
	return 0;
err:
	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
	return err;
}

@@ -2543,12 +2547,17 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
	ri->ifindex = 0;
	if (unlikely(!fwd)) {
		err = -EINVAL;
		goto out;
		goto err;
	}

	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
out:
	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
	if (unlikely(err))
		goto err;

	_trace_xdp_redirect(dev, xdp_prog, index);
	return 0;
err:
	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
	return err;
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -2566,23 +2575,25 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
	ri->ifindex = 0;
	if (unlikely(!fwd)) {
		err = -EINVAL;
		goto out;
		goto err;
	}

	if (unlikely(!(fwd->flags & IFF_UP))) {
		err = -ENETDOWN;
		goto out;
		goto err;
	}

	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
	if (skb->len > len) {
		err = -EMSGSIZE;
		goto out;
		goto err;
	}

	skb->dev = fwd;
out:
	trace_xdp_redirect(dev, xdp_prog, XDP_REDIRECT, index, err);
	_trace_xdp_redirect(dev, xdp_prog, index);
	return 0;
err:
	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
	return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+4 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_monitor
hostprogs-y += syscall_tp

# Libbpf dependencies
@@ -83,6 +84,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o

# Tell kbuild to always build the programs
@@ -127,6 +129,7 @@ always += tcp_iw_kern.o
always += tcp_clamp_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_monitor_kern.o
always += syscall_tp_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
@@ -166,6 +169,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_syscall_tp += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
+88 −0
Original line number Diff line number Diff line
/* XDP monitor tool, based on tracepoints
 *
 *  Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

/* Per-CPU array of u64 event counters with two slots, indexed by
 * XDP_REDIRECT_SUCCESS (0) and XDP_REDIRECT_ERROR (1).
 */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 2,
	/* TODO: have entries for all possible errno's */
};

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
/* Context handed to the tracepoint programs below.  The per-field comments
 * record the offset/size/signedness of each member as listed in the
 * tracepoint format file; the first four members are the common header.
 */
struct xdp_redirect_ctx {
	unsigned short common_type;	//	offset:0;  size:2; signed:0;
	unsigned char common_flags;	//	offset:2;  size:1; signed:0;
	unsigned char common_preempt_count;//	offset:3;  size:1; signed:0;
	int common_pid;			//	offset:4;  size:4; signed:1;

	int prog_id;			//	offset:8;  size:4; signed:1;
	u32 act;			//	offset:12; size:4; signed:0;
	int ifindex;			//	offset:16; size:4; signed:1;
	int err;			//	offset:20; size:4; signed:1;
	int to_ifindex;			//	offset:24; size:4; signed:1;
	u32 map_id;			//	offset:28; size:4; signed:0;
	int map_index;			//	offset:32; size:4; signed:1;
};					//	end offset:36 (total size)

/* Keys into the redirect_err_cnt map */
enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

/* Count one redirect event in redirect_err_cnt.
 *
 * The slot is chosen from the tracepoint's err field: zero is accounted as
 * XDP_REDIRECT_SUCCESS, any other value as XDP_REDIRECT_ERROR.
 *
 * Always returns 0, which indicates the event was filtered (no further
 * processing).  Returning 1 would allow e.g. a perf-record tracepoint to
 * see and record these events, but that does not work well in practice, as
 * stopping perf-record also unloads this bpf_prog, and it adds overhead.
 */
static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 slot = ctx->err ? XDP_REDIRECT_ERROR : XDP_REDIRECT_SUCCESS;
	u64 *counter = bpf_map_lookup_elem(&redirect_err_cnt, &slot);

	/* Lookup result must be NULL-checked before it is dereferenced */
	if (counter)
		*counter += 1;

	return 0;
}

/* Attached to the xdp:xdp_redirect_err tracepoint; counts the event. */
SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}


/* Attached to the xdp:xdp_redirect_map_err tracepoint; counts the event. */
SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Attached to the xdp:xdp_redirect (success) tracepoint; counts the event.
 * Likely unloaded when the user-space program starts (it only stays
 * attached when success stats are requested).
 */
SEC("tracepoint/xdp/xdp_redirect")
int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Attached to the xdp:xdp_redirect_map (success) tracepoint; counts the
 * event.  Likely unloaded when the user-space program starts (it only stays
 * attached when success stats are requested).
 */
SEC("tracepoint/xdp/xdp_redirect_map")
int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
+295 −0
Original line number Diff line number Diff line
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
/* One-line tool description, printed by usage() and at start of polling */
static const char *__doc__=
 "XDP monitor tool, based on tracepoints\n"
;

/* Notice printed above the stats header when only errors are tracked */
static const char *__doc_err_only__=
 " NOTICE: Only tracking XDP redirect errors\n"
 "         Enable TX success stats via '--stats'\n"
 "         (which comes with a per packet processing overhead)\n"
;

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <locale.h>

#include <getopt.h>
#include <net/if.h>
#include <time.h>

#include "libbpf.h"
#include "bpf_load.h"
#include "bpf_util.h"

/* Non-zero enables the informational banner lines in stats_poll() */
static int verbose = 1;
/* Set by --debug; makes main() dump prog/map/event fd info after loading */
static bool debug = false;

/* Long options; each maps to the character handled in main()'s switch.
 * The table is terminated by an all-zero entry.
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"debug",	no_argument,		NULL, 'D' },
	{"stats",	no_argument,		NULL, 'S' },
	{"sec", 	required_argument,	NULL, 's' },
	{0, 0, NULL,  0 }
};

/* Print the tool description followed by every entry of long_options.
 * For each option either its flag target value or the short-option
 * character stored in ->val is shown.  argv[0] is used as program name.
 */
static void usage(char *argv[])
{
	const struct option *opt;

	printf("\nDOCUMENTATION:\n%s\n", __doc__);
	printf("\n");
	printf(" Usage: %s (options-see-below)\n", argv[0]);
	printf(" Listing options:\n");

	/* The option table ends at the first entry without a name */
	for (opt = long_options; opt->name; opt++) {
		printf(" --%-15s", opt->name);
		if (opt->flag)
			printf(" flag (internal value:%d)", *opt->flag);
		else
			printf("(internal short-option: -%c)", opt->val);
		printf("\n");
	}
	printf("\n");
}

#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
/* Return the current CLOCK_MONOTONIC time in nanoseconds.
 *
 * Terminates the program with EXIT_FAILURE if the clock cannot be read;
 * the reason (errno) is reported on stderr.
 */
__u64 gettime(void)
{
	struct timespec t;

	if (clock_gettime(CLOCK_MONOTONIC, &t) < 0) {
		/* The return value is just -1; errno carries the cause */
		fprintf(stderr, "Error with clock_gettime! (%s)\n",
			strerror(errno));
		exit(EXIT_FAILURE);
	}
	/* Widen seconds before scaling so the product is computed in 64 bit */
	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}

/* Indexes of the redirect result counters (match the BPF map keys) */
enum {
	REDIR_SUCCESS = 0,
	REDIR_ERROR = 1,
};
#define REDIR_RES_MAX 2

/* Human-readable name for each redirect result index */
static const char *redir_names[REDIR_RES_MAX] = {
	[REDIR_SUCCESS]	= "Success",
	[REDIR_ERROR]	= "Error",
};

/* Map a redirect result index to its name.
 *
 * Returns NULL when @err is not a valid index, i.e. negative or
 * >= REDIR_RES_MAX; the lower bound matters because @err is signed and a
 * negative value would otherwise index before the start of the table.
 */
static const char *err2str(int err)
{
	if (err >= 0 && err < REDIR_RES_MAX)
		return redir_names[err];
	return NULL;
}

/* One sample of a counter: its value and when it was read (gettime(), ns) */
struct record {
	__u64 counter;
	__u64 timestamp;
};

/* A full snapshot: one record per redirect result (success, error) */
struct stats_record {
	struct record xdp_redir[REDIR_RES_MAX];
};

/* Print the column header line of the stats table.  When @err_only is
 * set, the "only tracking errors" notice is printed first.
 */
static void stats_print_headers(bool err_only)
{
	const char *col_event  = "XDP_REDIRECT";
	const char *col_pps    = "pps ";
	const char *col_human  = "pps-human-readable";
	const char *col_period = "measure-period";

	if (err_only)
		printf("\n%s\n", __doc_err_only__);

	printf("%-14s %-10s %-18s %-9s\n",
	       col_event, col_pps, col_human, col_period);
}

static void stats_print(struct stats_record *rec,
			struct stats_record *prev,
			bool err_only)
{
	int i = 0;

	if (err_only)
		i = REDIR_ERROR;

	for (; i < REDIR_RES_MAX; i++) {
		struct record *r = &rec->xdp_redir[i];
		struct record *p = &prev->xdp_redir[i];
		__u64 period  = 0;
		__u64 packets = 0;
		double pps = 0;
		double period_ = 0;

		if (p->timestamp) {
			packets = r->counter - p->counter;
			period  = r->timestamp - p->timestamp;
			if (period > 0) {
				period_ = ((double) period / NANOSEC_PER_SEC);
				pps = packets / period_;
			}
		}

		printf("%-14s %-10.0f %'-18.0f %f\n",
		       err2str(i), pps, pps, period_);
	}
}

/* Read @key from the per-CPU map @fd and return the sum over all CPUs.
 *
 * For percpu maps, userspace gets one value per possible CPU, so the
 * lookup buffer is sized by bpf_num_possible_cpus().
 *
 * Returns 0 (after printing an error to stderr) if the lookup fails.
 */
static __u64 get_key32_value64_percpu(int fd, __u32 key)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	__u64 values[nr_cpus];
	__u64 sum = 0;
	unsigned int i;	/* same signedness as nr_cpus */

	if (bpf_map_lookup_elem(fd, &key, values) != 0) {
		fprintf(stderr,
			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
		return 0;
	}

	/* Sum values from each CPU */
	for (i = 0; i < nr_cpus; i++)
		sum += values[i];

	return sum;
}

static bool stats_collect(int fd, struct stats_record *rec)
{
	int i;

	/* TODO: Detect if someone unloaded the perf event_fd's, as
	 * this can happen by someone running perf-record -e
	 */

	for (i = 0; i < REDIR_RES_MAX; i++) {
		rec->xdp_redir[i].timestamp = gettime();
		rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
	}
	return true;
}

/* Endless polling loop: every @interval seconds take a snapshot of the
 * stats map (map_data[0]) and print the rates relative to the previous
 * snapshot.  @err_only limits the output to the error row.
 * This function does not return.
 */
static void stats_poll(int interval, bool err_only)
{
	struct stats_record rec, prev;
	int map_fd;

	/* Zeroed timestamps mark "no previous sample" for the first round */
	memset(&rec, 0, sizeof(rec));

	/* Trick to pretty printf with thousands separators use %' */
	setlocale(LC_NUMERIC, "en_US");

	/* TODO Need more advanced stats on error types */
	if (verbose) {
		printf("\n%s", __doc__);
		printf(" - Stats map: %s\n", map_data[0].name);
	}
	map_fd = map_data[0].fd;

	stats_print_headers(err_only);
	fflush(stdout);

	for (;;) {
		prev = rec;
		stats_collect(map_fd, &rec);
		stats_print(&rec, &prev, err_only);
		fflush(stdout);
		sleep(interval);
	}
}

/* Debug helper: list the file descriptors of the loaded BPF programs,
 * maps and attached events.  Event slots holding -1 are not listed.
 */
void print_bpf_prog_info(void)
{
	int idx;

	/* Prog info */
	printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
	for (idx = 0; idx < prog_cnt; idx++)
		printf(" - prog_fd[%d] = fd(%d)\n", idx, prog_fd[idx]);

	/* Maps info */
	printf("Loaded BPF prog have %d map(s)\n", map_data_count);
	for (idx = 0; idx < map_data_count; idx++)
		printf(" - map_data[%d] = fd(%d) name:%s\n",
		       idx, map_data[idx].fd, map_data[idx].name);

	/* Event info */
	printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
	for (idx = 0; idx < prog_cnt; idx++) {
		if (event_fd[idx] == -1)
			continue;
		printf(" - event_fd[%d] = fd(%d)\n", idx, event_fd[idx]);
	}
}

/* Entry point: load "<argv[0]>_kern.o", optionally detach the success
 * tracepoints, then poll and print the redirect statistics forever.
 *
 * Options (long form, or the short letter shown by usage()):
 *   --help/-h   print usage and exit with EXIT_FAILURE
 *   --debug/-D  dump prog/map/event fds after loading
 *   --stats/-S  also track successful redirects (per-packet overhead)
 *   --sec/-s N  polling interval in seconds, must be a positive integer
 *
 * Returns non-zero on option or load errors; otherwise stats_poll() never
 * returns.
 */
int main(int argc, char **argv)
{
	int longindex = 0, opt;
	int ret = EXIT_SUCCESS;
	char bpf_obj_file[256];
	int len;

	/* Default settings: */
	bool errors_only = true;
	int interval = 2;

	len = snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o",
		       argv[0]);
	if (len < 0 || (size_t)len >= sizeof(bpf_obj_file)) {
		fprintf(stderr, "ERR: BPF object file name too long\n");
		return EXIT_FAILURE;
	}

	/* Parse commands line args; the short letters are the ones that
	 * usage() advertises for each long option.
	 */
	while ((opt = getopt_long(argc, argv, "hDSs:",
				  long_options, &longindex)) != -1) {
		switch (opt) {
		case 'D':
			debug = true;
			break;
		case 'S':
			errors_only = false;
			break;
		case 's': {
			char *end;
			long secs;

			/* Reject garbage, zero (busy loop) and negative
			 * values (would turn into a huge unsigned sleep).
			 */
			errno = 0;
			secs = strtol(optarg, &end, 10);
			if (errno || end == optarg || *end != '\0' ||
			    secs <= 0 || secs != (int)secs) {
				fprintf(stderr,
					"ERR: --sec needs a positive integer, got '%s'\n",
					optarg);
				return EXIT_FAILURE;
			}
			interval = (int)secs;
			break;
		}
		case 'h':
		default:
			usage(argv);
			return EXIT_FAILURE;
		}
	}

	if (load_bpf_file(bpf_obj_file)) {
		printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
		return 1;
	}
	if (!prog_fd[0]) {
		printf("ERROR - load_bpf_file: %s\n", strerror(errno));
		return 1;
	}

	if (debug) {
		print_bpf_prog_info();
	}

	/* Unload/stop tracepoint event by closing fd's */
	if (errors_only) {
		/* The prog_fd[i] and event_fd[i] depend on the
		 * order the functions was defined in _kern.c
		 */
		close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
		close(prog_fd[2]);  /* func: trace_xdp_redirect */
		close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
		close(prog_fd[3]);  /* func: trace_xdp_redirect_map */
	}

	stats_poll(interval, errors_only);

	return ret;
}
Loading