Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f6ef5658 authored by Daniel Borkmann
Browse files

Merge branch 'bpf-raw-tracepoints'

Alexei Starovoitov says:

====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
  that increases memory overhead, but can be optimized/compressed later.
  Now it's zero changes in tracepoint.[ch]

v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
  and corresponding bpf probe function instead of kallsyms approach.
  dropped kernel_tracepoint_find_by_name() patch

v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
  introduce kernel_tracepoint_find_by_name() helper

v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6

v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
  struct to u64. That nicely reduced the size of patch 1

v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
  (or rather moved them from apparmor)
  that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
  Now any tracepoint with >12 args will cause a build error

v1->v2:
- simplified api by combining bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
  bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
  That simplifies bpf_detach as well which is now simple close() of fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot

v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.

The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
A few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929



Patch 1 is a kernel-wide cleanup that converts pass-struct-by-value into
pass-struct-by-reference for tracepoints.

Patches 2 and 3 are minor cleanups to address allyesconfig build

Patch 4 refactors trace_iwlwifi_dev_ucode_error from 17 to 4 args

Patch 5 introduces COUNT_ARGS macro

Patch 6 introduces BPF_RAW_TRACEPOINT api.
Auto-cleanup and support for multiple concurrent users are must-have
features of a tracing API. For bpf raw tracepoints it looks like:
  // load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
  prog_fd = bpf_prog_load(...);

  // receive anon_inode fd for given bpf_raw_tracepoint
  // and attach bpf program to it
  raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);

Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.

Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests

samples/bpf/test_overhead performance on 1 cpu:

tracepoint    base  kprobe+bpf tracepoint+bpf raw_tracepoint+bpf
task_rename   1.1M   769K        947K            1.0M
urandom_read  789K   697K        750K            755K
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 6f5c39fa 3bbe0869
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
	if (copy_to_user((void __user *)arg, &cinfo, len))
		return -EFAULT;

+6 −6
Original line number Diff line number Diff line
@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
TRACE_EVENT(hfi1_ctxt_info,
	    TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
		     unsigned int subctxt,
		     struct hfi1_ctxt_info cinfo),
		     struct hfi1_ctxt_info *cinfo),
	    TP_ARGS(dd, ctxt, subctxt, cinfo),
	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
			     __field(unsigned int, ctxt)
@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
	    TP_fast_assign(DD_DEV_ASSIGN(dd);
			    __entry->ctxt = ctxt;
			    __entry->subctxt = subctxt;
			    __entry->egrtids = cinfo.egrtids;
			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
			    __entry->rcvegr_size = cinfo.rcvegr_size;
			    __entry->egrtids = cinfo->egrtids;
			    __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
			    __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
			    __entry->sdma_ring_size = cinfo->sdma_ring_size;
			    __entry->rcvegr_size = cinfo->rcvegr_size;
			    ),
	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
		      __get_str(dev),
+1 −6
Original line number Diff line number Diff line
@@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv)
			priv->status, table.valid);
	}

	trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
				      table.data1, table.data2, table.line,
				      table.blink2, table.ilink1, table.ilink2,
				      table.bcon_time, table.gp1, table.gp2,
				      table.gp3, table.ucode_ver, table.hw_ver,
				      0, table.brd_ver);
	trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver);
	IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id,
		desc_lookup(table.error_id));
	IWL_ERR(priv, "0x%08X | uPc\n", table.pc);
+18 −21
Original line number Diff line number Diff line
@@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx,
		  __entry->framelen, __entry->skbaddr)
);

struct iwl_error_event_table;
TRACE_EVENT(iwlwifi_dev_ucode_error,
	TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
		 u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1,
		 u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type,
		 u32 major, u32 minor, u32 hw_ver, u32 brd_ver),
	TP_ARGS(dev, desc, tsf_low, data1, data2, line,
		 blink2, ilink1, ilink2, bcon_time, gp1, gp2,
		 rev_type, major, minor, hw_ver, brd_ver),
	TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table,
		 u32 hw_ver, u32 brd_ver),
	TP_ARGS(dev, table, hw_ver, brd_ver),
	TP_STRUCT__entry(
		DEV_ENTRY
		__field(u32, desc)
@@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error,
	),
	TP_fast_assign(
		DEV_ASSIGN;
		__entry->desc = desc;
		__entry->tsf_low = tsf_low;
		__entry->data1 = data1;
		__entry->data2 = data2;
		__entry->line = line;
		__entry->blink2 = blink2;
		__entry->ilink1 = ilink1;
		__entry->ilink2 = ilink2;
		__entry->bcon_time = bcon_time;
		__entry->gp1 = gp1;
		__entry->gp2 = gp2;
		__entry->rev_type = rev_type;
		__entry->major = major;
		__entry->minor = minor;
		__entry->desc = table->error_id;
		__entry->tsf_low = table->tsf_low;
		__entry->data1 = table->data1;
		__entry->data2 = table->data2;
		__entry->line = table->line;
		__entry->blink2 = table->blink2;
		__entry->ilink1 = table->ilink1;
		__entry->ilink2 = table->ilink2;
		__entry->bcon_time = table->bcon_time;
		__entry->gp1 = table->gp1;
		__entry->gp2 = table->gp2;
		__entry->rev_type = table->gp3;
		__entry->major = table->ucode_ver;
		__entry->minor = table->hw_ver;
		__entry->hw_ver = hw_ver;
		__entry->brd_ver = brd_ver;
	),
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#ifndef __CHECKER__
#include "iwl-trans.h"

#include "dvm/commands.h"
#define CREATE_TRACE_POINTS
#include "iwl-devtrace.h"

Loading