
Commit a1d753d2 authored by David S. Miller

Merge branch 'bpf-perf-time-helpers'



Yonghong Song says:

====================
bpf: add two helpers to read perf event enabled/running time

Hardware PMU counters are a limited resource. When more PMU-based
perf events are opened than there are available counters, the kernel
multiplexes these events so that each event gets a certain percentage
(but not 100%) of the PMU time. When multiplexing happens, the number
of samples or the counter value does not reflect what it would be
without multiplexing, which makes comparisons between different runs
difficult.

Typically, the number of samples or the counter value should be
normalized before being compared with other experiments. The usual
normalization is:
  normalized_num_samples = num_samples * time_enabled / time_running
  normalized_counter_value = counter_value * time_enabled / time_running
where time_enabled is the time the event has been enabled and
time_running is the time the event has actually been running since the
last normalization.

This patch set implements two helper functions.
The helper bpf_perf_event_read_value reads counter/time_enabled/time_running
for a perf event array map. The helper bpf_perf_prog_read_value reads
counter/time_enabled/time_running for a bpf prog of type BPF_PROG_TYPE_PERF_EVENT.
====================
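
As a rough illustration of the intended use (not part of this commit):
a minimal BPF C sketch, in current libbpf style, that reads a counter
through the new bpf_perf_event_read_value helper and applies the
normalization above. The map name, attach point, and loader setup are
assumptions for the example.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Illustrative map: the loader is assumed to open one hardware perf
 * event per CPU and store the fds here (sized for up to 64 CPUs).
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 64);
} counters SEC(".maps");

SEC("kprobe/do_sys_open")
int normalize_count(struct pt_regs *ctx)
{
	struct bpf_perf_event_value v = {};
	__u64 normalized;

	/* Read counter, time_enabled and time_running for this CPU's event. */
	if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
				      &v, sizeof(v)))
		return 0;

	/* Scale the raw count up to compensate for multiplexing. */
	if (v.running)
		normalized = v.counter * v.enabled / v.running;
	else
		normalized = v.counter;

	bpf_printk("normalized count: %llu\n", normalized);
	return 0;
}

char _license[] SEC("license") = "GPL";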

Signed-off-by: David S. Miller <davem@davemloft.net>
parents bdc47641 81b9cf80
include/linux/perf_event.h +5 −2
@@ -806,6 +806,7 @@ struct perf_output_handle {
 struct bpf_perf_event_data_kern {
 	struct pt_regs *regs;
 	struct perf_sample_data *data;
+	struct perf_event *event;
 };
 
 #ifdef CONFIG_CGROUP_PERF
@@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
-int perf_event_read_local(struct perf_event *event, u64 *value);
+int perf_event_read_local(struct perf_event *event, u64 *value,
+			  u64 *enabled, u64 *running);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);

@@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline int perf_event_read_local(struct perf_event *event, u64 *value)
+static inline int perf_event_read_local(struct perf_event *event, u64 *value,
+					u64 *enabled, u64 *running)
 {
 	return -EINVAL;
 }
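
For illustration only, a hypothetical kernel-side wrapper around the
extended perf_event_read_local() signature, normalizing the count the
same way (not part of this series; the function still requires the
event to be local to the current CPU or task):

#include <linux/math64.h>
#include <linux/perf_event.h>

/* Hypothetical helper: read a local event and correct its count for
 * multiplexing by scaling with time_enabled / time_running.
 */
static u64 read_normalized_count(struct perf_event *event)
{
	u64 value, enabled, running;

	if (perf_event_read_local(event, &value, &enabled, &running))
		return 0;

	/* div64_u64() keeps the 64-bit division safe on 32-bit kernels. */
	return running ? div64_u64(value * enabled, running) : value;
}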
include/uapi/linux/bpf.h +27 −2
@@ -641,6 +641,21 @@ union bpf_attr {
  *     @xdp_md: pointer to xdp_md
  *     @delta: An positive/negative integer to be added to xdp_md.data_meta
  *     Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ *     read perf event counter value and perf event enabled/running time
+ *     @map: pointer to perf_event_array map
+ *     @flags: index of event in the map or bitmask flags
+ *     @buf: buf to fill
+ *     @buf_size: size of the buf
+ *     Return: 0 on success or negative error code
+ *
+ * int bpf_perf_prog_read_value(ctx, buf, buf_size)
+ *     read perf prog attached perf event counter and enabled/running time
+ *     @ctx: pointer to ctx
+ *     @buf: buf to fill
+ *     @buf_size: size of the buf
+ *     Return : 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -697,7 +712,9 @@ union bpf_attr {
 	FN(redirect_map),		\
 	FN(sk_redirect_map),		\
 	FN(sock_map_update),		\
-	FN(xdp_adjust_meta),
+	FN(xdp_adjust_meta),		\
+	FN(perf_event_read_value),	\
+	FN(perf_prog_read_value),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -741,7 +758,9 @@ enum bpf_func_id {
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
 
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
@@ -934,4 +953,10 @@ enum {
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
 #define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
 
+struct bpf_perf_event_value {
+	__u64 counter;
+	__u64 enabled;
+	__u64 running;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
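
To make the second helper concrete, a hedged sketch of a
BPF_PROG_TYPE_PERF_EVENT program reading its attached event into the
new struct bpf_perf_event_value (section and program names are
illustrative):

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>

SEC("perf_event")
int on_sample(struct bpf_perf_event_data *ctx)
{
	struct bpf_perf_event_value v = {};

	/* Reads the event stashed in bpf_perf_event_data_kern->event
	 * (see the kernel/events/core.c hunk below).
	 */
	if (bpf_perf_prog_read_value(ctx, &v, sizeof(v)))
		return 0;

	bpf_printk("counter %llu enabled %llu\n", v.counter, v.enabled);
	return 0;
}

char _license[] SEC("license") = "GPL";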
kernel/bpf/arraymap.c +1 −1
@@ -492,7 +492,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map,

 	ee = ERR_PTR(-EOPNOTSUPP);
 	event = perf_file->private_data;
-	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
+	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
 		goto err_out;
 
 	ee = bpf_event_entry_gen(perf_file, map_file);
kernel/bpf/verifier.c +3 −1
@@ -1552,7 +1552,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		break;
 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 		if (func_id != BPF_FUNC_perf_event_read &&
-		    func_id != BPF_FUNC_perf_event_output)
+		    func_id != BPF_FUNC_perf_event_output &&
+		    func_id != BPF_FUNC_perf_event_read_value)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -1595,6 +1596,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		break;
 	case BPF_FUNC_perf_event_read:
 	case BPF_FUNC_perf_event_output:
+	case BPF_FUNC_perf_event_read_value:
 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 			goto error;
 		break;
kernel/events/core.c +13 −2
@@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event)
  *     will not be local and we cannot read them atomically
  *   - must not have a pmu::count method
  */
-int perf_event_read_local(struct perf_event *event, u64 *value)
+int perf_event_read_local(struct perf_event *event, u64 *value,
+			  u64 *enabled, u64 *running)
 {
 	unsigned long flags;
 	int ret = 0;
+	u64 now;
 
 	/*
 	 * Disabling interrupts avoids all counter scheduling (context
 		goto out;
 	}
 
+	now = event->shadow_ctx_time + perf_clock();
+	if (enabled)
+		*enabled = now - event->tstamp_enabled;
 	/*
 	 * If the event is currently on this CPU, its either a per-task event,
 	 * or local to this CPU. Furthermore it means its ACTIVE (otherwise
 	 * oncpu == -1).
 	 */
-	if (event->oncpu == smp_processor_id())
+	if (event->oncpu == smp_processor_id()) {
 		event->pmu->read(event);
+		if (running)
+			*running = now - event->tstamp_running;
+	} else if (running) {
+		*running = event->total_time_running;
+	}
 
 	*value = local64_read(&event->count);
 out:
@@ -8072,6 +8082,7 @@ static void bpf_overflow_handler(struct perf_event *event,
 	struct bpf_perf_event_data_kern ctx = {
 		.data = data,
 		.regs = regs,
+		.event = event,
 	};
 	int ret = 0;
