
Commit 1c024eca authored by Peter Zijlstra, committed by Ingo Molnar

perf, trace: Optimize tracepoints by using per-tracepoint-per-cpu hlist to track events



Avoid the swevent hash table by using per-tracepoint hlists.
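
As an aside for readers of the diff below, here is a condensed, non-standalone sketch of the scheme (function and field names are taken from the patch itself; error handling, probe registration and the shared scratch buffers are trimmed): each struct ftrace_event_call gets a per-CPU hlist of attached perf events, and the tracepoint callback walks only the current CPU's list under RCU instead of hashing the event id into the global swevent table.

#include <linux/ftrace_event.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/rculist.h>

/*
 * First user of this tracepoint: allocate one hlist head per CPU and hang
 * it off the ftrace_event_call.
 */
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head *list;
	int cpu;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;
	return 0;
}

/* pmu ->enable: hook the event onto the list of the CPU it runs on. */
int perf_trace_enable(struct perf_event *p_event)
{
	struct hlist_head *list = p_event->tp_event->perf_events;

	list = per_cpu_ptr(list, smp_processor_id());
	hlist_add_head_rcu(&p_event->hlist_entry, list);
	return 0;
}

/*
 * Tracepoint callback: the generated perf_trace_templ_##call() hands in
 * this CPU's list head, so delivery is a plain RCU list walk with no hash
 * lookup and no event-id comparison.
 */
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
		   struct pt_regs *regs, struct hlist_head *head)
{
	struct perf_sample_data data;
	struct perf_event *event;
	struct hlist_node *node;
	struct perf_raw_record raw = {
		.size = entry_size,
		.data = record,
	};

	perf_sample_data_init(&data, addr);
	data.raw = &raw;

	rcu_read_lock();
	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
		/* perf_tp_event_match(): exclude + filter checks, see below */
		if (perf_tp_event_match(event, &data, regs))
			perf_swevent_add(event, count, 1, &data, regs);
	}
	rcu_read_unlock();
}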

Also, avoid conditionals on the fast path by ordering against probe
unregister, so that we can never end up on the callback path without the
per-tracepoint data being there.
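
The teardown side, condensed the same way from the patch: ->disable simply unhooks the event from its CPU's list, and only when the last user of the tracepoint goes away is the probe unregistered and the per-CPU list storage freed, in that order, which is what lets the callback skip any "is the data there" check.

/* pmu ->disable: unhook from the per-CPU list; the RCU walk above
 * tolerates concurrent removal, so no flag or NULL check is needed
 * on the fast path. */
void perf_trace_disable(struct perf_event *p_event)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

/* Last user gone: unregister the probe first, free the per-CPU lists
 * after, so the callback never runs with perf_events pointing at
 * freed memory. */
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;

	if (--tp_event->perf_refcount > 0)
		return;

	tp_event->perf_event_disable(tp_event);	/* probe unregister */

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;
}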

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20100521090710.473188012@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b7e2ecef
+8 −8
@@ -133,7 +133,7 @@ struct ftrace_event_call {
	void			*data;

	int			perf_refcount;
	void			*perf_data;
	struct hlist_head	*perf_events;
	int			(*perf_event_enable)(struct ftrace_event_call *);
	void			(*perf_event_disable)(struct ftrace_event_call *);
};
@@ -192,8 +192,10 @@ struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int perf_trace_enable(int event_id, void *data);
extern void perf_trace_disable(int event_id);
extern int  perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int  perf_trace_enable(struct perf_event *event);
extern void perf_trace_disable(struct perf_event *event);
extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
@@ -202,11 +204,9 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
		       u64 count, struct pt_regs *regs, void *event)
		       u64 count, struct pt_regs *regs, void *head)
{
	struct trace_entry *entry = raw_data;

	perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
	perf_tp_event(addr, count, raw_data, size, regs, head);
	perf_swevent_put_recursion_context(rctx);
}
#endif
+4 −2
@@ -727,6 +727,7 @@ struct perf_event {
	perf_overflow_handler_t		overflow_handler;

#ifdef CONFIG_EVENT_TRACING
	struct ftrace_event_call	*tp_event;
	struct event_filter		*filter;
#endif

@@ -992,8 +993,9 @@ static inline bool perf_paranoid_kernel(void)
}

extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs, void *event);
extern void perf_tp_event(u64 addr, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
			  struct hlist_head *head);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
+3 −1
@@ -768,6 +768,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
	struct ftrace_raw_##call *entry;				\
	u64 __addr = 0, __count = 1;					\
	struct hlist_head *head;					\
	int __entry_size;						\
	int __data_size;						\
	int rctx;							\
@@ -790,8 +791,9 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
									\
	{ assign; }							\
									\
	head = per_cpu_ptr(event_call->perf_events, smp_processor_id());\
	perf_trace_buf_submit(entry, __entry_size, rctx, __addr,	\
			       __count, __regs, event_call->perf_data);	\
		__count, __regs, head);					\
}

#undef DEFINE_EVENT
+48 −46
@@ -4005,9 +4005,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
	perf_swevent_overflow(event, 0, nmi, data, regs);
}

static int perf_tp_event_match(struct perf_event *event,
				struct perf_sample_data *data);

static int perf_exclude_event(struct perf_event *event,
			      struct pt_regs *regs)
{
@@ -4037,10 +4034,6 @@ static int perf_swevent_match(struct perf_event *event,
	if (perf_exclude_event(event, regs))
		return 0;

	if (event->attr.type == PERF_TYPE_TRACEPOINT &&
	    !perf_tp_event_match(event, data))
		return 0;

	return 1;
}

@@ -4122,7 +4115,7 @@ end:

int perf_swevent_get_recursion_context(void)
{
	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	int rctx;

	if (in_nmi())
@@ -4134,10 +4127,8 @@ int perf_swevent_get_recursion_context(void)
	else
		rctx = 0;

	if (cpuctx->recursion[rctx]) {
		put_cpu_var(perf_cpu_context);
	if (cpuctx->recursion[rctx])
		return -1;
	}

	cpuctx->recursion[rctx]++;
	barrier();
@@ -4151,7 +4142,6 @@ void perf_swevent_put_recursion_context(int rctx)
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	barrier();
	cpuctx->recursion[rctx]--;
	put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);

@@ -4162,6 +4152,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
	struct perf_sample_data data;
	int rctx;

	preempt_disable_notrace();
	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;
@@ -4171,6 +4162,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);

	perf_swevent_put_recursion_context(rctx);
	preempt_enable_notrace();
}

static void perf_swevent_read(struct perf_event *event)
@@ -4486,11 +4478,43 @@ static int swevent_hlist_get(struct perf_event *event)

#ifdef CONFIG_EVENT_TRACING

void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
		   int entry_size, struct pt_regs *regs, void *event)
static const struct pmu perf_ops_tracepoint = {
	.enable		= perf_trace_enable,
	.disable	= perf_trace_disable,
	.read		= perf_swevent_read,
	.unthrottle	= perf_swevent_unthrottle,
};

static int perf_tp_filter_match(struct perf_event *event,
				struct perf_sample_data *data)
{
	void *record = data->raw->data;

	if (likely(!event->filter) || filter_match_preds(event->filter, record))
		return 1;
	return 0;
}

static int perf_tp_event_match(struct perf_event *event,
				struct perf_sample_data *data,
				struct pt_regs *regs)
{
	if (perf_exclude_event(event, regs))
		return 0;

	if (!perf_tp_filter_match(event, data))
		return 0;

	return 1;
}

void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
		   struct pt_regs *regs, struct hlist_head *head)
{
	const int type = PERF_TYPE_TRACEPOINT;
	struct perf_sample_data data;
	struct perf_event *event;
	struct hlist_node *node;

	struct perf_raw_record raw = {
		.size = entry_size,
		.data = record,
@@ -4499,30 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
	perf_sample_data_init(&data, addr);
	data.raw = &raw;

	if (!event) {
		do_perf_sw_event(type, event_id, count, 1, &data, regs);
		return;
	}

	if (perf_swevent_match(event, type, event_id, &data, regs))
	rcu_read_lock();
	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
		if (perf_tp_event_match(event, &data, regs))
			perf_swevent_add(event, count, 1, &data, regs);
	}
EXPORT_SYMBOL_GPL(perf_tp_event);

static int perf_tp_event_match(struct perf_event *event,
				struct perf_sample_data *data)
{
	void *record = data->raw->data;

	if (likely(!event->filter) || filter_match_preds(event->filter, record))
		return 1;
	return 0;
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(perf_tp_event);

static void tp_perf_event_destroy(struct perf_event *event)
{
	perf_trace_disable(event->attr.config);
	swevent_hlist_put(event);
	perf_trace_destroy(event);
}

static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4538,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
			!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	if (perf_trace_enable(event->attr.config, event))
	err = perf_trace_init(event);
	if (err)
		return NULL;

	event->destroy = tp_perf_event_destroy;
	err = swevent_hlist_get(event);
	if (err) {
		perf_trace_disable(event->attr.config);
		return ERR_PTR(err);
	}

	return &perf_ops_generic;
	return &perf_ops_tracepoint;
}

static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4576,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event)

#else

static int perf_tp_event_match(struct perf_event *event,
				struct perf_sample_data *data)
{
	return 1;
}

static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
	return NULL;
+67 −60
@@ -23,14 +23,25 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;

static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head *list;
	int ret = -ENOMEM;
	int cpu;

	if (event->perf_refcount++ > 0) {
		event->perf_data = NULL;
	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;
	}

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char *buf;
@@ -39,20 +50,20 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
		for (i = 0; i < 4; i++) {
			buf = (char *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail_buf;
				goto fail;

			rcu_assign_pointer(perf_trace_buf[i], buf);
			perf_trace_buf[i] = buf;
		}
	}

	ret = event->perf_event_enable(event);
	if (!ret) {
		event->perf_data = data;
	ret = tp_event->perf_event_enable(tp_event);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;
	}

fail_buf:
fail:
	if (!total_ref_count) {
		int i;

@@ -61,21 +72,26 @@ fail_buf:
			perf_trace_buf[i] = NULL;
		}
	}
	event->perf_refcount--;

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

int perf_trace_enable(int event_id, void *data)
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *event;
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(event, &ftrace_events, list) {
		if (event->id == event_id && event->perf_event_enable &&
		    try_module_get(event->mod)) {
			ret = perf_trace_event_enable(event, data);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->id == event_id && tp_event->perf_event_enable &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			break;
		}
	}
@@ -84,53 +100,52 @@ int perf_trace_enable(int event_id, void *data)
	return ret;
}

static void perf_trace_event_disable(struct ftrace_event_call *event)
int perf_trace_enable(struct perf_event *p_event)
{
	if (--event->perf_refcount > 0)
		return;
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head *list;

	event->perf_event_disable(event);
	list = tp_event->perf_events;
	if (WARN_ON_ONCE(!list))
		return -EINVAL;

	if (!--total_ref_count) {
		char *buf[4];
		int i;
	list = per_cpu_ptr(list, smp_processor_id());
	hlist_add_head_rcu(&p_event->hlist_entry, list);

		for (i = 0; i < 4; i++) {
			buf[i] = perf_trace_buf[i];
			rcu_assign_pointer(perf_trace_buf[i], NULL);
	return 0;
}

		/*
		 * Ensure every events in profiling have finished before
		 * releasing the buffers
		 */
		synchronize_sched();

		for (i = 0; i < 4; i++)
			free_percpu(buf[i]);
	}
void perf_trace_disable(struct perf_event *p_event)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

void perf_trace_disable(int event_id)
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *event;
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	list_for_each_entry(event, &ftrace_events, list) {
		if (event->id == event_id) {
			perf_trace_event_disable(event);
			module_put(event->mod);
			break;
	if (--tp_event->perf_refcount > 0)
		return;

	tp_event->perf_event_disable(tp_event);

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < 4; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
	mutex_unlock(&event_mutex);
}

__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	char *trace_buf, *raw_data;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
@@ -139,13 +154,9 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		goto err_recursion;

	trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
	if (!trace_buf)
		goto err;
		return NULL;

	raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
	raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
@@ -155,9 +166,5 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
	entry->type = type;

	return raw_data;
err:
	perf_swevent_put_recursion_context(*rctxp);
err_recursion:
	return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);