
Commit eadf48ca authored by Alexander Shishkin, committed by Ingo Molnar

perf/x86/intel/pt: Add support for address range filtering in PT



Newer versions of Intel PT support address ranges, which can be used to
define IP address range-based filters or TraceSTOP regions. The number of
ranges is enumerated via CPUID.
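
For reference, the enumeration lives in CPUID leaf 0x14, sub-leaf 1: per
the SDM, EAX bits [2:0] report the number of configurable address ranges.
A minimal userspace sketch of that query (using GCC's <cpuid.h>; the PT
feature check via CPUID.(EAX=07H,ECX=0):EBX[25] is omitted for brevity):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x14, sub-leaf 1: Intel PT capabilities */
	if (!__get_cpuid_count(0x14, 1, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 0x14 not supported\n");
		return 1;
	}

	/* EAX[2:0]: number of configurable address ranges */
	printf("PT address ranges: %u\n", eax & 0x7);
	return 0;
}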

This patch implements PMU callbacks and related low-level code to allow
filter validation, configuration and programming into the hardware.
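
For context, these callbacks are exercised from userspace through the
existing PERF_EVENT_IOC_SET_FILTER ioctl; the address-filter string
grammar ("filter <start>/<size>@<object>") comes from the perf core
patch earlier in this series (the parent commit). A hypothetical sketch,
in which the PMU type value, addresses and path are placeholders (a real
program reads the type from /sys/bus/event_source/devices/intel_pt/type):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 8;	/* placeholder: read the real value from sysfs */
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* Trace only this range of the object; rejected by
	 * pt_event_addr_filters_validate() below if, e.g., it
	 * exceeds the hardware's range count. */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER,
		  "filter 0x1000/0x2000@/usr/bin/ls") < 0)
		perror("PERF_EVENT_IOC_SET_FILTER");

	close(fd);
	return 0;
}

Actual trace collection additionally requires mapping the AUX area; the
sketch only shows filter installation.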

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1461771888-10409-7-git-send-email-alexander.shishkin@linux.intel.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 375637bc
arch/x86/events/intel/pt.c: +168 −11
@@ -265,6 +265,75 @@ static bool pt_event_valid(struct perf_event *event)
 * These all are cpu affine and operate on a local PT
 */

/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
	unsigned long	msr_a;
	unsigned long	msr_b;
	unsigned int	reg_off;
} pt_address_ranges[] = {
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
		.reg_off = RTIT_CTL_ADDR0_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
		.reg_off = RTIT_CTL_ADDR1_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
		.reg_off = RTIT_CTL_ADDR2_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
		.reg_off = RTIT_CTL_ADDR3_OFFSET,
	}
};

static u64 pt_config_filters(struct perf_event *event)
{
	struct pt_filters *filters = event->hw.addr_filters;
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	unsigned int range = 0;
	u64 rtit_ctl = 0;

	if (!filters)
		return 0;

	perf_event_addr_filters_sync(event);

	for (range = 0; range < filters->nr_filters; range++) {
		struct pt_filter *filter = &filters->filter[range];

		/*
		 * Note, if the range has zero start/end addresses due
		 * to its dynamic object not being loaded yet, we just
		 * go ahead and program zeroed range, which will simply
		 * produce no data. Note^2: if executable code at 0x0
		 * is a concern, we can set up an "invalid" configuration
		 * such as msr_b < msr_a.
		 */

		/* avoid redundant msr writes */
		if (pt->filters.filter[range].msr_a != filter->msr_a) {
			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
			pt->filters.filter[range].msr_a = filter->msr_a;
		}

		if (pt->filters.filter[range].msr_b != filter->msr_b) {
			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
			pt->filters.filter[range].msr_b = filter->msr_b;
		}

		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
	}

	return rtit_ctl;
}

static void pt_config(struct perf_event *event)
{
	u64 reg;
@@ -274,7 +343,8 @@ static void pt_config(struct perf_event *event)
		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
	}

-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg = pt_config_filters(event);
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;

	if (!event->attr.exclude_kernel)
		reg |= RTIT_CTL_OS;
@@ -921,6 +991,82 @@ static void pt_buffer_free_aux(void *data)
	kfree(buf);
}

static int pt_addr_filters_init(struct perf_event *event)
{
	struct pt_filters *filters;
	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);

	if (!pt_cap_get(PT_CAP_num_address_ranges))
		return 0;

	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
	if (!filters)
		return -ENOMEM;

	if (event->parent)
		memcpy(filters, event->parent->hw.addr_filters,
		       sizeof(*filters));

	event->hw.addr_filters = filters;

	return 0;
}

static void pt_addr_filters_fini(struct perf_event *event)
{
	kfree(event->hw.addr_filters);
	event->hw.addr_filters = NULL;
}

static int pt_event_addr_filters_validate(struct list_head *filters)
{
	struct perf_addr_filter *filter;
	int range = 0;

	list_for_each_entry(filter, filters, entry) {
		/* PT doesn't support single address triggers */
		if (!filter->range)
			return -EOPNOTSUPP;

		if (!filter->inode && !kernel_ip(filter->offset))
			return -EINVAL;

		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
			return -EOPNOTSUPP;
	}

	return 0;
}

static void pt_event_addr_filters_sync(struct perf_event *event)
{
	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
	struct pt_filters *filters = event->hw.addr_filters;
	struct perf_addr_filter *filter;
	int range = 0;

	if (!filters)
		return;

	list_for_each_entry(filter, &head->list, entry) {
		if (filter->inode && !offs[range]) {
			msr_a = msr_b = 0;
		} else {
			/* apply the offset */
			msr_a = filter->offset + offs[range];
			msr_b = filter->size + msr_a;
		}

		filters->filter[range].msr_a  = msr_a;
		filters->filter[range].msr_b  = msr_b;
		filters->filter[range].config = filter->filter ? 1 : 2;
		range++;
	}

	filters->nr_filters = range;
}

/**
 * intel_pt_interrupt() - PT PMI handler
 */
@@ -1128,6 +1274,7 @@ static void pt_event_read(struct perf_event *event)

static void pt_event_destroy(struct perf_event *event)
{
	pt_addr_filters_fini(event);
	x86_del_exclusive(x86_lbr_exclusive_pt);
}

@@ -1142,6 +1289,11 @@ static int pt_event_init(struct perf_event *event)
	if (x86_add_exclusive(x86_lbr_exclusive_pt))
		return -EBUSY;

	if (pt_addr_filters_init(event)) {
		x86_del_exclusive(x86_lbr_exclusive_pt);
		return -ENOMEM;
	}

	event->destroy = pt_event_destroy;

	return 0;
@@ -1205,6 +1357,11 @@ static __init int pt_init(void)
	pt_pmu.pmu.read			 = pt_event_read;
	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
	pt_pmu.pmu.addr_filters_sync     = pt_event_addr_filters_sync;
	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
	pt_pmu.pmu.nr_addr_filters       =
		pt_cap_get(PT_CAP_num_address_ranges);

	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);

	return ret;
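
To make the rtit_ctl composition in pt_config_filters() above concrete:
each range owns a 4-bit ADDRn_CFG field in RTIT_CTL starting at bit 32
(hence the reg_off values of 32, 36, 40 and 44), and
pt_event_addr_filters_sync() stores 1 for a trace-enable range and 2 for
TraceStop. A worked example, not kernel code, for two configured ranges:

	/* ADDRn_CFG fields sit at RTIT_CTL bits 35:32, 39:36,
	 * 43:40 and 47:44. */
	u64 rtit_ctl = 0;

	rtit_ctl |= 1ULL << 32;	/* ADDR0_CFG = 1: trace while IP in range */
	rtit_ctl |= 2ULL << 36;	/* ADDR1_CFG = 2: TraceStop on entering range */

	/* rtit_ctl == 0x0000002100000000 */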
arch/x86/events/intel/pt.h: +26 −0
@@ -140,14 +140,40 @@ struct pt_buffer {
	struct topa_entry	*topa_index[0];
};

#define PT_FILTERS_NUM	4

/**
 * struct pt_filter - IP range filter configuration
 * @msr_a:	range start, goes to RTIT_ADDRn_A
 * @msr_b:	range end, goes to RTIT_ADDRn_B
 * @config:	4-bit field in RTIT_CTL
 */
struct pt_filter {
	unsigned long	msr_a;
	unsigned long	msr_b;
	unsigned long	config;
};

/**
 * struct pt_filters - IP range filtering context
 * @filter:	filters defined for this context
 * @nr_filters:	number of defined filters in the @filter array
 */
struct pt_filters {
	struct pt_filter	filter[PT_FILTERS_NUM];
	unsigned int		nr_filters;
};

/**
 * struct pt - per-cpu pt context
 * @handle:	perf output handle
 * @filters:		last configured filters
 * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
 * @vmx_on:	1 if VMX is ON on this cpu
 */
struct pt {
	struct perf_output_handle handle;
	struct pt_filters	filters;
	int			handle_nmi;
	int			vmx_on;
};
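
The perf core also exposes the PMU's nr_addr_filters (wired up to
PT_CAP_num_address_ranges in pt_init() above) through sysfs, so userspace
can discover the range count without issuing CPUID itself. A minimal
sketch, assuming the standard event_source sysfs path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/intel_pt/nr_addr_filters", "r");
	unsigned int nr;

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* The attribute holds a single decimal count */
	if (fscanf(f, "%u", &nr) != 1)
		nr = 0;
	fclose(f);

	printf("intel_pt address filters: %u\n", nr);
	return 0;
}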