Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d1706b39, authored by Andi Kleen, committed by Arnaldo Carvalho de Melo
Browse files

perf tools: Add support for skipping itrace instructions

When using 'perf script' to look at PT traces it is often useful to
ignore the initialization code at the beginning.

On larger traces, which may have many millions of instructions in
initialization code, doing that in a pipeline can be very slow, with perf
script spending a lot of CPU time calling printf and writing data.

This patch adds an extension to the --itrace argument that skips 'n'
events (instructions, branches or transactions) at the beginning. This
is much more efficient.

v2:
Add support for BTS (Adrian Hunter)
Document in itrace.txt
Fix branch check
Check transactions and instructions too

Committer note:

To test intel_pt one needs to make sure VT-x isn't active, i.e.
stopping KVM guests on the test machine, as described by Andi Kleen
at http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com



Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-1-git-send-email-andi@firstfloor.org


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent f7380c12
Loading
Loading
Loading
Loading
+7 −0
Original line number Original line Diff line number Diff line
@@ -672,6 +672,7 @@ The letters are:
	d	create a debug log
	d	create a debug log
	g	synthesize a call chain (use with i or x)
	g	synthesize a call chain (use with i or x)
	l	synthesize last branch entries (use with i or x)
	l	synthesize last branch entries (use with i or x)
	s	skip initial number of events


"Instructions" events look like they were recorded by "perf record -e
"Instructions" events look like they were recorded by "perf record -e
instructions".
instructions".
@@ -730,6 +731,12 @@ from one sample to the next.


To disable trace decoding entirely, use the option --no-itrace.
To disable trace decoding entirely, use the option --no-itrace.


It is also possible to skip a given number of the generated events
(instructions, branches, transactions) at the beginning of the trace.
This is useful for ignoring initialization code. For example:

	--itrace=i0nss1000000

skips the first million instructions.


dump option
dump option
-----------
-----------
+8 −0
Original line number Original line Diff line number Diff line
@@ -7,6 +7,7 @@
		d	create a debug log
		d	create a debug log
		g	synthesize a call chain (use with i or x)
		g	synthesize a call chain (use with i or x)
		l	synthesize last branch entries (use with i or x)
		l	synthesize last branch entries (use with i or x)
		s       skip initial number of events


	The default is all events i.e. the same as --itrace=ibxe
	The default is all events i.e. the same as --itrace=ibxe


@@ -24,3 +25,10 @@


	Also the number of last branch entries (default 64, max. 1024) for
	Also the number of last branch entries (default 64, max. 1024) for
	instructions or transactions events can be specified.
	instructions or transactions events can be specified.

	It is also possible to skip a given number of the generated events
	(instructions, branches, transactions) at the beginning of the trace.
	This is useful for ignoring initialization code. For example:

	--itrace=i0nss1000000

	skips the first million instructions.
+7 −0
Original line number Original line Diff line number Diff line
@@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
	synth_opts->initial_skip = 0;
}
}


/*
/*
@@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
				synth_opts->last_branch_sz = val;
				synth_opts->last_branch_sz = val;
			}
			}
			break;
			break;
		case 's':
			synth_opts->initial_skip = strtoul(p, &endptr, 10);
			if (p == endptr)
				goto out_err;
			p = endptr;
			break;
		case ' ':
		case ' ':
		case ',':
		case ',':
			break;
			break;
+2 −0
Original line number Original line Diff line number Diff line
@@ -68,6 +68,7 @@ enum itrace_period_type {
 * @last_branch_sz: branch context size
 * @last_branch_sz: branch context size
 * @period: 'instructions' events period
 * @period: 'instructions' events period
 * @period_type: 'instructions' events period type
 * @period_type: 'instructions' events period type
 * @initial_skip: skip N events at the beginning.
 */
 */
struct itrace_synth_opts {
struct itrace_synth_opts {
	bool			set;
	bool			set;
@@ -86,6 +87,7 @@ struct itrace_synth_opts {
	unsigned int		last_branch_sz;
	unsigned int		last_branch_sz;
	unsigned long long	period;
	unsigned long long	period;
	enum itrace_period_type	period_type;
	enum itrace_period_type	period_type;
	unsigned long		initial_skip;
};
};


/**
/**
+5 −0
Original line number Original line Diff line number Diff line
@@ -66,6 +66,7 @@ struct intel_bts {
	u64				branches_id;
	u64				branches_id;
	size_t				branches_event_size;
	size_t				branches_event_size;
	bool				synth_needs_swap;
	bool				synth_needs_swap;
	unsigned long			num_events;
};
};


struct intel_bts_queue {
struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
	union perf_event event;
	union perf_event event;
	struct perf_sample sample = { .ip = 0, };
	struct perf_sample sample = { .ip = 0, };


	if (bts->synth_opts.initial_skip &&
	    bts->num_events++ <= bts->synth_opts.initial_skip)
		return 0;

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.misc = PERF_RECORD_MISC_USER;
	event.sample.header.size = sizeof(struct perf_event_header);
	event.sample.header.size = sizeof(struct perf_event_header);
Loading