Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cfa0bd52 authored by Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-2' of...

Merge tag 'perf-core-for-mingo-2' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

  - Support handling complete branch stacks as histograms (Andi Kleen)

Infrastructure changes:

  - Prep work for supporting per-pkg and snapshot counters in 'perf stat' (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e460bfdc 09a6a1b0
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ OPTIONS
--dump-raw-trace::
        Dump raw trace in ASCII.

-g [type,min[,limit],order[,key]]::
-g [type,min[,limit],order[,key][,branch]]::
--call-graph::
        Display call chains using type, min percent threshold, optional print
	limit and order.
@@ -177,6 +177,11 @@ OPTIONS
	- function: compare on functions
	- address: compare on individual code addresses

	branch can be:
	- branch: include last branch information in callgraph
	when available. Usually more convenient to use --branch-history
	for this.

	Default: fractal,0.5,callee,function.

--children::
@@ -266,6 +271,11 @@ OPTIONS
	branch stacks and it will automatically switch to the branch view mode,
	unless --no-branch-stack is used.

--branch-history::
	Add the addresses of sampled taken branches to the callstack.
	This allows examining the path the program took to each sample.
	The data collection must have used -b (or -j) and -g.

--objdump=<path>::
        Path to objdump binary.

+25 −6
Original line number Diff line number Diff line
@@ -226,7 +226,8 @@ static int report__setup_sample_type(struct report *rep)
			return -EINVAL;
		}
		if (symbol_conf.use_callchain) {
			ui__error("Selected -g but no callchain data. Did "
			ui__error("Selected -g or --branch-history but no "
				  "callchain data. Did\n"
				  "you call 'perf record' without -g?\n");
			return -1;
		}
@@ -575,6 +576,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
	struct stat st;
	bool has_br_stack = false;
	int branch_mode = -1;
	bool branch_call_mode = false;
	char callchain_default_opt[] = "fractal,0.5,callee";
	const char * const report_usage[] = {
		"perf report [<options>]",
@@ -637,8 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
		   "regex filter to identify parent, see: '--sort parent'"),
	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
		    "Only display entries with parent-match"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
		     "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
		    "Accumulate callchains of children and show total overhead as well"),
@@ -684,7 +686,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
		    "Show event group information together"),
	OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
		    "use branch records for histogram filling", parse_branch_mode),
		    "use branch records for per branch histogram filling",
		    parse_branch_mode),
	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
		    "add last branch records to call history"),
	OPT_STRING(0, "objdump", &objdump_path, "path",
		   "objdump binary to use for disassembly and annotations"),
	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
@@ -745,10 +750,24 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
	has_br_stack = perf_header__has_feat(&session->header,
					     HEADER_BRANCH_STACK);

	if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) {
	/*
	 * Branch mode is a tristate:
	 * -1 means default, so decide based on the file having branch data.
	 * 0/1 means the user chose a mode.
	 */
	if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
	    branch_call_mode == -1) {
		sort__mode = SORT_MODE__BRANCH;
		symbol_conf.cumulate_callchain = false;
	}
	if (branch_call_mode) {
		callchain_param.key = CCKEY_ADDRESS;
		callchain_param.branch_callstack = 1;
		symbol_conf.use_callchain = true;
		callchain_register_param(&callchain_param);
		if (sort_order == NULL)
			sort_order = "srcline,symbol,dso";
	}

	if (report.mem_mode) {
		if (sort__mode == SORT_MODE__BRANCH) {
+96 −9
Original line number Diff line number Diff line
@@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Reset the per-package mask of @counter so that a new round of reads
 * starts with no bits set. Counters that have no mask are left untouched.
 */
static void zero_per_pkg(struct perf_evsel *counter)
{
	unsigned long *pkg_mask = counter->per_pkg_mask;

	if (pkg_mask == NULL)
		return;

	/* The length is MAX_NR_CPUS bytes, not bits. */
	memset(pkg_mask, 0, MAX_NR_CPUS);
}

static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct cpu_map *cpus = perf_evsel__cpus(counter);
	int s;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	if (cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		mask = zalloc(MAX_NR_CPUS);
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	s = cpu_map__get_socket(cpus, cpu);
	if (s < 0)
		return -1;

	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}

/*
 * Per-read callback: folds one counter reading into @evsel's counts
 * according to the current aggr_mode.
 *
 * @evsel:  event the reading belongs to
 * @cpu:    index used for the per-package check and for evsel->counts->cpu[]
 * @thread: unused
 * @count:  the values just read; replaced by an all-zero stand-in when
 *          check_per_pkg() asks for this reading to be skipped
 *
 * Returns 0 on success, -1 when the per-package check fails.
 */
static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
		   struct perf_counts_values *count)
{
	static struct perf_counts_values zero;
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	bool skip = false;

	if (check_per_pkg(evsel, cpu, &skip) != 0) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	/*
	 * NOTE(review): in the per-cpu modes below the shared static 'zero'
	 * is handed to helpers through a non-const pointer -- confirm they
	 * leave it zeroed.
	 */
	if (skip)
		count = &zero;

	switch (aggr_mode) {
	case AGGR_GLOBAL:
		/* Accumulate into the aggregate; ena/run only when scaling. */
		aggr->val += count->val;
		if (scale) {
			aggr->ena += count->ena;
			aggr->run += count->run;
		}
		break;
	case AGGR_NONE:
	case AGGR_SOCKET:
	case AGGR_CORE:
		/* Snapshot counters skip the delta computation. */
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, count);
		perf_counts_values__scale(count, scale, NULL);
		evsel->counts->cpu[cpu] = *count;
		update_shadow_stats(evsel, count->values);
		break;
	default:
		break;
	}

	return 0;
}

static int read_counter(struct perf_evsel *counter);

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
			       thread_map__nr(evsel_list->threads), scale) < 0)
	aggr->val = aggr->ena = aggr->run = 0;

	if (read_counter(counter))
		return -1;

	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, aggr);
	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

@@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter)
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;
	if (counter->system_wide)
		nthreads = 1;

		count = counter->counts->cpu[cpu].values;
	if (counter->per_pkg)
		zero_per_pkg(counter);

		update_shadow_stats(counter, count);
	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
				return -1;
		}
	}

	return 0;
+4 −0
Original line number Diff line number Diff line
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value)
		callchain_param.key = CCKEY_ADDRESS;
		return 0;
	}
	if (!strncmp(value, "branch", strlen(value))) {
		callchain_param.branch_callstack = 1;
		return 0;
	}
	return -1;
}

+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ struct callchain_param {
	sort_chain_func_t	sort;
	enum chain_order	order;
	enum chain_key		key;
	bool			branch_callstack;
};

extern struct callchain_param callchain_param;
Loading