Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b21484f1 authored by Greg Price's avatar Greg Price Committed by Arnaldo Carvalho de Melo
Browse files

perf report/top: Add option to collapse undesired parts of call graph



For example, in an application with an expensive function implemented
with deeply nested recursive calls, the default call-graph presentation
is dominated by the different callchains within that function.  By
ignoring these callees, we can collect the callchains leading into the
function and compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:

  $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      22.03%     ruby  [.] gc_mark
                 --- gc_mark
                    |--59.40%-- mark_keyvalue
                    |          st_foreach
                    |          gc_mark_children
                    |          |--99.75%-- rb_gc_mark
                    |          |          rb_vm_mark
                    |          |          gc_mark_children
                    |          |          gc_marks
                    |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:

  $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      72.92%     ruby  [.] garbage_collect
                 --- garbage_collect
                     vm_xmalloc
                    |--47.08%-- ruby_xmalloc
                    |          st_insert2
                    |          rb_hash_aset
                    |          |--98.45%-- features_index_add
                    |          |          rb_provide_feature
                    |          |          rb_require_safe
                    |          |          vm_call_method

Signed-off-by: default avatarGreg Price <price@mit.edu>
Tested-by: default avatarJiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu


Cc: Fengguang Wu <fengguang.wu@intel.com>
[ remove spaces at beginning of line, reported by Fengguang Wu ]
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent dc098b35
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -135,6 +135,11 @@ OPTIONS
--inverted::
        alias for inverted caller based call graph.

--ignore-callees=<regex>::
        Ignore callees of the function(s) matching the given regex.
        This has the effect of collecting the callers of each such
        function into one place in the call-graph tree.

--pretty=<key>::
        Pretty printing style.  key: normal, raw

+5 −0
Original line number Diff line number Diff line
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.

	Default: fractal,0.5,callee.

--ignore-callees=<regex>::
        Ignore callees of the function(s) matching the given regex.
        This has the effect of collecting the callers of each such
        function into one place in the call-graph tree.

--percent-limit::
	Do not show entries which have an overhead under that percent.
	(Default: 0).
+24 −3
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
	if ((sort__has_parent || symbol_conf.use_callchain) &&
	    sample->callchain) {
		err = machine__resolve_callchain(machine, evsel, al->thread,
						 sample, &parent);
						 sample, &parent, al);
		if (err)
			return err;
	}
@@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
	if ((sort__has_parent || symbol_conf.use_callchain)
	    && sample->callchain) {
		err = machine__resolve_callchain(machine, evsel, al->thread,
						 sample, &parent);
						 sample, &parent, al);
		if (err)
			return err;
	}
@@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,

	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
		err = machine__resolve_callchain(machine, evsel, al->thread,
						 sample, &parent);
						 sample, &parent, al);
		if (err)
			return err;
	}
@@ -681,6 +681,24 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
	return 0;
}

int
report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
				const char *arg, int unset __maybe_unused)
{
	if (arg) {
		int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
		if (err) {
			char buf[BUFSIZ];
			regerror(err, &ignore_callees_regex, buf, sizeof(buf));
			pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
			return -1;
		}
		have_ignore_callees = 1;
	}

	return 0;
}

static int
parse_branch_mode(const struct option *opt __maybe_unused,
		  const char *str __maybe_unused, int unset)
@@ -771,6 +789,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
		    "alias for inverted call graph"),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
		   "ignore callees of these functions in call graphs",
		   report_parse_ignore_callees_opt),
	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
		   "only consider symbols in these dsos"),
	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+4 −2
Original line number Diff line number Diff line
@@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
		    sample->callchain) {
			err = machine__resolve_callchain(machine, evsel,
							 al.thread, sample,
							 &parent);

							 &parent, &al);
			if (err)
				return;
		}
@@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
			     "mode[,dump_size]", record_callchain_help,
			     &parse_callchain_opt, "fp"),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
		   "ignore callees of these functions in call graphs",
		   report_parse_ignore_callees_opt),
	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
		    "Show a column with the sum of periods"),
	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+15 −9
Original line number Diff line number Diff line
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
	return ret;
}

static bool symbol__match_parent_regex(struct symbol *sym)
static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
static int machine__resolve_callchain_sample(struct machine *machine,
					     struct thread *thread,
					     struct ip_callchain *chain,
					     struct symbol **parent)

					     struct symbol **parent,
					     struct addr_location *root_al)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
					   MAP__FUNCTION, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
			    symbol__match_regex(al.sym, &parent_regex))
				*parent = al.sym;
			else if (have_ignore_callees && root_al &&
			  symbol__match_regex(al.sym, &ignore_callees_regex)) {
				/* Treat this symbol as the root,
				   forgetting its callees. */
				*root_al = al;
				callchain_cursor_reset(&callchain_cursor);
			}
			if (!symbol_conf.use_callchain)
				break;
		}
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
			       struct perf_evsel *evsel,
			       struct thread *thread,
			       struct perf_sample *sample,
			       struct symbol **parent)

			       struct symbol **parent,
			       struct addr_location *root_al)
{
	int ret;

	ret = machine__resolve_callchain_sample(machine, thread,
						sample->callchain, parent);
						sample->callchain, parent, root_al);
	if (ret)
		return ret;

Loading