Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 32b8af82 authored by Jiri Olsa's avatar Jiri Olsa Committed by Arnaldo Carvalho de Melo
Browse files

perf stat: Introduce --per-thread option



Currently all the -p option PID arguments tasks values get aggregated
and printed as single values.

Adding --per-tasks option to print values per task.

  $ perf stat  -e cycles,instructions --per-thread -p 30190,30242
  ^C
   Performance counter stats for process id '30190,30242':

               cat-30190                     0      cycles
               yes-30242         3,842,525,421      cycles
               cat-30190                     0      instructions
               yes-30242        10,370,817,010      instructions

         1.143155657 seconds time elapsed

Also works under interval mode:

  $ perf stat  -e cycles,instructions --per-thread -p 30190,30242 -I 1000
  #           time             comm-pid                  counts unit events
       1.000073435              cat-30190                89,058      cycles
       1.000073435              yes-30242         3,360,786,902      cycles                     (100.00%)
       1.000073435              cat-30190                14,066      instructions
       1.000073435              yes-30242         9,069,937,462      instructions
       2.000204830              cat-30190                     0      cycles
       2.000204830              yes-30242         3,351,667,626      cycles
       2.000204830              cat-30190                     0      instructions
       2.000204830              yes-30242         9,045,796,885      instructions
  ^C     2.771286639              cat-30190                     0      cycles
       2.771286639              yes-30242         2,593,884,166      cycles
       2.771286639              cat-30190                     0      instructions
       2.771286639              yes-30242         7,001,171,191      instructions

It works only with -t and -p options, otherwise following error is
printed:

  $ perf stat  -e cycles --per-thread  -I 1000 ls
  The --per-thread option is only available when monitoring via -p -t options.
      -p, --pid <pid>       stat events on existing process id
      -t, --tid <tid>       stat events on existing thread id

Signed-off-by: default avatarJiri Olsa <jolsa@kernel.org>
Tested-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1435310967-14570-23-git-send-email-jolsa@kernel.org


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent d4f63a47
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide).  The output includes the
core number and the number of online logical processors on that physical processor.

--per-thread::
Aggregate counts per monitored threads, when monitoring threads (-t option)
or processes (-p option).

-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
+74 −2
Original line number Diff line number Diff line
@@ -231,6 +231,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
		count = &zero;

	switch (aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
	case AGGR_SOCKET:
	case AGGR_NONE:
@@ -602,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		fprintf(output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	default:
		break;
@@ -750,6 +759,40 @@ static void print_aggr(char *prefix)
	}
}

static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;

		if (nsec_counter(counter))
			nsec_printout(thread, 0, counter, uval);
		else
			abs_printout(thread, 0, counter, uval);

		if (!csv_output)
			print_noise(counter, 1.0);

		print_running(run, ena);
		fputc('\n', output);
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
@@ -876,6 +919,9 @@ static void print_interval(char *prefix, struct timespec *ts)
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
@@ -944,6 +990,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
	case AGGR_GLOBAL:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, prefix);
@@ -1031,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void)
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	default:
		break;
	}
@@ -1255,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
@@ -1346,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
		run_count = 1;
	}

	/* no_aggr, cgroup are for system-wide only */
	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
	if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, options, "p", 1);
		parse_options_usage(NULL, options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
	 */
	if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");
@@ -1375,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
	if (aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		parse_options_usage(stat_usage, options, "I", 1);
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ enum aggr_mode {
	AGGR_GLOBAL,
	AGGR_SOCKET,
	AGGR_CORE,
	AGGR_THREAD,
};

struct perf_counts_values {