Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e3b0ac1b authored by Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

 - Do event name substring search as last resort in 'perf list'.
   (Arnaldo Carvalho de Melo)

   E.g.:

    # perf list clock

    List of pre-defined events (to be used in -e):

     cpu-clock                                          [Software event]
     task-clock                                         [Software event]

     uncore_cbox_0/clockticks/                          [Kernel PMU event]
     uncore_cbox_1/clockticks/                          [Kernel PMU event]

     kvm:kvm_pvclock_update                             [Tracepoint event]
     kvm:kvm_update_master_clock                        [Tracepoint event]
     power:clock_disable                                [Tracepoint event]
     power:clock_enable                                 [Tracepoint event]
     power:clock_set_rate                               [Tracepoint event]
     syscalls:sys_enter_clock_adjtime                   [Tracepoint event]
     syscalls:sys_enter_clock_getres                    [Tracepoint event]
     syscalls:sys_enter_clock_gettime                   [Tracepoint event]
     syscalls:sys_enter_clock_nanosleep                 [Tracepoint event]
     syscalls:sys_enter_clock_settime                   [Tracepoint event]
     syscalls:sys_exit_clock_adjtime                    [Tracepoint event]
     syscalls:sys_exit_clock_getres                     [Tracepoint event]
     syscalls:sys_exit_clock_gettime                    [Tracepoint event]
     syscalls:sys_exit_clock_nanosleep                  [Tracepoint event]
     syscalls:sys_exit_clock_settime                    [Tracepoint event]

 - Reduce min 'perf stat --interval-print/-I' to 10ms. (Kan Liang)

   perf stat --interval in action:

   # perf stat -e cycles -I 50 -a usleep $((200 * 1000))
   print interval < 100ms. The overhead percentage could be high in some cases. Please proceed with caution.
   #   time                    counts unit events
      0.050233636         48,240,396      cycles
      0.100557098         35,492,594      cycles
      0.150804687         39,295,112      cycles
      0.201032269         33,101,961      cycles
      0.201980732            786,379      cycles
  #

 - Allow for max_stack greater than PERF_MAX_STACK_DEPTH, as when
   synthesizing callchains from Intel PT data. (Adrian Hunter)

 - Allow probing on kmodules without DWARF. (Masami Hiramatsu)

 - Fix a segfault when processing a perf.data file with callchains using
   "perf report --call-graph none". (Namhyung Kim)

 - Fix unresolved COMMs in 'perf top' when -s comm is used. (Namhyung Kim)

 - Register idle thread in 'perf top'. (Namhyung Kim)

 - Change 'record.samples' type to unsigned long long, fixing output of
   number of samples in 32-bit architectures. (Yang Shi)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents c2365b93 19afd104
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -125,6 +125,8 @@ To limit the list use:
. If none of the above is matched, it will apply the supplied glob to all
. If none of the above is matched, it will apply the supplied glob to all
  events, printing the ones that match.
  events, printing the ones that match.


. As a last resort, it will do a substring search in all event names.

One or more types can be used at the same time, listing the events for the
One or more types can be used at the same time, listing the events for the
types specified.
types specified.


+3 −2
Original line number Original line Diff line number Diff line
@@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m


-I msecs::
-I msecs::
--interval-print msecs::
--interval-print msecs::
	Print count deltas every N milliseconds (minimum: 100ms)
Print count deltas every N milliseconds (minimum: 10ms)
	example: perf stat -I 1000 -e cycles -a sleep 5
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
	example: 'perf stat -I 1000 -e cycles -a sleep 5'


--per-socket::
--per-socket::
Aggregate counts per processor socket for system-wide mode measurements.  This
Aggregate counts per processor socket for system-wide mode measurements.  This
+16 −2
Original line number Original line Diff line number Diff line
@@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
	}
	}


	for (i = 0; i < argc; ++i) {
	for (i = 0; i < argc; ++i) {
		char *sep, *s;

		if (strcmp(argv[i], "tracepoint") == 0)
		if (strcmp(argv[i], "tracepoint") == 0)
			print_tracepoint_events(NULL, NULL, raw_dump);
			print_tracepoint_events(NULL, NULL, raw_dump);
		else if (strcmp(argv[i], "hw") == 0 ||
		else if (strcmp(argv[i], "hw") == 0 ||
@@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
			print_hwcache_events(NULL, raw_dump);
			print_hwcache_events(NULL, raw_dump);
		else if (strcmp(argv[i], "pmu") == 0)
		else if (strcmp(argv[i], "pmu") == 0)
			print_pmu_events(NULL, raw_dump);
			print_pmu_events(NULL, raw_dump);
		else {
		else if ((sep = strchr(argv[i], ':')) != NULL) {
			char *sep = strchr(argv[i], ':'), *s;
			int sep_idx;
			int sep_idx;


			if (sep == NULL) {
			if (sep == NULL) {
@@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
			s[sep_idx] = '\0';
			s[sep_idx] = '\0';
			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
			free(s);
			free(s);
		} else {
			if (asprintf(&s, "*%s*", argv[i]) < 0) {
				printf("Critical: Not enough memory! Trying to continue...\n");
				continue;
			}
			print_symbol_events(s, PERF_TYPE_HARDWARE,
					    event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
			print_symbol_events(s, PERF_TYPE_SOFTWARE,
					    event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
			print_hwcache_events(s, raw_dump);
			print_pmu_events(s, raw_dump);
			print_tracepoint_events(NULL, s, raw_dump);
			free(s);
		}
		}
	}
	}
	return 0;
	return 0;
+3 −5
Original line number Original line Diff line number Diff line
@@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str,
	if  (str) {
	if  (str) {
		if (!strcmp(opt->long_name, "exec"))
		if (!strcmp(opt->long_name, "exec"))
			params.uprobes = true;
			params.uprobes = true;
#ifdef HAVE_DWARF_SUPPORT
		else if (!strcmp(opt->long_name, "module"))
		else if (!strcmp(opt->long_name, "module"))
			params.uprobes = false;
			params.uprobes = false;
#endif
		else
		else
			return ret;
			return ret;


@@ -490,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
		   "file", "vmlinux pathname"),
		   "file", "vmlinux pathname"),
	OPT_STRING('s', "source", &symbol_conf.source_prefix,
	OPT_STRING('s', "source", &symbol_conf.source_prefix,
		   "directory", "path to kernel source"),
		   "directory", "path to kernel source"),
	OPT_CALLBACK('m', "module", NULL, "modname|path",
		"target module name (for online) or path (for offline)",
		opt_set_target),
	OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
	OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
		"Don't search inlined functions"),
		"Don't search inlined functions"),
#endif
#endif
@@ -509,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
		     opt_set_filter),
		     opt_set_filter),
	OPT_CALLBACK('x', "exec", NULL, "executable|path",
	OPT_CALLBACK('x', "exec", NULL, "executable|path",
			"target executable name or path", opt_set_target),
			"target executable name or path", opt_set_target),
	OPT_CALLBACK('m', "module", NULL, "modname|path",
		"target module name (for online) or path (for offline)",
		opt_set_target),
	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
		    "Enable symbol demangling"),
		    "Enable symbol demangling"),
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+12 −4
Original line number Original line Diff line number Diff line
@@ -49,7 +49,7 @@ struct record {
	int			realtime_prio;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			no_buildid_cache;
	long			samples;
	unsigned long long	samples;
};
};


static int record__write(struct record *rec, void *bf, size_t size)
static int record__write(struct record *rec, void *bf, size_t size)
@@ -637,17 +637,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
	 * Let the child rip
	 * Let the child rip
	 */
	 */
	if (forks) {
	if (forks) {
		union perf_event event;
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		/*
		 * Some H/W events are generated before COMM event
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 * Synthesize COMM event to prevent it.
		 */
		 */
		perf_event__synthesize_comm(tool, &event,
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    process_synthesized_event,
					    machine);
					    machine);
		free(event);


		perf_evlist__start_workload(rec->evlist);
		perf_evlist__start_workload(rec->evlist);
	}
	}
@@ -659,7 +667,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)


	auxtrace_snapshot_enabled = 1;
	auxtrace_snapshot_enabled = 1;
	for (;;) {
	for (;;) {
		int hits = rec->samples;
		unsigned long long hits = rec->samples;


		if (record__mmap_read_all(rec) < 0) {
		if (record__mmap_read_all(rec) < 0) {
			auxtrace_snapshot_enabled = 0;
			auxtrace_snapshot_enabled = 0;
Loading