Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e3b0ac1b authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

 - Do event name substring search as last resort in 'perf list'.
   (Arnaldo Carvalho de Melo)

   E.g.:

    # perf list clock

    List of pre-defined events (to be used in -e):

     cpu-clock                                          [Software event]
     task-clock                                         [Software event]

     uncore_cbox_0/clockticks/                          [Kernel PMU event]
     uncore_cbox_1/clockticks/                          [Kernel PMU event]

     kvm:kvm_pvclock_update                             [Tracepoint event]
     kvm:kvm_update_master_clock                        [Tracepoint event]
     power:clock_disable                                [Tracepoint event]
     power:clock_enable                                 [Tracepoint event]
     power:clock_set_rate                               [Tracepoint event]
     syscalls:sys_enter_clock_adjtime                   [Tracepoint event]
     syscalls:sys_enter_clock_getres                    [Tracepoint event]
     syscalls:sys_enter_clock_gettime                   [Tracepoint event]
     syscalls:sys_enter_clock_nanosleep                 [Tracepoint event]
     syscalls:sys_enter_clock_settime                   [Tracepoint event]
     syscalls:sys_exit_clock_adjtime                    [Tracepoint event]
     syscalls:sys_exit_clock_getres                     [Tracepoint event]
     syscalls:sys_exit_clock_gettime                    [Tracepoint event]
     syscalls:sys_exit_clock_nanosleep                  [Tracepoint event]
     syscalls:sys_exit_clock_settime                    [Tracepoint event]

 - Reduce min 'perf stat --interval-print/-I' to 10ms. (Kan Liang)

   perf stat --interval in action:

   # perf stat -e cycles -I 50 -a usleep $((200 * 1000))
   print interval < 100ms. The overhead percentage could be high in some cases. Please proceed with caution.
   #   time                    counts unit events
      0.050233636         48,240,396      cycles
      0.100557098         35,492,594      cycles
      0.150804687         39,295,112      cycles
      0.201032269         33,101,961      cycles
      0.201980732            786,379      cycles
  #

 - Allow for max_stack greater than PERF_MAX_STACK_DEPTH, as when
   synthesizing callchains from Intel PT data. (Adrian Hunter)

 - Allow probing on kmodules without DWARF. (Masami Hiramatsu)

 - Fix a segfault when processing a perf.data file with callchains using
   "perf report --call-graph none". (Namhyung Kim)

 - Fix unresolved COMMs in 'perf top' when -s comm is used. (Namhyung Kim)

 - Register idle thread in 'perf top'. (Namhyung Kim)

 - Change 'record.samples' type to unsigned long long, fixing output of
   number of samples in 32-bit architectures. (Yang Shi)

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents c2365b93 19afd104
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -125,6 +125,8 @@ To limit the list use:
. If none of the above is matched, it will apply the supplied glob to all
  events, printing the ones that match.

. As a last resort, it will do a substring search in all event names.

One or more types can be used at the same time, listing the events for the
types specified.

+3 −2
Original line number Diff line number Diff line
@@ -128,8 +128,9 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m

-I msecs::
--interval-print msecs::
	Print count deltas every N milliseconds (minimum: 100ms)
	example: perf stat -I 1000 -e cycles -a sleep 5
Print count deltas every N milliseconds (minimum: 10ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
	example: 'perf stat -I 1000 -e cycles -a sleep 5'

--per-socket::
Aggregate counts per processor socket for system-wide mode measurements.  This
+16 −2
Original line number Diff line number Diff line
@@ -45,6 +45,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
	}

	for (i = 0; i < argc; ++i) {
		char *sep, *s;

		if (strcmp(argv[i], "tracepoint") == 0)
			print_tracepoint_events(NULL, NULL, raw_dump);
		else if (strcmp(argv[i], "hw") == 0 ||
@@ -60,8 +62,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
			print_hwcache_events(NULL, raw_dump);
		else if (strcmp(argv[i], "pmu") == 0)
			print_pmu_events(NULL, raw_dump);
		else {
			char *sep = strchr(argv[i], ':'), *s;
		else if ((sep = strchr(argv[i], ':')) != NULL) {
			int sep_idx;

			if (sep == NULL) {
@@ -76,6 +77,19 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
			s[sep_idx] = '\0';
			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
			free(s);
		} else {
			if (asprintf(&s, "*%s*", argv[i]) < 0) {
				printf("Critical: Not enough memory! Trying to continue...\n");
				continue;
			}
			print_symbol_events(s, PERF_TYPE_HARDWARE,
					    event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
			print_symbol_events(s, PERF_TYPE_SOFTWARE,
					    event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
			print_hwcache_events(s, raw_dump);
			print_pmu_events(s, raw_dump);
			print_tracepoint_events(NULL, s, raw_dump);
			free(s);
		}
	}
	return 0;
+3 −5
Original line number Diff line number Diff line
@@ -182,10 +182,8 @@ static int opt_set_target(const struct option *opt, const char *str,
	if  (str) {
		if (!strcmp(opt->long_name, "exec"))
			params.uprobes = true;
#ifdef HAVE_DWARF_SUPPORT
		else if (!strcmp(opt->long_name, "module"))
			params.uprobes = false;
#endif
		else
			return ret;

@@ -490,9 +488,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
		   "file", "vmlinux pathname"),
	OPT_STRING('s', "source", &symbol_conf.source_prefix,
		   "directory", "path to kernel source"),
	OPT_CALLBACK('m', "module", NULL, "modname|path",
		"target module name (for online) or path (for offline)",
		opt_set_target),
	OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
		"Don't search inlined functions"),
#endif
@@ -509,6 +504,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
		     opt_set_filter),
	OPT_CALLBACK('x', "exec", NULL, "executable|path",
			"target executable name or path", opt_set_target),
	OPT_CALLBACK('m', "module", NULL, "modname|path",
		"target module name (for online) or path (for offline)",
		opt_set_target),
	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
		    "Enable symbol demangling"),
	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+12 −4
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ struct record {
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
@@ -637,17 +637,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
	 * Let the child rip
	 */
	if (forks) {
		union perf_event event;
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, &event,
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}
@@ -659,7 +667,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)

	auxtrace_snapshot_enabled = 1;
	for (;;) {
		int hits = rec->samples;
		unsigned long long hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			auxtrace_snapshot_enabled = 0;
Loading