Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1ccb8fed authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-4.16-20180110' of...

Merge tag 'perf-core-for-mingo-4.16-20180110' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- The 'perf test bpf' entry hooked a eBPF proggie to the
  SyS_epoll_wait() kernel function and expected it to be hit when calling
  the epoll_wait() libc wrapper, which changed recently, in systems such
  as Fedora 27, with the glibc wrapper calling instead the epoll_pwait()
  syscall, so switch to epoll_pwait() for both the kernel and libc
  function, getting it to work both in old and new systems (Arnaldo Carvalho de Melo)

- Beautify 'gettid' syscall result in 'perf trace', and in doing so
  noticed that we need to handle namespaces in 'perf trace', will be
  dealt with in follow up patches where we'll try to figure out if
  the recent support for namespace in tools/perf/ can be used for this
  purpose as well. (Arnaldo Carvalho de Melo)

- Introduce 'perf report --mmaps' and 'perf report --tasks' to show
  info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo)

- Synchronize kernel <-> tooling headers wrt meltdown/spectre changes
  (Arnaldo Carvalho de Melo)

- Fix a wrong offset issue when using /proc/kcore (Jin Yao)

- Fix bug that prevented annotating symbols in perf.data files
  generated with 'perf record --branch-any'  (Jin Yao)

- Add infrastructure to record first and last sample time to the
  perf.data file header, so that when processing all samples in
  a 'perf record' session, such as when doing build-id processing,
  or when specifically requesting that that info be recorded, use
  that in 'perf report --time', that also got support for percent
  slices in addition to absolute ones.

  I.e. now it is possible to ask for the samples in the 10%-20%
  time slice of a perf.data file (Jin Yao)

- Enable building with libbabeltrace by default (Jiri Olsa)

- Display perf_event_attr::namespaces when duping the attributes
  in verbose mode (Jiri Olsa)

- Allocate context task_ctx_data for child event (Jiri Olsa)

- Update comments for PERF_RECORD_ITRACE_START and PERF_RECORD_MISC_* (Jiri Olsa)

- Add support for showing PERF_RECORD_LOST events in 'perf script' (Jiri Olsa)

- Add 'perf report --stats' option to display quick statistics about
  metadata events (PERF_RECORD_*) i.e. what we get at the end of 'perf
  report -D' (Jiri Olsa)

- Fix compile error with libunwind x86 (Wang Nan)

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 9128d3ed 5d64db29
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -612,9 +612,12 @@ struct perf_event_mmap_page {
 */
#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
/*
 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
 * different events so can reuse the same bit position.
 * Ditto PERF_RECORD_MISC_SWITCH_OUT.
 * Following PERF_RECORD_MISC_* are used on different
 * events, so can reuse the same bit position:
 *
 *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
 *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
 *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
 */
#define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
@@ -864,6 +867,7 @@ enum perf_event_type {
	 *	struct perf_event_header	header;
	 *	u32				pid;
	 *	u32				tid;
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_ITRACE_START		= 12,
+0 −15
Original line number Diff line number Diff line
@@ -178,21 +178,6 @@ put_callchain_entry(int rctx)
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}

struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
{
	bool kernel = !event->attr.exclude_callchain_kernel;
	bool user   = !event->attr.exclude_callchain_user;
	/* Disallow cross-task user callchains. */
	bool crosstask = event->ctx->task && event->ctx->task != current;
	const u32 max_stack = event->attr.sample_max_stack;

	if (!kernel && !user)
		return NULL;

	return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
}

struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark)
+39 −15
Original line number Diff line number Diff line
@@ -5815,19 +5815,11 @@ void perf_output_sample(struct perf_output_handle *handle,
		perf_output_read(handle, event);

	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
		if (data->callchain) {
		int size = 1;

			if (data->callchain)
		size += data->callchain->nr;

		size *= sizeof(u64);

		__output_copy(handle, data->callchain, size);
		} else {
			u64 nr = 0;
			perf_output_put(handle, nr);
		}
	}

	if (sample_type & PERF_SAMPLE_RAW) {
@@ -5980,6 +5972,26 @@ static u64 perf_virt_to_phys(u64 virt)
	return phys_addr;
}

static struct perf_callchain_entry __empty_callchain = { .nr = 0, };

static struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
{
	bool kernel = !event->attr.exclude_callchain_kernel;
	bool user   = !event->attr.exclude_callchain_user;
	/* Disallow cross-task user callchains. */
	bool crosstask = event->ctx->task && event->ctx->task != current;
	const u32 max_stack = event->attr.sample_max_stack;
	struct perf_callchain_entry *callchain;

	if (!kernel && !user)
		return &__empty_callchain;

	callchain = get_perf_callchain(regs, 0, kernel, user,
				       max_stack, crosstask, true);
	return callchain ?: &__empty_callchain;
}

void perf_prepare_sample(struct perf_event_header *header,
			 struct perf_sample_data *data,
			 struct perf_event *event,
@@ -6002,8 +6014,6 @@ void perf_prepare_sample(struct perf_event_header *header,
		int size = 1;

		data->callchain = perf_callchain(event, regs);

		if (data->callchain)
		size += data->callchain->nr;

		header->size += size * sizeof(u64);
@@ -10703,6 +10713,19 @@ inherit_event(struct perf_event *parent_event,
	if (IS_ERR(child_event))
		return child_event;


	if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
	    !child_ctx->task_ctx_data) {
		struct pmu *pmu = child_event->pmu;

		child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
						   GFP_KERNEL);
		if (!child_ctx->task_ctx_data) {
			free_event(child_event);
			return NULL;
		}
	}

	/*
	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
	 * must be under the same lock in order to serialize against
@@ -10713,6 +10736,7 @@ inherit_event(struct perf_event *parent_event,
	if (is_orphaned_event(parent_event) ||
	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
		mutex_unlock(&parent_event->child_mutex);
		/* task_ctx_data is freed with child_ctx */
		free_event(child_event);
		return NULL;
	}
+0 −4
Original line number Diff line number Diff line
@@ -201,10 +201,6 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)

DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)

/* Callchain handling */
extern struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs);

static inline int get_recursion_context(int *recursion)
{
	int rctx;
+3 −1
Original line number Diff line number Diff line
@@ -197,11 +197,12 @@
#define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */

#define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */

#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
#define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */

#endif /* _ASM_X86_CPUFEATURES_H */
Loading