Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 212146f0 authored by Linus Torvalds
Browse files

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "A couple of kernel side fixes:

   - Fix the Intel uncore driver on certain hardware configurations

   - Fix a CPU hotplug related memory allocation bug

   - Remove a spurious WARN()

  ... plus also a handful of perf tooling fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf script python: Add Python3 support to tests/attr.py
  perf trace: Support multiple "vfs_getname" probes
  perf symbols: Filter out hidden symbols from labels
  perf symbols: Add fallback definitions for GELF_ST_VISIBILITY()
  tools headers uapi: Sync linux/in.h copy from the kernel sources
  perf clang: Do not use 'return std::move(something)'
  perf mem/c2c: Fix perf_mem_events to support powerpc
  perf tests evsel-tp-sched: Fix bitwise operator
  perf/core: Don't WARN() for impossible ring-buffer sizes
  perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu()
  perf/x86/intel/uncore: Add Node ID mask
parents d2a6aae9 3bb26006
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}

/*
 * Installed as the .cpu_dying callback of both core_pmu and intel_pmu.
 *
 * Calls fini_debug_store_on_cpu() for @cpu and then, if counter freezing
 * is in use, disable_counter_freeze(). Nothing is freed here; the
 * free_excl_cntrs() call lives in intel_pmu_cpu_dead().
 */
static void intel_pmu_cpu_dying(int cpu)
{
	fini_debug_store_on_cpu(cpu);

	/* disable_counter_freeze() is only called when the feature flag is set. */
	if (x86_pmu.counter_freezing)
		disable_counter_freeze();
}

static void intel_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
	}

	free_excl_cntrs(cpu);

	fini_debug_store_on_cpu(cpu);

	if (x86_pmu.counter_freezing)
		disable_counter_freeze();
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.cpu_dead		= intel_pmu_cpu_dead,
};

static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.cpu_dead		= intel_pmu_cpu_dead,

	.guest_get_msrs		= intel_guest_get_msrs,
	.sched_task		= intel_pmu_sched_task,
};
+3 −1
Original line number Diff line number Diff line
@@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
	.id_table	= snbep_uncore_pci_ids,
};

#define NODE_ID_MASK	0x7

/*
 * build pci bus to socket mapping
 */
@@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
		err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
		if (err)
			break;
		nodeid = config;
		nodeid = config & NODE_ID_MASK;
		/* get the Node ID mapping */
		err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
		if (err)
+3 −0
Original line number Diff line number Diff line
@@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
	size = sizeof(struct ring_buffer);
	size += nr_pages * sizeof(void *);

	if (order_base_2(size) >= MAX_ORDER)
		goto fail;

	rb = kzalloc(size, GFP_KERNEL);
	if (!rb)
		goto fail;
+1 −1
Original line number Diff line number Diff line
@@ -268,7 +268,7 @@ struct sockaddr_in {
#define	IN_MULTICAST(a)		IN_CLASSD(a)
#define	IN_MULTICAST_NET	0xe0000000

#define	IN_BADCLASS(a)		((((long int) (a) ) == 0xffffffff)
#define	IN_BADCLASS(a)		(((long int) (a) ) == (long int)0xffffffff)
#define	IN_EXPERIMENTAL(a)	IN_BADCLASS((a))

#define	IN_CLASSE(a)		((((long int) (a)) & 0xf0000000) == 0xf0000000)
+12 −4
Original line number Diff line number Diff line
@@ -19,8 +19,11 @@ C2C stands for Cache To Cache.
The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
you to track down the cacheline contentions.

The tool is based on x86's load latency and precise store facility events
provided by Intel CPUs. These events provide:
On x86, the tool is based on load latency and precise store facility events
provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
with the thresholding feature.

These events provide:
  - memory address of the access
  - type of the access (load and store details)
  - latency (in cycles) of the load access
@@ -46,7 +49,7 @@ RECORD OPTIONS

-l::
--ldlat::
	Configure mem-loads latency.
	Configure mem-loads latency. (x86 only)

-k::
--all-kernel::
@@ -119,11 +122,16 @@ Following perf record options are configured by default:
  -W,-d,--phys-data,--sample-cpu

Unless specified otherwise with the '-e' option, the following events are monitored by
default:
default on x86:

  cpu/mem-loads,ldlat=30/P
  cpu/mem-stores/P

and the following on PowerPC:

  cpu/mem-loads/
  cpu/mem-stores/

The user can pass any 'perf record' option after the '--' mark, for example (to
enable callchains and system wide monitoring):

Loading