Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 96b90f27 authored by Linus Torvalds
Browse files

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "This update has mostly fixes, but also other bits:

   - perf tooling fixes

   - PMU driver fixes

   - Intel Broadwell PMU driver HW-enablement for LBR callstacks

   - a late coming 'perf kmem' tool update that enables it to also
     analyze page allocation data.  Note, this comes with MM tracepoint
     changes that we believe to not break anything: because it changes
     the formerly opaque 'struct page *' field that uniquely identifies
     pages to 'pfn' which identifies pages uniquely too, but isn't as
     opaque and can be used for other purposes as well"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/pt: Fix and clean up error handling in pt_event_add()
  perf/x86/intel: Add Broadwell support for the LBR callstack
  perf/x86/intel/rapl: Fix energy counter measurements by supporting per domain energy units
  perf/x86/intel: Fix Core2,Atom,NHM,WSM cycles:pp events
  perf/x86: Fix hw_perf_event::flags collision
  perf probe: Fix segfault when probe with lazy_line to file
  perf probe: Find compilation directory path for lazy matching
  perf probe: Set retprobe flag when probe in address-based alternative mode
  perf kmem: Analyze page allocator events also
  tracing, mm: Record pfn instead of pointer to struct page
parents 396c9df2 0c99241c
Loading
Loading
Loading
Loading
+9 −9
Original line number Diff line number Diff line
@@ -65,15 +65,15 @@ struct event_constraint {
/*
 * struct hw_perf_event.flags flags
 */
/*
 * Flag values are widened to 16 bits so each flag gets a unique bit:
 * the old 8-bit set assigned 0x40 to both PERF_X86_EVENT_EXCL and
 * PERF_X86_EVENT_RDPMC_ALLOWED, which made the two flags collide.
 */
#define PERF_X86_EVENT_PEBS_LDLAT	0x0001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST		0x0002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW	0x0004 /* haswell style datala, store */
#define PERF_X86_EVENT_COMMITTED	0x0008 /* event passed commit_txn */
#define PERF_X86_EVENT_PEBS_LD_HSW	0x0010 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW	0x0020 /* haswell style datala, unknown */
#define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */


struct amd_nb {
+1 −1
Original line number Diff line number Diff line
@@ -3275,7 +3275,7 @@ __init int intel_pmu_init(void)
		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;

		intel_pmu_lbr_init_snb();
		intel_pmu_lbr_init_hsw();

		x86_pmu.event_constraints = intel_bdw_event_constraints;
		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+8 −0
Original line number Diff line number Diff line
@@ -558,6 +558,8 @@ struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

@@ -565,6 +567,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

@@ -588,6 +592,8 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

@@ -603,6 +609,8 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

+15 −18
Original line number Diff line number Diff line
@@ -988,39 +988,36 @@ static int pt_event_add(struct perf_event *event, int mode)
	int ret = -EBUSY;

	if (pt->handle.event)
		goto out;
		goto fail;

	buf = perf_aux_output_begin(&pt->handle, event);
	if (!buf) {
	ret = -EINVAL;
		goto out;
	}
	if (!buf)
		goto fail_stop;

	pt_buffer_reset_offsets(buf, pt->handle.head);
	if (!buf->snapshot) {
		ret = pt_buffer_reset_markers(buf, &pt->handle);
		if (ret) {
			perf_aux_output_end(&pt->handle, 0, true);
			goto out;
		}
		if (ret)
			goto fail_end_stop;
	}

	if (mode & PERF_EF_START) {
		pt_event_start(event, 0);
		if (hwc->state == PERF_HES_STOPPED) {
			pt_event_del(event, 0);
		ret = -EBUSY;
		}
		if (hwc->state == PERF_HES_STOPPED)
			goto fail_end_stop;
	} else {
		hwc->state = PERF_HES_STOPPED;
	}

	ret = 0;
out:
	return 0;

	if (ret)
fail_end_stop:
	perf_aux_output_end(&pt->handle, 0, true);
fail_stop:
	hwc->state = PERF_HES_STOPPED;

fail:
	return ret;
}

+73 −21
Original line number Diff line number Diff line
@@ -62,6 +62,14 @@
#define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
#define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */

/* Number of RAPL energy domains tracked per package. */
#define NR_RAPL_DOMAINS         0x4
/* Human-readable domain names, indexed by RAPL_IDX_*_NRG_STAT (0..3). */
static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
	"pp0-core",
	"package",
	"dram",
	"pp1-gpu",
};

/* Clients have PP0, PKG */
#define RAPL_IDX_CLN	(1<<RAPL_IDX_PP0_NRG_STAT|\
			 1<<RAPL_IDX_PKG_NRG_STAT|\
@@ -112,7 +120,6 @@ static struct perf_pmu_events_attr event_attr_##v = { \

struct rapl_pmu {
	spinlock_t	 lock;
	int		 hw_unit;  /* 1/2^hw_unit Joule */
	int		 n_active; /* number of active events */
	struct list_head active_list;
	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
@@ -120,6 +127,7 @@ struct rapl_pmu {
	struct hrtimer   hrtimer;
};

static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
static struct pmu rapl_pmu_class;
static cpumask_t rapl_cpu_mask;
static int rapl_cntr_mask;
@@ -127,6 +135,7 @@ static int rapl_cntr_mask;
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);

static struct x86_pmu_quirk *rapl_quirks;
static inline u64 rapl_read_counter(struct perf_event *event)
{
	u64 raw;
@@ -134,15 +143,28 @@ static inline u64 rapl_read_counter(struct perf_event *event)
	return raw;
}

static inline u64 rapl_scale(u64 v)
/*
 * Register a cpu-model quirk function to run during rapl_pmu_init().
 * Quirks are pushed onto the singly-linked list headed at rapl_quirks;
 * the static __quirk node gives each call site its own list entry.
 */
#define rapl_add_quirk(func_)						\
do {									\
	static struct x86_pmu_quirk __quirk __initdata = {		\
		.func = func_,						\
	};								\
	__quirk.next = rapl_quirks;					\
	rapl_quirks = &__quirk;						\
} while (0)

/*
 * Scale a raw energy-counter delta to the fixed 1/2^32 Joule API unit,
 * using the per-domain hardware unit in rapl_hw_unit[].
 *
 * @v:   raw counter delta
 * @cfg: 1-based RAPL domain index (from event->hw.config); domain
 *       cfg maps to rapl_hw_unit[cfg - 1]
 *
 * Returns v unscaled if cfg is out of range.  The out-of-range guard
 * also rejects cfg < 1, which would otherwise read rapl_hw_unit[-1].
 */
static inline u64 rapl_scale(u64 v, int cfg)
{
	if (cfg < 1 || cfg > NR_RAPL_DOMAINS) {
		pr_warn("invalid domain %d, failed to scale data\n", cfg);
		return v;
	}
	/*
	 * scale delta to smallest unit (1/2^32)
	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
	 * or use ldexp(count, -32).
	 * Watts = Joules/Time delta
	 */
	return v << (32 - rapl_hw_unit[cfg - 1]);
}

static u64 rapl_event_update(struct perf_event *event)
@@ -173,7 +195,7 @@ static u64 rapl_event_update(struct perf_event *event)
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	sdelta = rapl_scale(delta);
	sdelta = rapl_scale(delta, event->hw.config);

	local64_add(sdelta, &event->count);

@@ -546,12 +568,22 @@ static void rapl_cpu_init(int cpu)
	cpumask_set_cpu(cpu, &rapl_cpu_mask);
}

/*
 * Haswell-Server quirk (registered for cpu model 63 in rapl_pmu_init):
 * force the DRAM domain's energy unit to 1/2^16 J, overriding the value
 * rapl_check_hw_unit() read from MSR_RAPL_POWER_UNIT.
 */
static __init void rapl_hsw_server_quirk(void)
{
	/*
	 * DRAM domain on HSW server has fixed energy unit which can be
	 * different than the unit from power unit MSR.
	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families,
	 * Volume 2 of 2, Datasheet, September 2014,
	 * Reference Number: 330784-001"
	 */
	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
}

static int rapl_cpu_prepare(int cpu)
{
	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
	int phys_id = topology_physical_package_id(cpu);
	u64 ms;
	u64 msr_rapl_power_unit_bits;

	if (pmu)
		return 0;
@@ -559,24 +591,13 @@ static int rapl_cpu_prepare(int cpu)
	if (phys_id < 0)
		return -1;

	/* protect rdmsrl() to handle virtualization */
	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
		return -1;

	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
	if (!pmu)
		return -1;

	spin_lock_init(&pmu->lock);

	INIT_LIST_HEAD(&pmu->active_list);

	/*
	 * grab power unit as: 1/2^unit Joules
	 *
	 * we cache in local PMU instance
	 */
	pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
	pmu->pmu = &rapl_pmu_class;

	/*
@@ -586,8 +607,8 @@ static int rapl_cpu_prepare(int cpu)
	 * divide interval by 2 to avoid lockstep (2 * 100)
	 * if hw unit is 32, then we use 2 ms 1/200/2
	 */
	if (pmu->hw_unit < 32)
		ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1));
	if (rapl_hw_unit[0] < 32)
		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
	else
		ms = 2;

@@ -655,6 +676,20 @@ static int rapl_cpu_notifier(struct notifier_block *self,
	return NOTIFY_OK;
}

/*
 * Read the energy status unit (1/2^unit Joules) from MSR_RAPL_POWER_UNIT
 * and seed every entry of rapl_hw_unit[] with it; cpu-model quirks
 * (e.g. rapl_hsw_server_quirk) may override individual domains later.
 *
 * Returns 0 on success, -1 if the MSR cannot be read (e.g. when running
 * virtualized without RAPL MSR support).
 */
static int rapl_check_hw_unit(void)
{
	u64 msr_rapl_power_unit_bits;
	int i;

	/* protect rdmsrl() to handle virtualization */
	if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
		return -1;
	/* energy status unit lives in bits 12:8 of the MSR */
	for (i = 0; i < NR_RAPL_DOMAINS; i++)
		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;

	return 0;
}

static const struct x86_cpu_id rapl_cpu_match[] = {
	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
	[1] = {},
@@ -664,6 +699,8 @@ static int __init rapl_pmu_init(void)
{
	struct rapl_pmu *pmu;
	int cpu, ret;
	struct x86_pmu_quirk *quirk;
	int i;

	/*
	 * check for Intel processor family 6
@@ -678,6 +715,11 @@ static int __init rapl_pmu_init(void)
		rapl_cntr_mask = RAPL_IDX_CLN;
		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
		break;
	case 63: /* Haswell-Server */
		rapl_add_quirk(rapl_hsw_server_quirk);
		rapl_cntr_mask = RAPL_IDX_SRV;
		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
		break;
	case 60: /* Haswell */
	case 69: /* Haswell-Celeron */
		rapl_cntr_mask = RAPL_IDX_HSW;
@@ -693,7 +735,13 @@ static int __init rapl_pmu_init(void)
		/* unsupported */
		return 0;
	}
	ret = rapl_check_hw_unit();
	if (ret)
		return ret;

	/* run cpu model quirks */
	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
		quirk->func();
	cpu_notifier_register_begin();

	for_each_online_cpu(cpu) {
@@ -714,14 +762,18 @@ static int __init rapl_pmu_init(void)

	pmu = __this_cpu_read(rapl_pmu);

	pr_info("RAPL PMU detected, hw unit 2^-%d Joules,"
	pr_info("RAPL PMU detected,"
		" API unit is 2^-32 Joules,"
		" %d fixed counters"
		" %llu ms ovfl timer\n",
		pmu->hw_unit,
		hweight32(rapl_cntr_mask),
		ktime_to_ms(pmu->timer_interval));

	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
		if (rapl_cntr_mask & (1 << i)) {
			pr_info("hw unit of domain %s 2^-%d Joules\n",
				rapl_domain_names[i], rapl_hw_unit[i]);
		}
	}
out:
	cpu_notifier_register_done();

Loading