
Commit b36817e8 authored by Stephane Eranian, committed by Ingo Molnar

perf/x86: Add Intel LBR sharing logic



The Intel LBR on some recent processors is capable
of filtering branches by type. The filter is configurable
via the LBR_SELECT MSR register.
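
For orientation, programming the filter amounts to writing a bitmask of
filter bits to that MSR when the LBR is enabled. A minimal sketch (the MSR
constant below is the one the kernel uses for these parts; the value being
written is only a placeholder):

	#define MSR_LBR_SELECT		0x000001c8	/* LBR filtering control */

	/* lbr_select_bits: placeholder for the encoded branch-type filter */
	wrmsrl(MSR_LBR_SELECT, lbr_select_bits);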

There are limitations on how this register can be used.

On Nehalem/Westmere, the LBR_SELECT is shared by the two HT threads
when HT is on. It is private to each core when HT is off.

On SandyBridge, the LBR_SELECT register is private to each thread
when HT is on. It is private to each core when HT is off.

The kernel must manage the sharing of LBR_SELECT. It allows
multiple users on the same logical CPU to use LBR_SELECT as
long as they program it with the same value. Across sibling
CPUs (HT threads), the same restriction applies on NHM/WSM.
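
Put differently, a new LBR user can be admitted only when the shared
register is currently unused or already holds exactly the value the new
user wants to program. A simplified sketch of that admission rule
(illustrative only, not the kernel code; all names are made up):

	/* Illustrative only: admission rule for a shared LBR_SELECT value */
	struct lbr_sel_share {
		u64	config;		/* currently programmed LBR_SELECT value */
		int	refcnt;		/* number of events relying on it */
	};

	/* returns 1 if 'want' may be programmed, 0 if an incompatible user exists */
	static int lbr_sel_try_get(struct lbr_sel_share *s, u64 want)
	{
		if (s->refcnt && s->config != want)
			return 0;	/* another user programmed a different filter */
		s->config = want;
		s->refcnt++;
		return 1;
	}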

This patch implements this sharing logic by leveraging the
mechanism put in place for managing the offcore_response
shared MSR.
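
For reference, that mechanism tracks each shared MSR with a per-core
accounting structure, which the patch below reuses for LBR_SELECT. Its
definition in the kernel of this era looks roughly like this:

	struct er_account {
		raw_spinlock_t	lock;	/* per-core: protect structure */
		u64		config;	/* extra MSR config */
		u64		reg;	/* extra MSR number */
		atomic_t	ref;	/* reference count */
	};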

We modify __intel_shared_reg_get_constraints() so that
x86_get_event_constraints() still gets called, because LBR may
be associated with events that are themselves counter constrained.
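
For context, the caller of that path (as it looked in the kernel around
this time; reconstructed here for illustration, not part of this patch)
only falls back to the generic constraint tables when the shared-regs path
returns NULL, which is why the LBR case now returns NULL rather than the
"unconstrained" constraint:

	static struct event_constraint *
	intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
	{
		struct event_constraint *c;

		c = intel_bts_constraints(event);
		if (c)
			return c;

		c = intel_pebs_constraints(event);
		if (c)
			return c;

		c = intel_shared_regs_constraints(cpuc, event);
		if (c)
			return c;

		/* reached when the shared-regs path returned NULL */
		return x86_get_event_constraints(cpuc, event);
	}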

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 225ce539
arch/x86/kernel/cpu/perf_event.c: +4 −0
@@ -426,6 +426,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 
+	/* mark not used */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+	event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }

arch/x86/kernel/cpu/perf_event.h: +4 −0
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,7 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;
 
 	/*
 	 * Intel host/guest exclude bits
@@ -342,6 +344,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */
 
 	/*
 	 * Extra registers for events
arch/x86/kernel/cpu/perf_event_intel.c: +44 −26
@@ -1123,17 +1123,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraint() */
 
 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1156,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		reg->alloc = 1;
 
 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraint()
+		 * to check if associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1200,11 +1196,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
-
-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }

@@ -1252,6 +1260,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1431,7 +1443,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;
 
 	cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,9 +1465,12 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;
 
+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
 		for_each_cpu(i, topology_thread_cpumask(cpu)) {
 			struct intel_shared_regs *pc;
 
@@ -1466,11 +1481,14 @@ static void intel_pmu_cpu_starting(int cpu)
 				break;
 			}
 		}
-
-	cpuc->shared_regs->core_id = core_id;
-	cpuc->shared_regs->refcnt++;
+		cpuc->shared_regs->core_id = core_id;
+		cpuc->shared_regs->refcnt++;
+	}
+
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);