
Commit a0e9c6ef authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "The biggest chunk of the changes are two regression fixes: a HT
  workaround fix and an event-group scheduling fix.  It's been verified
  with 5 days of fuzzer testing.

  Other fixes:

   - eBPF fix
   - a BIOS breakage detection fix
   - PMU driver fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/pt: Fix a refactoring bug
  perf/x86: Tweak broken BIOS rules during check_hw_exists()
  perf/x86/intel/pt: Untangle pt_buffer_reset_markers()
  perf: Disallow sparse AUX allocations for non-SG PMUs in overwrite mode
  perf/x86: Improve HT workaround GP counter constraint
  perf/x86: Fix event/group validation
  perf: Fix race in BPF program unregister
parents 59433347 b44a2b53
arch/x86/kernel/cpu/perf_event.c +62 −21
@@ -190,6 +190,7 @@ static bool check_hw_exists(void)
	u64 val, val_fail, val_new= ~0;
	int i, reg, reg_fail, ret = 0;
	int bios_fail = 0;
+	int reg_safe = -1;

	/*
	 * Check to see if the BIOS enabled any of the counters, if so
@@ -204,6 +205,8 @@ static bool check_hw_exists(void)
			bios_fail = 1;
			val_fail = val;
			reg_fail = reg;
+		} else {
+			reg_safe = i;
		}
	}

@@ -221,12 +224,23 @@ static bool check_hw_exists(void)
		}
	}

+	/*
+	 * If all the counters are enabled, the below test will always
+	 * fail.  The tools will also become useless in this scenario.
+	 * Just fail and disable the hardware counters.
+	 */
+
+	if (reg_safe == -1) {
+		reg = reg_safe;
+		goto msr_fail;
+	}
+
	/*
	 * Read the current value, change it and read it back to see if it
	 * matches, this is needed to detect certain hardware emulators
	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
	 */
-	reg = x86_pmu_event_addr(0);
+	reg = x86_pmu_event_addr(reg_safe);
	if (rdmsrl_safe(reg, &val))
		goto msr_fail;
	val ^= 0xffffUL;
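
The probe in this hunk is a general trick for spotting registers that silently ignore writes, which is exactly how the qemu/kvm emulators mentioned in the comment behave: read the register, flip some bits, write the result back, and re-read. A minimal user-space sketch of the same pattern; fake_rdmsr(), fake_wrmsr() and msr_is_emulated are invented stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins for rdmsrl_safe()/wrmsrl_safe(); a broken
 * emulator is modeled as a register that ignores writes and
 * always reads back 0. */
static uint64_t msr_backing;
static bool msr_is_emulated;

static int fake_rdmsr(uint64_t *val)
{
	*val = msr_is_emulated ? 0 : msr_backing;
	return 0;	/* 0 == access did not fault */
}

static int fake_wrmsr(uint64_t val)
{
	if (!msr_is_emulated)
		msr_backing = val;
	return 0;
}

/* Mirror of the probe in check_hw_exists(): read, flip some
 * bits, write, read back; a register that ignores writes fails. */
static bool counter_works(void)
{
	uint64_t val, val_new = ~0ULL;

	if (fake_rdmsr(&val))
		return false;
	val ^= 0xffffUL;
	if (fake_wrmsr(val))
		return false;
	if (fake_rdmsr(&val_new))
		return false;
	return val == val_new;
}

int main(void)
{
	msr_is_emulated = false;
	printf("real counter:     %s\n", counter_works() ? "ok" : "fail");
	msr_is_emulated = true;
	printf("emulated counter: %s\n", counter_works() ? "ok" : "fail");
	return 0;
}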
@@ -611,6 +625,7 @@ struct sched_state {
	int	event;		/* event index */
	int	counter;	/* counter index */
	int	unassigned;	/* number of events to be assigned left */
+	int	nr_gp;		/* number of GP counters used */
	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};

@@ -620,27 +635,29 @@ struct sched_state {
struct perf_sched {
	int			max_weight;
	int			max_events;
-	struct perf_event	**events;
-	struct sched_state	state;
+	int			max_gp;
	int			saved_states;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
	struct sched_state	saved[SCHED_STATES_MAX];
};

/*
 * Initialize interator that runs through all events and counters.
 */
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
-			    int num, int wmin, int wmax)
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+			    int num, int wmin, int wmax, int gpmax)
{
	int idx;

	memset(sched, 0, sizeof(*sched));
	sched->max_events	= num;
	sched->max_weight	= wmax;
-	sched->events		= events;
+	sched->max_gp		= gpmax;
+	sched->constraints	= constraints;

	for (idx = 0; idx < num; idx++) {
-		if (events[idx]->hw.constraint->weight == wmin)
+		if (constraints[idx]->weight == wmin)
			break;
	}

@@ -687,7 +704,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
	if (sched->state.event >= sched->max_events)
		return false;

-	c = sched->events[sched->state.event]->hw.constraint;
+	c = sched->constraints[sched->state.event];
	/* Prefer fixed purpose counters */
	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
		idx = INTEL_PMC_IDX_FIXED;
@@ -696,12 +713,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
				goto done;
		}
	}

	/* Grab the first unused counter starting with idx */
	idx = sched->state.counter;
	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used))
+		if (!__test_and_set_bit(idx, sched->state.used)) {
+			if (sched->state.nr_gp++ >= sched->max_gp)
+				return false;
+
			goto done;
+		}
	}

	return false;

@@ -745,7 +767,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
			if (sched->state.weight > sched->max_weight)
				return false;
		}
-		c = sched->events[sched->state.event]->hw.constraint;
+		c = sched->constraints[sched->state.event];
	} while (c->weight != sched->state.weight);

	sched->state.counter = 0;	/* start with first counter */
@@ -756,12 +778,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
 * Assign a counter for each event.
 */
-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign)
{
	struct perf_sched sched;

-	perf_sched_init(&sched, events, n, wmin, wmax);
+	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);

	do {
		if (!perf_sched_find_counter(&sched))
@@ -788,9 +810,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
		x86_pmu.start_scheduling(cpuc);

	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		hwc = &cpuc->event_list[i]->hw;
+		cpuc->event_constraint[i] = NULL;
		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-		hwc->constraint = c;
+		cpuc->event_constraint[i] = c;

		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
@@ -801,7 +823,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
	 */
	for (i = 0; i < n; i++) {
		hwc = &cpuc->event_list[i]->hw;
-		c = hwc->constraint;
+		c = cpuc->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
@@ -821,9 +843,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
	}

	/* slow path */
-	if (i != n)
-		unsched = perf_assign_events(cpuc->event_list, n, wmin,
-					     wmax, assign);
+	if (i != n) {
+		int gpmax = x86_pmu.num_counters;
+
+		/*
+		 * Do not allow scheduling of more than half the available
+		 * generic counters.
+		 *
+		 * This helps avoid counter starvation of sibling thread by
+		 * ensuring at most half the counters cannot be in exclusive
+		 * mode. There is no designated counters for the limits. Any
+		 * N/2 counters can be used. This helps with events with
+		 * specific counter constraints.
+		 */
+		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+			gpmax /= 2;
+
+		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+					     wmax, gpmax, assign);
+	}

	/*
	 * In case of success (unsched = 0), mark events as committed,
@@ -840,7 +879,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
			e = cpuc->event_list[i];
			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
		}
	}

@@ -1292,8 +1331,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
		x86_pmu.put_event_constraints(cpuc, event);

	/* Delete the array entry. */
-	while (++i < cpuc->n_events)
+	while (++i < cpuc->n_events) {
		cpuc->event_list[i-1] = cpuc->event_list[i];
+		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+	}
	--cpuc->n_events;

	perf_event_update_userpage(event);
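
For readers tracking the scheduler rework above: perf_assign_events() now walks events from most- to least-constrained and refuses to hand out more than gpmax generic counters. A deliberately simplified, greedy user-space sketch of that idea follows; the kernel version additionally saves and restores scheduler state so it can backtrack, and NUM_GP, struct toy_constraint and toy_assign() are invented names:

#include <stdint.h>
#include <stdio.h>

#define NUM_GP 4	/* invented: generic counters on this toy PMU */

/* Toy stand-in for struct event_constraint: a bitmask of the
 * counters an event may use; weight == popcount(idxmsk). */
struct toy_constraint {
	uint32_t idxmsk;
};

/*
 * Greedy version of the scheduling idea in perf_assign_events():
 * visit events from most- to least-constrained and never hand out
 * more than gpmax generic counters.  Backtracking via saved
 * states, as the kernel does, is omitted here.
 */
static int toy_assign(struct toy_constraint *c, int n, int gpmax, int *assign)
{
	uint32_t used = 0;
	int nr_gp = 0;

	/* events are assumed pre-sorted by ascending weight */
	for (int e = 0; e < n; e++) {
		int idx;

		for (idx = 0; idx < NUM_GP; idx++) {
			if (!(c[e].idxmsk & (1u << idx)) || (used & (1u << idx)))
				continue;
			if (++nr_gp > gpmax)
				return -1;	/* over the HT cap: unschedulable */
			used |= 1u << idx;
			assign[e] = idx;
			break;
		}
		if (idx == NUM_GP)
			return -1;		/* no counter satisfies constraint */
	}
	return 0;
}

int main(void)
{
	/* two events, sorted most-constrained first */
	struct toy_constraint c[] = { { 0x1 }, { 0xf } };
	int assign[2];

	/* with the HT workaround active, only NUM_GP/2 counters usable */
	if (toy_assign(c, 2, NUM_GP / 2, assign) == 0)
		printf("event0 -> counter %d, event1 -> counter %d\n",
		       assign[0], assign[1]);
	else
		printf("unschedulable\n");
	return 0;
}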
arch/x86/kernel/cpu/perf_event.h +17 −7
@@ -74,6 +74,7 @@ struct event_constraint {
#define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */


struct amd_nb {
@@ -134,8 +135,6 @@ enum intel_excl_state_type {
struct intel_excl_states {
	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-	int  num_alloc_cntrs;/* #counters allocated */
-	int  max_alloc_cntrs;/* max #counters allowed */
	bool sched_started; /* true if scheduling has started */
};

@@ -144,6 +143,11 @@ struct intel_excl_cntrs {

	struct intel_excl_states states[2];

+	union {
+		u16	has_exclusive[2];
+		u32	exclusive_present;
+	};
+
	int		refcnt;		/* per-core: #HT threads */
	unsigned	core_id;	/* per-core: core id */
};
@@ -172,7 +176,11 @@ struct cpu_hw_events {
					     added in the current transaction */
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64			tags[X86_PMC_IDX_MAX];
+
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];
+
+	int			n_excl; /* the number of exclusive events */

	unsigned int		group_flag;
	int			is_fake;
@@ -519,9 +527,7 @@ struct x86_pmu {
	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

-	void		(*commit_scheduling)(struct cpu_hw_events *cpuc,
-					     struct perf_event *event,
-					     int cntr);
+	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*start_scheduling)(struct cpu_hw_events *cpuc);

@@ -717,8 +723,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,

void x86_pmu_enable_all(int added);

-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign);
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);
@@ -929,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
	return NULL;
}

+static inline int is_ht_workaround_enabled(void)
+{
+	return 0;
+}
#endif /* CONFIG_CPU_SUP_INTEL */
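
The has_exclusive[]/exclusive_present union added above lets each hyperthread publish its own flag with a 16-bit store while the scheduler checks both siblings with one 32-bit load. A self-contained sketch of the layout (union excl_flags is an invented name; only the zero/non-zero test matters, so the result is endianness-independent):

#include <stdint.h>
#include <stdio.h>

/* Same layout trick as struct intel_excl_cntrs: two per-thread
 * u16 flags aliased by one u32, so "does either sibling have an
 * exclusive event?" is a single load of exclusive_present. */
union excl_flags {
	uint16_t has_exclusive[2];
	uint32_t exclusive_present;
};

int main(void)
{
	union excl_flags f = { .exclusive_present = 0 };

	printf("present: %d\n", f.exclusive_present != 0);	/* 0 */

	f.has_exclusive[1] = 1;		/* sibling thread 1 flags itself */
	printf("present: %d\n", f.exclusive_present != 0);	/* 1 */

	f.has_exclusive[1] = 0;
	f.has_exclusive[0] = 1;		/* thread 0 flags itself */
	printf("present: %d\n", f.exclusive_present != 0);	/* 1 */
	return 0;
}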
arch/x86/kernel/cpu/perf_event_intel.c +14 −31
@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
	xl = &excl_cntrs->states[tid];

	xl->sched_started = true;
-	xl->num_alloc_cntrs = 0;
	/*
	 * lock shared state until we are done scheduling
	 * in stop_event_scheduling()
@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * across HT threads
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}

	/*
	 * xl = state of current HT
@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

-	/*
-	 * do not allow scheduling of more than max_alloc_cntrs
-	 * which is set to half the available generic counters.
-	 * this helps avoid counter starvation of sibling thread
-	 * by ensuring at most half the counters cannot be in
-	 * exclusive mode. There is not designated counters for the
-	 * limits. Any N/2 counters can be used. This helps with
-	 * events with specifix counter constraints
-	 */
-	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-		return &emptyconstraint;
-
	cx = c;

	/*
@@ -2106,7 +2098,7 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			    struct perf_event *event)
{
-	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c1 = cpuc->event_constraint[idx];
	struct event_constraint *c2;

	/*
@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,

	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];
+	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+		if (!--cpuc->n_excl)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+	}

	/*
	 * put_constraint may be called from x86_schedule_events()
@@ -2188,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
					struct perf_event *event)
{
-	struct event_constraint *c = event->hw.constraint;
-
	intel_put_shared_regs_event_constraints(cpuc, event);

	/*
@@ -2197,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
	 * all events are subject to and must call the
	 * put_excl_constraints() routine
	 */
-	if (c && cpuc->excl_cntrs)
+	if (cpuc->excl_cntrs)
		intel_put_excl_constraints(cpuc, event);
-
-	/* cleanup dynamic constraint */
-	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
-		event->hw.constraint = NULL;
}

-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
-				    struct perf_event *event, int cntr)
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct event_constraint *c = event->hw.constraint;
+	struct event_constraint *c = cpuc->event_constraint[idx];
	struct intel_excl_states *xlo, *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid;
@@ -2639,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)
		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];

	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-		int h = x86_pmu.num_counters >> 1;
-
		for_each_cpu(i, topology_thread_cpumask(cpu)) {
			struct intel_excl_cntrs *c;

@@ -2654,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)
		}
		cpuc->excl_cntrs->core_id = core_id;
		cpuc->excl_cntrs->refcnt++;
-		/*
-		 * set hard limit to half the number of generic counters
-		 */
-		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
	}
}

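The EXCL_ACCT changes in this file follow a common kernel pattern: a per-object "already accounted" flag keeps a refcount exact even if get/put constraints run more than once for the same event, and only the 0<->1 edges of the refcount publish the shared per-thread flag. A user-space miniature of the pattern, with the locking and the kernel's READ_ONCE()/WRITE_ONCE() pairing elided and all names invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented miniature of the EXCL_ACCT bookkeeping. */
struct toy_event {
	bool excl;	/* wants a counter exclusively */
	bool acct;	/* already counted in n_excl (EXCL_ACCT) */
};

static int n_excl;
static uint16_t has_exclusive;

static void toy_get_constraints(struct toy_event *e)
{
	if (e->excl && !e->acct) {
		e->acct = true;
		if (!n_excl++)
			has_exclusive = 1;	/* first exclusive event */
	}
}

static void toy_put_constraints(struct toy_event *e)
{
	if (e->acct) {
		e->acct = false;
		if (!--n_excl)
			has_exclusive = 0;	/* last one gone */
	}
}

int main(void)
{
	struct toy_event a = { .excl = true }, b = { .excl = true };

	toy_get_constraints(&a);
	toy_get_constraints(&a);	/* double get: acct flag keeps count right */
	toy_get_constraints(&b);
	printf("n_excl=%d present=%d\n", n_excl, has_exclusive);	/* 2 1 */

	toy_put_constraints(&a);
	toy_put_constraints(&b);
	printf("n_excl=%d present=%d\n", n_excl, has_exclusive);	/* 0 0 */
	return 0;
}
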
arch/x86/kernel/cpu/perf_event_intel_ds.c +2 −2
@@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event)

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

-	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	if (cpuc->enabled)
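
The two-line fix above matters because event->hw.constraint may refer to a dynamically allocated constraint that is already gone by the time PEBS is disabled, so the flags are now read from the event itself. The bit layout being cleared, per the hunk: bit idx enables PEBS on counter idx, bit idx+32 its load-latency filter, bit 63 the store rules. A sketch of just that masking (the TOY_* flag values are invented):

#include <stdint.h>
#include <stdio.h>

#define TOY_PEBS_LDLAT	0x1	/* invented flag values */
#define TOY_PEBS_ST	0x2

/* Mirror of the bit layout used by pebs_enabled in the hunk
 * above: bit idx enables PEBS on counter idx, bit idx+32 its
 * load-latency filter, bit 63 the store rules. */
static uint64_t toy_pebs_disable(uint64_t pebs_enabled, int idx, unsigned flags)
{
	pebs_enabled &= ~(1ULL << idx);

	if (flags & TOY_PEBS_LDLAT)
		pebs_enabled &= ~(1ULL << (idx + 32));
	else if (flags & TOY_PEBS_ST)
		pebs_enabled &= ~(1ULL << 63);

	return pebs_enabled;
}

int main(void)
{
	uint64_t en = (1ULL << 1) | (1ULL << 33);	/* counter 1 + its ldlat bit */

	en = toy_pebs_disable(en, 1, TOY_PEBS_LDLAT);
	printf("pebs_enabled = %#llx\n", (unsigned long long)en);	/* 0 */
	return 0;
}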
arch/x86/kernel/cpu/perf_event_intel_pt.c +23 −13
@@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)

		de_attr->attr.attr.name = pt_caps[i].name;

-		sysfs_attr_init(&de_attrs->attr.attr);
+		sysfs_attr_init(&de_attr->attr.attr);

		de_attr->attr.attr.mode		= S_IRUGO;
		de_attr->attr.show		= pt_cap_show;
@@ -615,7 +615,8 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
				   struct perf_output_handle *handle)

{
-	unsigned long idx, npages, end;
+	unsigned long head = local64_read(&buf->head);
+	unsigned long idx, npages, wakeup;

	if (buf->snapshot)
		return 0;
@@ -634,17 +635,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
	buf->topa_index[buf->stop_pos]->stop = 0;
	buf->topa_index[buf->intr_pos]->intr = 0;

-	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-		npages = (handle->size + 1) >> PAGE_SHIFT;
-		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
-		/*if (end > handle->wakeup >> PAGE_SHIFT)
-		  end = handle->wakeup >> PAGE_SHIFT;*/
-		idx = end & (buf->nr_pages - 1);
-		buf->stop_pos = idx;
-		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
-		idx &= buf->nr_pages - 1;
-		buf->intr_pos = idx;
-	}
+	/* how many pages till the STOP marker */
+	npages = handle->size >> PAGE_SHIFT;
+
+	/* if it's on a page boundary, fill up one more page */
+	if (!offset_in_page(head + handle->size + 1))
+		npages++;
+
+	idx = (head >> PAGE_SHIFT) + npages;
+	idx &= buf->nr_pages - 1;
+	buf->stop_pos = idx;
+
+	wakeup = handle->wakeup >> PAGE_SHIFT;
+
+	/* in the worst case, wake up the consumer one page before hard stop */
+	idx = (head >> PAGE_SHIFT) + npages - 1;
+	if (idx > wakeup)
+		idx = wakeup;
+
+	idx &= buf->nr_pages - 1;
+	buf->intr_pos = idx;

	buf->topa_index[buf->stop_pos]->stop = 1;
	buf->topa_index[buf->intr_pos]->intr = 1;
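
To see what the untangled marker math computes, here is a user-space transcription of the added lines: given the current head, the usable size, the wakeup offset and a power-of-two page count, it produces the table indices for the hard STOP marker and the earlier interrupt marker. toy_reset_markers() and the inputs in main() are invented; the arithmetic mirrors the hunk:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/*
 * User-space rework of the marker math above; nr_pages must be a
 * power of two so the "& (nr_pages - 1)" ring masking works.
 */
static void toy_reset_markers(unsigned long head, unsigned long size,
			      unsigned long wakeup_byte, unsigned long nr_pages,
			      unsigned long *stop_pos, unsigned long *intr_pos)
{
	unsigned long idx, npages, wakeup;

	/* how many pages till the STOP marker */
	npages = size >> PAGE_SHIFT;

	/* if it's on a page boundary, fill up one more page */
	if (!((head + size + 1) & (PAGE_SIZE - 1)))
		npages++;

	idx = (head >> PAGE_SHIFT) + npages;
	*stop_pos = idx & (nr_pages - 1);

	wakeup = wakeup_byte >> PAGE_SHIFT;

	/* in the worst case, wake up the consumer one page before hard stop */
	idx = (head >> PAGE_SHIFT) + npages - 1;
	if (idx > wakeup)
		idx = wakeup;

	*intr_pos = idx & (nr_pages - 1);
}

int main(void)
{
	unsigned long stop, intr;

	/* head 20KB into a 16-page ring, 12KB of space, wakeup at 28KB */
	toy_reset_markers(20480, 12288, 28672, 16, &stop, &intr);
	printf("stop_pos=%lu intr_pos=%lu\n", stop, intr);	/* 8 and 7 */
	return 0;
}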