Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a042e261 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-fixes-for-linus' of...

Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...
parents f66dd539 e25804a0
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@
#define MSR_AMD64_IBSDCLINAD		0xc0011038
#define MSR_AMD64_IBSDCPHYSAD		0xc0011039
#define MSR_AMD64_IBSCTL		0xc001103a
#define MSR_AMD64_IBSBRTARGET		0xc001103b

/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
+10 −9
Original line number Diff line number Diff line
@@ -122,6 +122,7 @@ union cpuid10_edx {
#define IBS_OP_VAL		(1ULL<<18)
#define IBS_OP_ENABLE		(1ULL<<17)
#define IBS_OP_MAX_CNT		0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */

#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
+10 −11
Original line number Diff line number Diff line
@@ -237,6 +237,7 @@ struct x86_pmu {
	 * Intel DebugStore bits
	 */
	int		bts, pebs;
	int		bts_active, pebs_active;
	int		pebs_record_size;
	void		(*drain_pebs)(struct pt_regs *regs);
	struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}

#endif

static int reserve_ds_buffers(void);
static void reserve_ds_buffers(void);
static void release_ds_buffers(void);

static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
	    (hwc->sample_period == 1)) {
		/* BTS is not supported by this architecture. */
		if (!x86_pmu.bts)
		if (!x86_pmu.bts_active)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
		int precise = 0;

		/* Support for constant skid */
		if (x86_pmu.pebs)
		if (x86_pmu.pebs_active) {
			precise++;

			/* Support for IP fixup */
			if (x86_pmu.lbr_nr)
				precise++;
		}

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
		if (atomic_read(&active_events) == 0) {
			if (!reserve_pmc_hardware())
				err = -EBUSY;
			else {
				err = reserve_ds_buffers();
				if (err)
					release_pmc_hardware();
			}
			else
				reserve_ds_buffers();
		}
		if (!err)
			atomic_inc(&active_events);
+150 −66
Original line number Diff line number Diff line
@@ -74,58 +74,57 @@ static void fini_debug_store_on_cpu(int cpu)
	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_ds_buffers(void)
static int alloc_pebs_buffer(int cpu)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh = 1; /* always use a single PEBS record */
	void *buffer;

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);
	if (!x86_pmu.pebs)
		return 0;

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!buffer))
		return -ENOMEM;

		if (!ds)
			continue;
	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

		per_cpu(cpu_hw_events, cpu).ds = NULL;
	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;

		kfree((void *)(unsigned long)ds->pebs_buffer_base);
		kfree((void *)(unsigned long)ds->bts_buffer_base);
		kfree(ds);
	}
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		thresh * x86_pmu.pebs_record_size;

	put_online_cpus();
	return 0;
}

static int reserve_ds_buffers(void)
static void release_pebs_buffer(int cpu)
{
	int cpu, err = 0;
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return 0;
	if (!ds || !x86_pmu.pebs)
		return;

	get_online_cpus();
	kfree((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;
static int alloc_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh;
	void *buffer;

		err = -ENOMEM;
		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds))
			break;
		per_cpu(cpu_hw_events, cpu).ds = ds;
	if (!x86_pmu.bts)
		return 0;

		if (x86_pmu.bts) {
			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!buffer))
				break;
		return -ENOMEM;

	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;
@@ -136,39 +135,125 @@ static int reserve_ds_buffers(void)
		max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
		thresh * BTS_RECORD_SIZE;

	return 0;
}

		if (x86_pmu.pebs) {
			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;
static void release_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
	if (!ds || !x86_pmu.bts)
		return;

			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
			ds->pebs_index = ds->pebs_buffer_base;
			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
				max * x86_pmu.pebs_record_size;
			/*
			 * Always use single record PEBS
			 */
			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
				x86_pmu.pebs_record_size;
	kfree((void *)(unsigned long)ds->bts_buffer_base);
	ds->bts_buffer_base = 0;
}

		err = 0;
static int alloc_ds_buffer(int cpu)
{
	int node = cpu_to_node(cpu);
	struct debug_store *ds;

	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
	if (unlikely(!ds))
		return -ENOMEM;

	per_cpu(cpu_hw_events, cpu).ds = ds;

	return 0;
}

	if (err)
		release_ds_buffers();
	else {
static void release_ds_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	per_cpu(cpu_hw_events, cpu).ds = NULL;
	kfree(ds);
}

static void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}

static void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}

/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
	if (!event)
		return 0;

	if (!ds)
	if (!x86_pmu.bts_active)
		return 0;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
	struct pebs_record_core *at, *top;
	int n;

	if (!ds || !x86_pmu.pebs)
	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
	u64 status = 0;
	int bit, n;

	if (!ds || !x86_pmu.pebs)
	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)

#else /* CONFIG_CPU_SUP_INTEL */

static int reserve_ds_buffers(void)
static void reserve_ds_buffers(void)
{
	return 0;
}

static void release_ds_buffers(void)
+6 −0
Original line number Diff line number Diff line
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
		case 0x11:
			cpu_type = "x86-64/family11h";
			break;
		case 0x12:
			cpu_type = "x86-64/family12h";
			break;
		case 0x14:
			cpu_type = "x86-64/family14h";
			break;
		default:
			return -ENODEV;
		}
Loading