Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eee3af4a authored by Markus Metzger, committed by Ingo Molnar
Browse files

x86, ptrace: support for branch trace store(BTS)



Resend using different mail client

Changes to the last version:
- split implementation into two layers: ds/bts and ptrace
- renamed TIF's
- save/restore ds save area msr in __switch_to_xtra()
- make block-stepping only look at BTF bit

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 7796931f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
		quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o

obj-y				+= ptrace.o
obj-y				+= ds.o
obj-y				+= tls.o
obj-y				+= step.o
obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
		i8253.o io_delay.o rtc.o

obj-y				+= ptrace.o
obj-y				+= ds.o
obj-y				+= step.o

obj-$(CONFIG_IA32_EMULATION)	+= tls.o
+5 −0
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/ds.h>

#include "cpu.h"

@@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
		if (!(l1 & (1<<12)))
			set_bit(X86_FEATURE_PEBS, c->x86_capability);
	}

	if (cpu_has_bts)
		ds_init_intel(c);
}

static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)

arch/x86/kernel/ds.c

0 → 100644
+429 −0
Original line number Diff line number Diff line
/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for last branch recording (LBR) and
 * precise-event based sampling (PEBS).
 *
 * Different architectures use a different DS layout/pointer size.
 * The below functions therefore work on a void*.
 *
 *
 * Since there is no user for PEBS, yet, only LBR (or branch
 * trace store, BTS) is supported.
 *
 *
 * Copyright (C) 2007 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
 */

#include <asm/ds.h>

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>


/*
 * Debug Store (DS) save area configuration (see Intel64 and IA32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * On later architectures, the last branch recording hardware uses
 * 64bit pointers even in 32bit mode.
 *
 *
 * Branch Trace Store (BTS) records store information about control
 * flow changes. They at least provide the following information:
 * - source linear address
 * - destination linear address
 *
 * Netburst supported a predicated bit that had been dropped in later
 * architectures. We do not support it.
 *
 *
 * In order to abstract from the actual DS and BTS layout, we describe
 * the access to the relevant fields.
 * Thanks to Andi Kleen for proposing this design.
 *
 * The implementation, however, is not as general as it might seem. In
 * order to stay somewhat simple and efficient, we assume an
 * underlying unsigned type (mostly a pointer type) and we expect the
 * field to be at least as big as that type.
 */

/*
 * A special from_ip address to indicate that the BTS record is an
 * info record that needs to be interpreted or skipped.
 *
 * ds_write_bts() stores it as from_ip for BTS_TASK_ARRIVES and
 * BTS_TASK_DEPARTS records; ds_read_bts() compares from_ip against it
 * to tell info records apart from branch records.
 */
#define BTS_ESCAPE_ADDRESS (-1)

/*
 * A field access descriptor
 */
struct access_desc {
	/* byte offset of the field, added to the base address on access */
	unsigned char offset;
	/* size of the field in bytes */
	unsigned char size;
};

/*
 * The configuration for a particular DS/BTS hardware implementation.
 */
struct ds_configuration {
	/* the DS configuration */
	/* size in bytes of the DS save area (used as allocation size) */
	unsigned char  sizeof_ds;
	/* location of the BTS buffer start pointer inside the DS area */
	struct access_desc bts_buffer_base;
	/* location of the BTS write pointer inside the DS area */
	struct access_desc bts_index;
	/* location of the BTS buffer end pointer inside the DS area */
	struct access_desc bts_absolute_maximum;
	/* location of the BTS interrupt pointer inside the DS area */
	struct access_desc bts_interrupt_threshold;
	/* the BTS configuration */
	/* size in bytes of one BTS record */
	unsigned char  sizeof_bts;
	/* location of the branch source address inside a BTS record */
	struct access_desc from_ip;
	/* location of the branch destination address inside a BTS record */
	struct access_desc to_ip;
	/* BTS variants used to store additional information like
	   timestamps */
	/* location of the info record qualifier inside a BTS record */
	struct access_desc info_type;
	/* location of the info record payload inside a BTS record */
	struct access_desc info_data;
	/* value handed out by ds_debugctl_mask();
	   NOTE(review): presumably the DEBUGCTL MSR bits owned by
	   branch tracing on this CPU - confirm against the callers */
	unsigned long debugctl_mask;
};

/*
 * The global configuration used by the below accessor functions.
 *
 * It has static storage and therefore starts out all zero; it is
 * filled in by ds_configure(). The allocate/size/index/read/write
 * functions treat a zero sizeof_ds or sizeof_bts as "DS not supported"
 * and return -EOPNOTSUPP.
 */
static struct ds_configuration ds_cfg;

/*
 * Accessor functions for some DS and BTS fields using the above
 * global ds_cfg.
 */
/* Read the BTS buffer base pointer from the DS area at @base. */
static inline void *get_bts_buffer_base(char *base)
{
	void **field = (void **)(base + ds_cfg.bts_buffer_base.offset);

	return *field;
}
/* Store @value as the BTS buffer base pointer in the DS area at @base. */
static inline void set_bts_buffer_base(char *base, void *value)
{
	void **field = (void **)(base + ds_cfg.bts_buffer_base.offset);

	*field = value;
}
/* Read the BTS write pointer from the DS area at @base. */
static inline void *get_bts_index(char *base)
{
	void **field = (void **)(base + ds_cfg.bts_index.offset);

	return *field;
}
/* Store @value as the BTS write pointer in the DS area at @base. */
static inline void set_bts_index(char *base, void *value)
{
	void **field = (void **)(base + ds_cfg.bts_index.offset);

	*field = value;
}
/* Read the BTS buffer end pointer from the DS area at @base. */
static inline void *get_bts_absolute_maximum(char *base)
{
	void **field = (void **)(base + ds_cfg.bts_absolute_maximum.offset);

	return *field;
}
/* Store @value as the BTS buffer end pointer in the DS area at @base. */
static inline void set_bts_absolute_maximum(char *base, void *value)
{
	void **field = (void **)(base + ds_cfg.bts_absolute_maximum.offset);

	*field = value;
}
/* Read the BTS interrupt pointer from the DS area at @base. */
static inline void *get_bts_interrupt_threshold(char *base)
{
	void **field =
		(void **)(base + ds_cfg.bts_interrupt_threshold.offset);

	return *field;
}
/* Store @value as the BTS interrupt pointer in the DS area at @base. */
static inline void set_bts_interrupt_threshold(char *base, void *value)
{
	void **field =
		(void **)(base + ds_cfg.bts_interrupt_threshold.offset);

	*field = value;
}
/* Read the from_ip field of the BTS record at @base. */
static inline long get_from_ip(char *base)
{
	long *field = (long *)(base + ds_cfg.from_ip.offset);

	return *field;
}
/* Store @value in the from_ip field of the BTS record at @base. */
static inline void set_from_ip(char *base, long value)
{
	long *field = (long *)(base + ds_cfg.from_ip.offset);

	*field = value;
}
/* Read the to_ip field of the BTS record at @base. */
static inline long get_to_ip(char *base)
{
	long *field = (long *)(base + ds_cfg.to_ip.offset);

	return *field;
}
/* Store @value in the to_ip field of the BTS record at @base. */
static inline void set_to_ip(char *base, long value)
{
	long *field = (long *)(base + ds_cfg.to_ip.offset);

	*field = value;
}
/* Read the one-byte info type of the BTS record at @base. */
static inline unsigned char get_info_type(char *base)
{
	unsigned char *field =
		(unsigned char *)(base + ds_cfg.info_type.offset);

	return *field;
}
/* Store @value as the one-byte info type of the BTS record at @base. */
static inline void set_info_type(char *base, unsigned char value)
{
	unsigned char *field =
		(unsigned char *)(base + ds_cfg.info_type.offset);

	*field = value;
}
/*
 * The info data might overlap with the info type on some architectures.
 * We therefore read and write the exact number of bytes.
 */
static inline unsigned long long get_info_data(char *base)
{
	const char *src = base + ds_cfg.info_data.offset;
	unsigned long long data = 0;

	/* copy only info_data.size bytes; the remaining bytes stay zero */
	memcpy(&data, src, ds_cfg.info_data.size);

	return data;
}
/*
 * Store the first info_data.size bytes of @value in the info data
 * field of the BTS record at @base; bytes outside the field are left
 * untouched.
 */
static inline void set_info_data(char *base, unsigned long long value)
{
	char *dst = base + ds_cfg.info_data.offset;

	memcpy(dst, &value, ds_cfg.info_data.size);
}


/*
 * Allocate a zeroed DS save area together with a zeroed BTS buffer and
 * link the two.
 *
 * @dsp:                 receives the new DS area on success; it is not
 *                       written on failure
 * @bts_size_in_records: number of BTS records the buffer shall hold
 *
 * Returns 0 on success, -EOPNOTSUPP if no DS configuration has been
 * selected, -EINVAL if the requested size is zero or its size in bytes
 * does not fit into a size_t, and -ENOMEM if an allocation fails.
 *
 * Both allocations are released again by ds_free().
 */
int ds_allocate(void **dsp, size_t bts_size_in_records)
{
	size_t bts_size_in_bytes;
	char *bts;
	void *ds;

	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
		return -EOPNOTSUPP;

	/* size_t is unsigned; zero is the only invalid count */
	if (!bts_size_in_records)
		return -EINVAL;

	bts_size_in_bytes = bts_size_in_records * ds_cfg.sizeof_bts;

	/*
	 * Reject requests whose size in bytes wrapped around; otherwise
	 * we would silently hand out a buffer smaller than asked for.
	 */
	if (bts_size_in_bytes / ds_cfg.sizeof_bts != bts_size_in_records)
		return -EINVAL;

	bts = kzalloc(bts_size_in_bytes, GFP_KERNEL);
	if (!bts)
		return -ENOMEM;

	ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
	if (!ds) {
		kfree(bts);
		return -ENOMEM;
	}

	set_bts_buffer_base(ds, bts);
	set_bts_index(ds, bts);
	set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
	/*
	 * The interrupt pointer is placed beyond the end of the buffer.
	 * NOTE(review): presumably so that the write pointer never
	 * passes it and no threshold interrupt is raised - confirm.
	 */
	set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);

	*dsp = ds;
	return 0;
}

/*
 * Release a DS area obtained from ds_allocate() together with the BTS
 * buffer it points to, and clear the caller's pointer.
 *
 * @dsp: address of the DS area pointer; *dsp may be a null pointer
 *
 * Always returns 0.
 */
int ds_free(void **dsp)
{
	void *ds = *dsp;

	if (ds) {
		/* the buffer pointer lives inside ds; fetch it first */
		kfree(get_bts_buffer_base(ds));
		kfree(ds);
	}

	*dsp = 0;
	return 0;
}

/*
 * Compute the capacity of the BTS buffer described by @ds, in records.
 *
 * Returns the distance between the buffer base and its absolute
 * maximum divided by the record size, or -EOPNOTSUPP if no DS
 * configuration has been selected.
 */
int ds_get_bts_size(void *ds)
{
	char *first;
	char *limit;
	size_t span;

	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
		return -EOPNOTSUPP;

	first = get_bts_buffer_base(ds);
	limit = get_bts_absolute_maximum(ds);
	span = limit - first;

	return span / ds_cfg.sizeof_bts;
}

/*
 * Compute the position of the BTS write pointer of @ds, in records
 * counted from the start of the buffer.
 *
 * Returns that record index, or -EOPNOTSUPP if no DS configuration has
 * been selected.
 */
int ds_get_bts_index(void *ds)
{
	char *first;
	char *cursor;
	size_t span;

	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
		return -EOPNOTSUPP;

	first = get_bts_buffer_base(ds);
	cursor = get_bts_index(ds);
	span = cursor - first;

	return span / ds_cfg.sizeof_bts;
}

/*
 * Decode the BTS record at position @index of the buffer of @ds.
 *
 * @ds:    the DS area describing the BTS buffer
 * @index: record index, counted from the start of the buffer
 * @out:   receives the decoded record; it is zeroed first
 *
 * A record whose from_ip equals BTS_ESCAPE_ADDRESS is decoded as an
 * info record (qualifier and timestamp); any other record is decoded
 * as a BTS_BRANCH with its from/to addresses.
 *
 * Returns 0 on success, -EOPNOTSUPP if no DS configuration has been
 * selected, and -EINVAL if @index lies outside the buffer.
 */
int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
{
	char *bts;
	long from_ip;
	int size;

	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
		return -EOPNOTSUPP;

	/*
	 * index is unsigned, so only the upper bound needs checking.
	 * Handle an error from the size query explicitly instead of
	 * letting it turn into a huge unsigned bound.
	 */
	size = ds_get_bts_size(ds);
	if (size < 0)
		return size;
	if (index >= (size_t)size)
		return -EINVAL;

	bts = get_bts_buffer_base(ds);
	bts += index * ds_cfg.sizeof_bts;

	memset(out, 0, sizeof(*out));

	from_ip = get_from_ip(bts);
	if (from_ip == BTS_ESCAPE_ADDRESS) {
		out->qualifier         = get_info_type(bts);
		out->variant.timestamp = get_info_data(bts);
	} else {
		out->qualifier = BTS_BRANCH;
		out->variant.lbr.from_ip = from_ip;
		out->variant.lbr.to_ip   = get_to_ip(bts);
	}

	return 0;
}

/*
 * Encode @in into the BTS record at the current write pointer of @ds
 * and advance the write pointer, wrapping around to the buffer base
 * when the end of the buffer is reached.
 *
 * @ds: the DS area describing the BTS buffer
 * @in: the record to store
 *
 * BTS_INVALID stores an all-zero record. BTS_TASK_ARRIVES and
 * BTS_TASK_DEPARTS are stored as info records, i.e. with from_ip set
 * to BTS_ESCAPE_ADDRESS.
 *
 * Returns 0 on success, -EOPNOTSUPP if no DS configuration has been
 * selected, -ENXIO if @ds has no BTS buffer, and -EINVAL for an
 * unknown qualifier. The buffer is left untouched on error.
 */
int ds_write_bts(void *ds, const struct bts_struct *in)
{
	char *bts;

	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
		return -EOPNOTSUPP;

	if (ds_get_bts_size(ds) <= 0)
		return -ENXIO;

	/*
	 * Validate the request before touching the buffer, so that a
	 * rejected write does not wipe the record at the write pointer.
	 */
	switch (in->qualifier) {
	case BTS_INVALID:
	case BTS_BRANCH:
	case BTS_TASK_ARRIVES:
	case BTS_TASK_DEPARTS:
		break;

	default:
		return -EINVAL;
	}

	bts = get_bts_index(ds);

	memset(bts, 0, ds_cfg.sizeof_bts);
	switch (in->qualifier) {
	case BTS_BRANCH:
		set_from_ip(bts, in->variant.lbr.from_ip);
		set_to_ip(bts, in->variant.lbr.to_ip);
		break;

	case BTS_TASK_ARRIVES:
	case BTS_TASK_DEPARTS:
		set_from_ip(bts, BTS_ESCAPE_ADDRESS);
		set_info_type(bts, in->qualifier);
		set_info_data(bts, in->variant.timestamp);
		break;

	default:
		/* BTS_INVALID: the record stays all zero */
		break;
	}

	bts += ds_cfg.sizeof_bts;
	if (bts >= (char *)get_bts_absolute_maximum(ds))
		bts = get_bts_buffer_base(ds);
	set_bts_index(ds, bts);

	return 0;
}

unsigned long ds_debugctl_mask(void)
{
	return ds_cfg.debugctl_mask;
}

#ifdef __i386__
/*
 * Netburst: 4-byte DS fields, BTS records of three 4-byte words.
 * NOTE(review): debugctl bits 2 and 3 are presumably TR and BTS -
 * confirm against the Intel SDM.
 */
static const struct ds_configuration ds_cfg_netburst = {
	.sizeof_ds		 = 9 * 4,
	.bts_buffer_base	 = { .offset = 0,  .size = 4 },
	.bts_index		 = { .offset = 4,  .size = 4 },
	.bts_absolute_maximum	 = { .offset = 8,  .size = 4 },
	.bts_interrupt_threshold = { .offset = 12, .size = 4 },
	.sizeof_bts		 = 3 * 4,
	.from_ip		 = { .offset = 0,  .size = 4 },
	.to_ip			 = { .offset = 4,  .size = 4 },
	/* info type and data share the space after from_ip */
	.info_type		 = { .offset = 4,  .size = 1 },
	.info_data		 = { .offset = 5,  .size = 7 },
	.debugctl_mask		 = (1 << 2) | (1 << 3)
};

/*
 * Pentium M: same layout as Netburst, different debugctl bits.
 * NOTE(review): debugctl bits 6 and 7 are presumably TR and BTS -
 * confirm against the Intel SDM.
 */
static const struct ds_configuration ds_cfg_pentium_m = {
	.sizeof_ds		 = 9 * 4,
	.bts_buffer_base	 = { .offset = 0,  .size = 4 },
	.bts_index		 = { .offset = 4,  .size = 4 },
	.bts_absolute_maximum	 = { .offset = 8,  .size = 4 },
	.bts_interrupt_threshold = { .offset = 12, .size = 4 },
	.sizeof_bts		 = 3 * 4,
	.from_ip		 = { .offset = 0,  .size = 4 },
	.to_ip			 = { .offset = 4,  .size = 4 },
	/* info type and data share the space after from_ip */
	.info_type		 = { .offset = 4,  .size = 1 },
	.info_data		 = { .offset = 5,  .size = 7 },
	.debugctl_mask		 = (1 << 6) | (1 << 7)
};
#endif /* __i386__ */

/*
 * Core2: 8-byte DS fields, BTS records of three 8-byte words.
 * NOTE(review): debugctl bits 6, 7 and 9 are presumably TR, BTS and
 * BTS_OFF_OS - confirm against the Intel SDM.
 */
static const struct ds_configuration ds_cfg_core2 = {
	.sizeof_ds		 = 9 * 8,
	.bts_buffer_base	 = { .offset = 0,  .size = 8 },
	.bts_index		 = { .offset = 8,  .size = 8 },
	.bts_absolute_maximum	 = { .offset = 16, .size = 8 },
	.bts_interrupt_threshold = { .offset = 24, .size = 8 },
	.sizeof_bts		 = 3 * 8,
	.from_ip		 = { .offset = 0,  .size = 8 },
	.to_ip			 = { .offset = 8,  .size = 8 },
	/* info type and data share the space after from_ip */
	.info_type		 = { .offset = 8,  .size = 1 },
	.info_data		 = { .offset = 9,  .size = 7 },
	.debugctl_mask		 = (1 << 6) | (1 << 7) | (1 << 9)
};

/* Make @cfg the active configuration by copying it into ds_cfg. */
static inline void
ds_configure(const struct ds_configuration *cfg)
{
	memcpy(&ds_cfg, cfg, sizeof(ds_cfg));
}

/*
 * Select the DS/BTS layout for the Intel CPU described by @c, based on
 * its family (c->x86) and model (c->x86_model).
 *
 * For a family/model without a known layout nothing is configured and
 * ds_cfg is left as it is. The Pentium M and Netburst layouts are only
 * available when built for __i386__.
 */
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
	const struct ds_configuration *cfg = NULL;

	if (c->x86 == 0x6) {
		switch (c->x86_model) {
#ifdef __i386__
		case 0xD:
		case 0xE: /* Pentium M */
			cfg = &ds_cfg_pentium_m;
			break;
#endif /* __i386__ */
		case 0xF: /* Core2 */
			cfg = &ds_cfg_core2;
			break;
		default:
			/* sorry, don't know about them */
			break;
		}
	} else if (c->x86 == 0xF) {
		switch (c->x86_model) {
#ifdef __i386__
		case 0x0:
		case 0x1:
		case 0x2: /* Netburst */
			cfg = &ds_cfg_netburst;
			break;
#endif /* __i386__ */
		default:
			/* sorry, don't know about them */
			break;
		}
	}

	if (cfg)
		ds_configure(cfg);
}
+18 −1
Original line number Diff line number Diff line
@@ -614,11 +614,21 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (next->debugctlmsr != prev->debugctlmsr)
	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
	}

	if (next->debugctlmsr != debugctl)
		wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -642,6 +652,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
	}
#endif

	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);


	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Disable the bitmap via an invalid offset. We still cache
Loading