Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c8cd093a authored by Anton Blanchard, committed by Paul Mackerras
Browse files

powerpc: tracing: Add hypervisor call tracepoints



Add hcall_entry and hcall_exit tracepoints.  This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.

To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load.  By doing so we
keep the overhead at a minimum.  Time taken for a null hcall:

No tracepoint code:	135.79 cycles
Disabled tracepoints:	137.95 cycles

For reference, before this patch enabling HCALL_STATS resulted in a null
hcall of 201.44 cycles!

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent 6795b85c
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE


config HCALL_STATS
config HCALL_STATS
	bool "Hypervisor call instrumentation"
	bool "Hypervisor call instrumentation"
	depends on PPC_PSERIES && DEBUG_FS
	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
	help
	help
	  Adds code to keep track of the number of hypervisor calls made and
	  Adds code to keep track of the number of hypervisor calls made and
	  the amount of time spent in hypervisor calls.  Wall time spent in
	  the amount of time spent in hypervisor calls.  Wall time spent in
+2 −0
Original line number Original line Diff line number Diff line
@@ -274,6 +274,8 @@ struct hcall_stats {
	unsigned long	num_calls;	/* number of calls (on this CPU) */
	unsigned long	num_calls;	/* number of calls (on this CPU) */
	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
	unsigned long	tb_start;
	unsigned long	purr_start;
};
};
#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)


+45 −0
Original line number Original line Diff line number Diff line
@@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit,
	TP_printk("pt_regs=%p", __entry->regs)
	TP_printk("pt_regs=%p", __entry->regs)
);
);


#ifdef CONFIG_PPC_PSERIES
/*
 * Callbacks handed to TRACE_EVENT_FN() as the registration and
 * unregistration functions of the hcall_entry and hcall_exit events.
 * They are defined outside this header; presumably they maintain the
 * hcall_tracepoint_refcount word tested by the hcall assembly wrappers
 * -- TODO confirm against their definition.
 */
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);

/*
 * hcall_entry tracepoint.
 *
 * Takes the hypervisor call opcode as its only argument, records it in
 * the "opcode" field and prints it as an unsigned decimal.
 * hcall_tracepoint_regfunc and hcall_tracepoint_unregfunc are supplied
 * as the TRACE_EVENT_FN() registration/unregistration callbacks.
 */
TRACE_EVENT_FN(hcall_entry,

	TP_PROTO(unsigned long opcode),

	TP_ARGS(opcode),

	TP_STRUCT__entry(
		__field(unsigned long, opcode)
	),

	TP_fast_assign(
		__entry->opcode = opcode;
	),

	TP_printk("opcode=%lu", __entry->opcode),

	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);

/*
 * hcall_exit tracepoint.
 *
 * Takes the hypervisor call opcode and the value the call returned.
 * The prototype keeps retval as unsigned long so existing callers and
 * probes are unaffected, but the value is recorded and printed as a
 * signed long: hypervisor call status codes are signed, and error
 * statuses are negative, so an unsigned format would render them as
 * very large positive numbers.
 * hcall_tracepoint_regfunc and hcall_tracepoint_unregfunc are supplied
 * as the TRACE_EVENT_FN() registration/unregistration callbacks.
 */
TRACE_EVENT_FN(hcall_exit,

	TP_PROTO(unsigned long opcode, unsigned long retval),

	TP_ARGS(opcode, retval),

	TP_STRUCT__entry(
		__field(unsigned long, opcode)
		__field(long, retval)
	),

	TP_fast_assign(
		__entry->opcode = opcode;
		__entry->retval = retval;
	),

	TP_printk("opcode=%lu retval=%ld", __entry->opcode, __entry->retval),

	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
#endif

#endif /* _TRACE_POWERPC_H */
#endif /* _TRACE_POWERPC_H */


#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_PATH
+58 −43
Original line number Original line Diff line number Diff line
@@ -14,19 +14,53 @@
	
	
#define STK_PARM(i)     (48 + ((i)-3)*8)
#define STK_PARM(i)     (48 + ((i)-3)*8)


#ifdef CONFIG_HCALL_STATS
#ifdef CONFIG_TRACEPOINTS

	.section	".toc","aw"

	.globl hcall_tracepoint_refcount
hcall_tracepoint_refcount:
	.llong	0

	.section	".text"

/*
/*
 * precall must preserve all registers.  use unused STK_PARM()
 * precall must preserve all registers.  use unused STK_PARM()
 * areas to save snapshots and opcode.
 * areas to save snapshots and opcode. We branch around this
 * in early init (eg when populating the MMU hashtable) by using an
 * unconditional cpu feature.
 */
 */
#define HCALL_INST_PRECALL					\
#define HCALL_INST_PRECALL					\
	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
	mftb	r0;			/* get timebase and */	\
	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
BEGIN_FTR_SECTION;						\
BEGIN_FTR_SECTION;						\
	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
	b	1f;						\
	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
END_FTR_SECTION(0, 1);						\
END_FTR_SECTION_IFSET(CPU_FTR_PURR);
	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
	cmpdi	r12,0;						\
	beq+	1f;						\
	mflr	r0;						\
	std	r3,STK_PARM(r3)(r1);				\
	std	r4,STK_PARM(r4)(r1);				\
	std	r5,STK_PARM(r5)(r1);				\
	std	r6,STK_PARM(r6)(r1);				\
	std	r7,STK_PARM(r7)(r1);				\
	std	r8,STK_PARM(r8)(r1);				\
	std	r9,STK_PARM(r9)(r1);				\
	std	r10,STK_PARM(r10)(r1);				\
	std	r0,16(r1);					\
	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
	bl	.__trace_hcall_entry;				\
	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
	ld	r0,16(r1);					\
	ld	r3,STK_PARM(r3)(r1);				\
	ld	r4,STK_PARM(r4)(r1);				\
	ld	r5,STK_PARM(r5)(r1);				\
	ld	r6,STK_PARM(r6)(r1);				\
	ld	r7,STK_PARM(r7)(r1);				\
	ld	r8,STK_PARM(r8)(r1);				\
	ld	r9,STK_PARM(r9)(r1);				\
	ld	r10,STK_PARM(r10)(r1);				\
	mtlr	r0;						\
1:


/*
/*
 * postcall is performed immediately before function return which
 * postcall is performed immediately before function return which
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
BEGIN_FTR_SECTION;						\
BEGIN_FTR_SECTION;						\
	b	1f;						\
	b	1f;						\
END_FTR_SECTION(0, 1);						\
END_FTR_SECTION(0, 1);						\
	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
	cmpdi	r12,0;						\
	bgt-	cr7,1f;						\
	beq+	1f;						\
								\
	mflr	r0;						\
	/* get time and PURR snapshots after hcall */		\
	ld	r6,STK_PARM(r3)(r1);				\
	mftb	r7;			/* timebase after */	\
	std	r3,STK_PARM(r3)(r1);				\
BEGIN_FTR_SECTION;						\
	mr	r4,r3;						\
	mfspr	r8,SPRN_PURR;		/* PURR after */	\
	mr	r3,r6;						\
	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
	std	r0,16(r1);					\
	subf	r6,r6,r8;		/* delta */		\
	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
	bl	.__trace_hcall_exit;				\
	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
	subf	r5,r5,r7;		/* time delta */	\
	ld	r0,16(r1);					\
								\
	ld	r3,STK_PARM(r3)(r1);				\
	/* calculate address of stat structure r4 = opcode */	\
	mtlr	r0;						\
	srdi	r4,r4,2;		/* index into array */	\
	mulli	r4,r4,HCALL_STAT_SIZE;				\
	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
	add	r4,r4,r7;					\
	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
	add	r4,r4,r7;					\
								\
	/* update stats	*/					\
	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
	addi	r7,r7,1;					\
	std	r7,HCALL_STAT_CALLS(r4);			\
	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
	add	r7,r7,r5;					\
	std	r7,HCALL_STAT_TB(r4);				\
BEGIN_FTR_SECTION;						\
	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
	add	r7,r7,r6;					\
	std	r7,HCALL_STAT_PURR(r4);				\
END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
1:
1:
#else
#else
#define HCALL_INST_PRECALL
#define HCALL_INST_PRECALL
+37 −0
Original line number Original line Diff line number Diff line
@@ -26,6 +26,7 @@
#include <asm/hvcall.h>
#include <asm/hvcall.h>
#include <asm/firmware.h>
#include <asm/firmware.h>
#include <asm/cputable.h>
#include <asm/cputable.h>
#include <asm/trace.h>


DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);


@@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = {
#define	HCALL_ROOT_DIR		"hcall_inst"
#define	HCALL_ROOT_DIR		"hcall_inst"
#define CPU_NAME_BUF_SIZE	32
#define CPU_NAME_BUF_SIZE	32



/*
 * Probe attached to the hcall_entry tracepoint.
 *
 * Stores the current timebase and PURR values in this CPU's hcall_stats
 * slot for @opcode so that probe_hcall_exit() can compute the deltas.
 * Opcodes above MAX_HCALL_OPCODE have no slot and are ignored.
 */
static void probe_hcall_entry(unsigned long opcode)
{
	struct hcall_stats *stats;

	if (opcode <= MAX_HCALL_OPCODE) {
		/*
		 * One slot per opcode, opcodes being spaced four apart.
		 * The get_cpu_var() here is balanced by the put_cpu_var()
		 * in probe_hcall_exit().
		 */
		stats = &get_cpu_var(hcall_stats)[opcode >> 2];
		stats->tb_start = mftb();
		stats->purr_start = mfspr(SPRN_PURR);
	}
}

/*
 * Probe attached to the hcall_exit tracepoint.
 *
 * Accounts one completed hypervisor call in this CPU's hcall_stats slot
 * for @opcode: bumps the call count and adds the timebase and PURR
 * deltas since probe_hcall_entry() took its snapshots to the running
 * totals.  @retval is not used.  Opcodes above MAX_HCALL_OPCODE have no
 * slot and are ignored, mirroring probe_hcall_entry().
 */
static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
{
	struct hcall_stats *h;

	if (opcode > MAX_HCALL_OPCODE)
		return;

	h = &__get_cpu_var(hcall_stats)[opcode / 4];
	h->num_calls++;
	/*
	 * tb_total/purr_total are cumulative totals, so the per-call
	 * deltas must be accumulated rather than assigned.
	 */
	h->tb_total += mftb() - h->tb_start;
	h->purr_total += mfspr(SPRN_PURR) - h->purr_start;

	/* Balances the get_cpu_var() taken in probe_hcall_entry(). */
	put_cpu_var(hcall_stats);
}

static int __init hcall_inst_init(void)
static int __init hcall_inst_init(void)
{
{
	struct dentry *hcall_root;
	struct dentry *hcall_root;
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void)
	if (!firmware_has_feature(FW_FEATURE_LPAR))
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;
		return 0;


	if (register_trace_hcall_entry(probe_hcall_entry))
		return -EINVAL;

	if (register_trace_hcall_exit(probe_hcall_exit)) {
		unregister_trace_hcall_entry(probe_hcall_entry);
		return -EINVAL;
	}

	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
	if (!hcall_root)
	if (!hcall_root)
		return -ENOMEM;
		return -ENOMEM;
Loading