
Commit cc1adb5f authored by Anton Blanchard, committed by Benjamin Herrenschmidt

powerpc/pseries: Use jump labels for hcall tracepoints



hcall tracepoints add quite a few instructions to our hcall path:

plpar_hcall:
	mr      r2,r2
	mfcr    r0
	stw     r0,8(r1)
	b       164		<---- start
	ld      r12,0(r2)
	std     r12,32(r1)
	cmpdi   r12,0
	beq     164		<---- end
...

We have an unconditional branch that gets nopped out during boot, plus
a load/compare/branch. We also store the tracepoint refcount on the
stack for the hcall_exit path to use.

By using jump labels we can simplify this to just a single nop that
gets replaced with a branch when the tracepoint is enabled:

plpar_hcall:
	mr      r2,r2
	mfcr    r0
	stw     r0,8(r1)
	nop			<----
...

If jump labels are not enabled, we fall back to the old method.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 8fa5d454
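
For reference, the generic jump-label pattern this patch plugs into looks
roughly like the C sketch below. This is illustrative only: trace_key,
hot_path and do_tracing are made-up names, while struct static_key,
STATIC_KEY_INIT, static_key_false() and static_key_slow_inc/dec() are the
real APIs of this era (the latter are visible in the lpar.c hunk further
down):

	#include <linux/static_key.h>

	extern void do_tracing(void);	/* hypothetical slow path */

	/* The key starts disabled: the fast path is a single nop. */
	static struct static_key trace_key = STATIC_KEY_INIT;

	static void hot_path(void)
	{
		/*
		 * With CONFIG_JUMP_LABEL this test compiles to a nop;
		 * enabling the key patches the nop into a branch to
		 * the out-of-line slow path.
		 */
		if (static_key_false(&trace_key))
			do_tracing();
	}

	/* Register/unregister hooks flip the key, patching every site. */
	static void tracing_on(void)
	{
		static_key_slow_inc(&trace_key);
	}

	static void tracing_off(void)
	{
		static_key_slow_dec(&trace_key);
	}

On powerpc the assembly side of the same pattern is the ARCH_STATIC_BRANCH
macro added to jump_label.h below: a nop plus a __jump_table entry that
records where to patch in the branch.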
arch/powerpc/include/asm/jump_label.h +9 −0
@@ -10,6 +10,7 @@
 * 2 of the License, or (at your option) any later version.
 */

#ifndef __ASSEMBLY__
#include <linux/types.h>

#include <asm/feature-fixups.h>
@@ -42,4 +43,12 @@ struct jump_entry {
	jump_label_t key;
};

#else
#define ARCH_STATIC_BRANCH(LABEL, KEY)		\
1098:	nop;					\
	.pushsection __jump_table, "aw";	\
	FTR_ENTRY_LONG 1098b, LABEL, KEY;	\
	.popsection
#endif

#endif /* _ASM_POWERPC_JUMP_LABEL_H */
arch/powerpc/platforms/pseries/hvCall.S +110 −34
@@ -12,9 +12,13 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/jump_label.h>

	.section	".text"
	
#ifdef CONFIG_TRACEPOINTS

#ifndef CONFIG_JUMP_LABEL
	.section	".toc","aw"

	.globl hcall_tracepoint_refcount
@@ -22,21 +26,13 @@ hcall_tracepoint_refcount:
	.llong	0

	.section	".text"
#endif

/*
 * precall must preserve all registers.  use unused STK_PARAM()
 * areas to save snapshots and opcode. We branch around this
 * in early init (eg when populating the MMU hashtable) by using an
 * unconditional cpu feature.
 * areas to save snapshots and opcode.
 */
#define HCALL_INST_PRECALL(FIRST_REG)				\
BEGIN_FTR_SECTION;						\
	b	1f;						\
END_FTR_SECTION(0, 1);						\
	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
	std	r12,32(r1);					\
	cmpdi	r12,0;						\
	beq+	1f;						\
	mflr	r0;						\
	std	r3,STK_PARAM(R3)(r1);				\
	std	r4,STK_PARAM(R4)(r1);				\
@@ -60,22 +56,13 @@ END_FTR_SECTION(0, 1); \
	ld	r8,STK_PARAM(R8)(r1);				\
	ld	r9,STK_PARAM(R9)(r1);				\
	ld	r10,STK_PARAM(R10)(r1);				\
	mtlr	r0;						\
1:
	mtlr	r0

/*
 * postcall is performed immediately before function return which
 * allows liberal use of volatile registers.  We branch around this
 * in early init (eg when populating the MMU hashtable) by using an
 * unconditional cpu feature.
 * allows liberal use of volatile registers.
 */
#define __HCALL_INST_POSTCALL					\
BEGIN_FTR_SECTION;						\
	b	1f;						\
END_FTR_SECTION(0, 1);						\
	ld      r12,32(r1);					\
	cmpdi	r12,0;						\
	beq+	1f;						\
	mflr	r0;						\
	ld	r6,STK_PARAM(R3)(r1);				\
	std	r3,STK_PARAM(R3)(r1);				\
@@ -87,8 +74,7 @@ END_FTR_SECTION(0, 1); \
	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
	ld	r0,16(r1);					\
	ld	r3,STK_PARAM(R3)(r1);				\
	mtlr	r0;						\
1:
	mtlr	r0

#define HCALL_INST_POSTCALL_NORETS				\
	li	r5,0;						\
@@ -98,37 +84,62 @@ END_FTR_SECTION(0, 1); \
	mr	r5,BUFREG;					\
	__HCALL_INST_POSTCALL

#ifdef CONFIG_JUMP_LABEL
#define HCALL_BRANCH(LABEL)					\
	ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
#else

/*
 * We branch around this in early init (eg when populating the MMU
 * hashtable) by using an unconditional cpu feature.
 */
#define HCALL_BRANCH(LABEL)					\
BEGIN_FTR_SECTION;						\
	b	1f;						\
END_FTR_SECTION(0, 1);						\
	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
	std	r12,32(r1);					\
	cmpdi	r12,0;						\
	bne-	LABEL;						\
1:
#endif

#else
#define HCALL_INST_PRECALL(FIRST_ARG)
#define HCALL_INST_POSTCALL_NORETS
#define HCALL_INST_POSTCALL(BUFREG)
#define HCALL_BRANCH(LABEL)
#endif

	.text

_GLOBAL_TOC(plpar_hcall_norets)
	HMT_MEDIUM

	mfcr	r0
	stw	r0,8(r1)

	HCALL_INST_PRECALL(R4)

	HCALL_BRANCH(plpar_hcall_norets_trace)
	HVSC				/* invoke the hypervisor */

	HCALL_INST_POSTCALL_NORETS

	lwz	r0,8(r1)
	mtcrf	0xff,r0
	blr				/* return r3 = status */

#ifdef CONFIG_TRACEPOINTS
plpar_hcall_norets_trace:
	HCALL_INST_PRECALL(R4)
	HVSC
	HCALL_INST_POSTCALL_NORETS
	lwz	r0,8(r1)
	mtcrf	0xff,r0
	blr
#endif

_GLOBAL_TOC(plpar_hcall)
	HMT_MEDIUM

	mfcr	r0
	stw	r0,8(r1)

	HCALL_INST_PRECALL(R5)
	HCALL_BRANCH(plpar_hcall_trace)

	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */

@@ -147,12 +158,40 @@ _GLOBAL_TOC(plpar_hcall)
	std	r6, 16(r12)
	std	r7, 24(r12)

	lwz	r0,8(r1)
	mtcrf	0xff,r0

	blr				/* return r3 = status */

#ifdef CONFIG_TRACEPOINTS
plpar_hcall_trace:
	HCALL_INST_PRECALL(R5)

	std	r4,STK_PARAM(R4)(r1)
	mr	r0,r4

	mr	r4,r5
	mr	r5,r6
	mr	r6,r7
	mr	r7,r8
	mr	r8,r9
	mr	r9,r10

	HVSC

	ld	r12,STK_PARAM(R4)(r1)
	std	r4,0(r12)
	std	r5,8(r12)
	std	r6,16(r12)
	std	r7,24(r12)

	HCALL_INST_POSTCALL(r12)

	lwz	r0,8(r1)
	mtcrf	0xff,r0

	blr				/* return r3 = status */
	blr
#endif

/*
 * plpar_hcall_raw can be called in real mode. kexec/kdump need some
@@ -194,7 +233,7 @@ _GLOBAL_TOC(plpar_hcall9)
	mfcr	r0
	stw	r0,8(r1)

	HCALL_INST_PRECALL(R5)
	HCALL_BRANCH(plpar_hcall9_trace)

	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */

@@ -222,12 +261,49 @@ _GLOBAL_TOC(plpar_hcall9)
	std	r11,56(r12)
	std	r0, 64(r12)

	lwz	r0,8(r1)
	mtcrf	0xff,r0

	blr				/* return r3 = status */

#ifdef CONFIG_TRACEPOINTS
plpar_hcall9_trace:
	HCALL_INST_PRECALL(R5)

	std	r4,STK_PARAM(R4)(r1)
	mr	r0,r4

	mr	r4,r5
	mr	r5,r6
	mr	r6,r7
	mr	r7,r8
	mr	r8,r9
	mr	r9,r10
	ld	r10,STK_PARAM(R11)(r1)
	ld	r11,STK_PARAM(R12)(r1)
	ld	r12,STK_PARAM(R13)(r1)

	HVSC

	mr	r0,r12
	ld	r12,STK_PARAM(R4)(r1)
	std	r4,0(r12)
	std	r5,8(r12)
	std	r6,16(r12)
	std	r7,24(r12)
	std	r8,32(r12)
	std	r9,40(r12)
	std	r10,48(r12)
	std	r11,56(r12)
	std	r0,64(r12)

	HCALL_INST_POSTCALL(r12)

	lwz	r0,8(r1)
	mtcrf	0xff,r0

	blr				/* return r3 = status */
	blr
#endif

/* See plpar_hcall_raw to see why this is needed */
_GLOBAL(plpar_hcall9_raw)
arch/powerpc/platforms/pseries/lpar.c +23 −7
@@ -26,6 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
#include <linux/static_key.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page);
#endif

#ifdef CONFIG_TRACEPOINTS
#ifdef CONFIG_JUMP_LABEL
struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;

void hcall_tracepoint_regfunc(void)
{
	static_key_slow_inc(&hcall_tracepoint_key);
}

void hcall_tracepoint_unregfunc(void)
{
	static_key_slow_dec(&hcall_tracepoint_key);
}
#else
/*
 * We optimise our hcall path by placing hcall_tracepoint_refcount
 * directly in the TOC so we can check if the hcall tracepoints are
@@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page);
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;

/* 
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this are spinlocks calling H_YIELD on
 * shared processor partitions.
 */
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);

void hcall_tracepoint_regfunc(void)
{
	hcall_tracepoint_refcount++;
@@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void)
{
	hcall_tracepoint_refcount--;
}
#endif

/*
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this are spinlocks calling H_YIELD on
 * shared processor partitions.
 */
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);


void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
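
The diff view truncates here. For context, the body of __trace_hcall_entry
uses the per-cpu hcall_trace_depth counter above to bail out when a
tracer's own hcalls (for example spinlocks calling H_YIELD) re-enter this
path. A minimal sketch of that guard, assuming the upstream shape
(this_cpu_ptr is used for brevity and is not necessarily the verbatim
accessor in this kernel version):

	void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
	{
		unsigned long flags;
		unsigned int *depth;

		local_irq_save(flags);

		depth = this_cpu_ptr(&hcall_trace_depth);
		if (*depth)		/* already tracing an hcall: don't recurse */
			goto out;

		(*depth)++;
		trace_hcall_entry(opcode, args);
		(*depth)--;

	out:
		local_irq_restore(flags);
	}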