Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d99015b1 authored by Alexander van Heukelum's avatar Alexander van Heukelum Committed by Ingo Molnar
Browse files

x86: move entry_64.S register saving out of the macros



Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.

The save_args function becomes really straightforward and easy
to understand, with the possible exception of the stack switch
code, which now needs to copy the return address to the calling
function onto the new stack. Normal interrupts arrive with
((~vector)-0x80) on the stack, which gets adjusted in common_interrupt:

<common_interrupt>:
(5)  addq   $0xffffffffffffff80,(%rsp)		/* -> ~(vector) */
(4)  sub    $0x50,%rsp				/* space for registers */
(5)  callq  ffffffff80211290 <save_args>
(5)  callq  ffffffff80214290 <do_IRQ>
<ret_from_intr>:
     ...

An apic interrupt stub now looks like this:

<thermal_interrupt>:
(5)  pushq  $0xffffffffffffff05			/* ~(vector) */
(4)  sub    $0x50,%rsp				/* space for registers */
(5)  callq  ffffffff80211290 <save_args>
(5)  callq  ffffffff80212b8f <smp_thermal_interrupt>
(5)  jmpq   ffffffff80211f93 <ret_from_intr>

Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub
for an exception without errorcode looks like this:

<overflow>:
(6)  callq  *0x1cad12(%rip)        # ffffffff803dd448 <pv_irq_ops+0x38>
(2)  pushq  $0xffffffffffffffff			/* no syscall */
(4)  sub    $0x78,%rsp				/* space for registers */
(5)  callq  ffffffff8030e3b0 <error_entry>
(3)  mov    %rsp,%rdi				/* pt_regs pointer */
(2)  xor    %esi,%esi				/* no error code */
(5)  callq  ffffffff80213446 <do_overflow>
(5)  jmpq   ffffffff8030e460 <error_exit>

And one for an exception with errorcode like this:

<segment_not_present>:
(6)  callq  *0x1cab92(%rip)        # ffffffff803dd448 <pv_irq_ops+0x38>
(4)  sub    $0x78,%rsp				/* space for registers */
(5)  callq  ffffffff8030e3b0 <error_entry>
(3)  mov    %rsp,%rdi				/* pt_regs pointer */
(5)  mov    0x78(%rsp),%rsi			/* load error code */
(9)  movq   $0xffffffffffffffff,0x78(%rsp)	/* no syscall */
(5)  callq  ffffffff80213209 <do_segment_not_present>
(5)  jmpq   ffffffff8030e460 <error_exit>

Unfortunately, this last type is more than 32 bytes. But the total space
savings due to this patch is about 2500 bytes on an smp-configuration,
and I think the code is clearer than it was before. The tested kernels
were non-paravirt ones (i.e., without the indirect call at the top of
the exception handlers).

Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much
time is spent outside of the program as a function of the measured
delay. The area under the graph represents the total time spent
outside the program. Eight instances of the rdtsctest were
started, each pinned to a single cpu. The histograms are added together.
For each kernel two measurements were done: one in mostly idle
condition, the other while running "bonnie++ -f", bound to cpu 0.
Each measurement took 40 minutes runtime. See the attached graphs
for the results. The graphs overlap almost everywhere, but there
are small differences.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent c032a2de
Loading
Loading
Loading
Loading
+166 −134
Original line number Diff line number Diff line
@@ -242,6 +242,78 @@ ENTRY(native_usergs_sysret64)
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm

/*
 * initial frame state for interrupts and exceptions
 *
 * Opens a CFI procedure and describes the hardware-pushed part of the
 * frame to the unwinder.
 *
 * ref: the frame offset of the slot %rsp points at when the macro is
 *      used (RIP for INTR_FRAME, ORIG_RAX for XCPT_FRAME; error_entry
 *      and error_exit pass RDI and R15 respectively). Every offset
 *      below is rebased by subtracting ref, so the rules are relative
 *      to the current %rsp.
 *
 * NOTE(review): SS, RSP, EFLAGS, CS and RIP are offsets defined outside
 * this view (presumably the pt_regs layout), and the CFI_* names are
 * presumably thin wrappers around the assembler's .cfi_* directives --
 * confirm against their definitions.
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	/* CFA = %rsp + (SS+8-ref): the address just past the SS slot */
	CFI_DEF_CFA rsp,SS+8-\ref
	/* the ss, rflags and cs rules are deliberately left commented out */
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/*
 * initial frame state for interrupts (and exceptions without error code)
 */
#define INTR_FRAME _frame RIP
/*
 * initial frame state for exceptions with error code (and interrupts
 * with vector already pushed)
 */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * save partial stack frame
 *
 * Out-of-line register save for interrupt entry. The `interrupt` macro
 * reserves 10*8 bytes with subq and then calls here, so on entry:
 *
 *    0(%rsp)            return address into the caller
 *    8(%rsp)            slot that receives %rbp
 *   16(%rsp)..80(%rsp)  slots for r11, r10, r9, r8, rax, rcx, rdx,
 *                       rsi, rdi (lowest to highest address)
 *
 * That is why every store below carries a "+16" bias.
 *
 * On return:
 *   %rdi  = entry %rsp - ARGOFFSET + 16 (arg1 for the handler)
 *   %rbp  = address of the saved-%rbp slot on the original stack
 *   %rsp  = switched to %gs:pda_irqstackptr when the incremented
 *           pda_irqcount is zero, with the return address copied over;
 *           otherwise unchanged
 *   %rax  is clobbered on the stack-switch path (its value has already
 *         been saved to the frame by then)
 *
 * NOTE(review): ARGOFFSET and CS are defined outside this view;
 * presumably %rdi ends up as the pt_regs pointer -- confirm.
 */
ENTRY(save_args)
	XCPT_FRAME
	cld
	/* store the nine argument/scratch registers into the reserved slots */
	movq  %rdi, 8*8+16(%rsp)
	CFI_REL_OFFSET rdi, 8*8+16
	movq  %rsi, 7*8+16(%rsp)
	CFI_REL_OFFSET rsi, 7*8+16
	movq  %rdx, 6*8+16(%rsp)
	CFI_REL_OFFSET rdx, 6*8+16
	movq  %rcx, 5*8+16(%rsp)
	CFI_REL_OFFSET rcx, 5*8+16
	movq  %rax, 4*8+16(%rsp)
	CFI_REL_OFFSET rax, 4*8+16
	movq  %r8, 3*8+16(%rsp)
	CFI_REL_OFFSET r8, 3*8+16
	movq  %r9, 2*8+16(%rsp)
	CFI_REL_OFFSET r9, 2*8+16
	movq  %r10, 1*8+16(%rsp)
	CFI_REL_OFFSET r10, 1*8+16
	movq  %r11, 0*8+16(%rsp)
	CFI_REL_OFFSET r11, 0*8+16
	/* %rdi was saved above, so it can now be reused as the handler argument */
	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
	/* emulate "push %rbp; mov %rsp,%rbp" using the slot above the return address */
	movq %rbp, 8(%rsp)		/* push %rbp */
	leaq 8(%rsp), %rbp		/* mov %rsp, %rbp */
	/* low two bits of the saved CS clear: skip SWAPGS (presumably kernel-mode entry) */
	testl $3, CS(%rdi)
	je 1f
	SWAPGS
	/*
	 * irqcount is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
1:	incl %gs:pda_irqcount
	/* non-zero after the increment: keep the current stack */
	jne 2f
	/*
	 * Switching stacks: our own return address lives on the old stack,
	 * so carry it across or the ret below would use the wrong address.
	 */
	pop %rax			/* move return address... */
	mov %gs:pda_irqstackptr,%rsp
	push %rax			/* ... to the new stack */
	/*
	 * We entered an interrupt context - irqs are off:
	 */
2:	TRACE_IRQS_OFF
	ret
	CFI_ENDPROC
END(save_args)

/*
 * A newly forked process directly context switches into this.
 */
@@ -607,26 +679,6 @@ ENTRY(stub_rt_sigreturn)
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
@@ -677,36 +729,9 @@ END(interrupt)

/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
	pushq %rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq %rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	    much work) */
1:	incl	%gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push    %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	subq $10*8, %rsp
	CFI_ADJUST_CFA_OFFSET 10*8
	call save_args
	call \func
	.endm

@@ -852,6 +877,8 @@ END(common_interrupt)
/*
 * APIC interrupts.
 */
	.p2align 5

	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
@@ -922,24 +949,29 @@ END(spurious_interrupt)
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	pushq $-1		/* ORIG_RAX: no syscall to restart */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	call \sym
	jmp error_exit		/* %ebx: no swapgs flag */
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	subq $15*8,%rsp
	CFI_ADJUST_CFA_OFFSET 15*8
	call error_entry
	movq %rsp,%rdi			/* pt_regs pointer */
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
	call \sym
	jmp error_exit			/* %ebx: no swapgs flag */
	CFI_ENDPROC
	.endm

@@ -1043,61 +1075,78 @@ paranoid_schedule\trace:
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 * Exception entry point. This expects an error code/orig_rax on the stack.
 * Returns the "no swapgs flag" in %ebx.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	CFI_ADJUST_CFA_OFFSET 15*8
	/* oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	movq %rdi,14*8+8(%rsp)
	CFI_REL_OFFSET rdi,RDI+8
	movq %rsi,13*8+8(%rsp)
	CFI_REL_OFFSET rsi,RSI+8
	movq %rdx,12*8+8(%rsp)
	CFI_REL_OFFSET rdx,RDX+8
	movq %rcx,11*8+8(%rsp)
	CFI_REL_OFFSET rcx,RCX+8
	movq %rax,10*8+8(%rsp)
	CFI_REL_OFFSET rax,RAX+8
	movq %r8, 9*8+8(%rsp)
	CFI_REL_OFFSET r8,R8+8
	movq %r9, 8*8+8(%rsp)
	CFI_REL_OFFSET r9,R9+8
	movq %r10,7*8+8(%rsp)
	CFI_REL_OFFSET r10,R10+8
	movq %r11,6*8+8(%rsp)
	CFI_REL_OFFSET r11,R11+8
	movq %rbx,5*8+8(%rsp)
	CFI_REL_OFFSET rbx,RBX+8
	movq %rbp,4*8+8(%rsp)
	CFI_REL_OFFSET rbp,RBP+8
	movq %r12,3*8+8(%rsp)
	CFI_REL_OFFSET r12,R12+8
	movq %r13,2*8+8(%rsp)
	CFI_REL_OFFSET r13,R13+8
	movq %r14,1*8+8(%rsp)
	CFI_REL_OFFSET r14,R14+8
	movq %r15,0*8+8(%rsp)
	CFI_REL_OFFSET r15,R15+8
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	testl $3,CS+8(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	ret
	CFI_ENDPROC

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report a truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 */
error_kernelspace:
	incl %ebx
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP+8(%rsp)
        je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)


/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
KPROBE_ENTRY(error_exit)
	_frame R15
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1112,24 +1161,7 @@ error_exit:
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
       /* There are two places in the kernel that can potentially fault with
          usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report an truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
        je   error_swapgs
	jmp  error_sti
KPROBE_END(error_entry)
KPROBE_END(error_exit)

       /* Reload gs selector with exception handling */
       /* edi:  new selector */