Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef3e035c authored by David S. Miller
Browse files

sparc64: Fix register corruption in top-most kernel stack frame during boot.



Meelis Roos reported that kernels built with gcc-4.9 do not boot. We
eventually narrowed this down to only impacting machines using
UltraSPARC-III and derivative CPUs.

The crash happens right when the first user process is spawned:

[   54.451346] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
[   54.451346]
[   54.571516] CPU: 1 PID: 1 Comm: init Not tainted 3.16.0-rc2-00211-gd7933ab #96
[   54.666431] Call Trace:
[   54.698453]  [0000000000762f8c] panic+0xb0/0x224
[   54.759071]  [000000000045cf68] do_exit+0x948/0x960
[   54.823123]  [000000000042cbc0] fault_in_user_windows+0xe0/0x100
[   54.902036]  [0000000000404ad0] __handle_user_windows+0x0/0x10
[   54.978662] Press Stop-A (L1-A) to return to the boot prom
[   55.050713] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004

Further investigation showed that compiling only per_cpu_patch() with
an older compiler fixes the boot.

Detailed analysis showed that the function is not being miscompiled by
gcc-4.9, but it is using a different register allocation ordering.

With the gcc-4.9 compiled function, something during the code patching
causes some of the %i* input registers to get corrupted.  Perhaps
we have a TLB miss path into the firmware that is deep enough to
cause a register window spill and subsequent restore when we get
back from the TLB miss trap.

Let's plug this up by doing two things:

1) Stop using the firmware stack for client interface calls into
   the firmware.  Just use the kernel's stack.

2) As soon as we can, call into a new function "start_early_boot()"
   to put a one-register-window buffer between the firmware's
   deepest stack frame and the top-most initial kernel one.

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 61ed53de
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -62,7 +62,8 @@ struct linux_mem_p1275 {
/* You must call prom_init() before using any of the library services,
 * preferably as early as possible.  Pass it the romvec pointer.
 */
void prom_init(void *cif_handler, void *cif_stack);
void prom_init(void *cif_handler);
void prom_init_report(void);

/* Boot argument acquisition, returns the boot command line string. */
char *prom_getbootargs(void);
+2 −0
Original line number Diff line number Diff line
@@ -48,6 +48,8 @@ unsigned long safe_compute_effective_address(struct pt_regs *, unsigned int);
#endif

#ifdef CONFIG_SPARC64
void __init start_early_boot(void);

/* unaligned_64.c */
int handle_ldf_stq(u32 insn, struct pt_regs *regs);
void handle_ld_nf(u32 insn, struct pt_regs *regs);
+0 −3
Original line number Diff line number Diff line
@@ -65,13 +65,10 @@ struct pause_patch_entry {
extern struct pause_patch_entry __pause_3insn_patch,
	__pause_3insn_patch_end;

void __init per_cpu_patch(void);
void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
			     struct sun4v_1insn_patch_entry *);
void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
			     struct sun4v_2insn_patch_entry *);
void __init sun4v_patch(void);
void __init boot_cpu_id_too_large(int cpu);
extern unsigned int dcache_parity_tl1_occurred;
extern unsigned int icache_parity_tl1_occurred;

+4 −36
Original line number Diff line number Diff line
@@ -672,14 +672,12 @@ tlb_fixup_done:
	sethi	%hi(init_thread_union), %g6
	or	%g6, %lo(init_thread_union), %g6
	ldx	[%g6 + TI_TASK], %g4
	mov	%sp, %l6

	wr	%g0, ASI_P, %asi
	mov	1, %g1
	sllx	%g1, THREAD_SHIFT, %g1
	sub	%g1, (STACKFRAME_SZ + STACK_BIAS), %g1
	add	%g6, %g1, %sp
	mov	0, %fp

	/* Set per-cpu pointer initially to zero, this makes
	 * the boot-cpu use the in-kernel-image per-cpu areas
@@ -706,44 +704,14 @@ tlb_fixup_done:
	 nop
#endif

	mov	%l6, %o1			! OpenPROM stack
	call	prom_init
	 mov	%l7, %o0			! OpenPROM cif handler

	/* Initialize current_thread_info()->cpu as early as possible.
	 * In order to do that accurately we have to patch up the get_cpuid()
	 * assembler sequences.  And that, in turn, requires that we know
	 * if we are on a Starfire box or not.  While we're here, patch up
	 * the sun4v sequences as well.
	/* To create a one-register-window buffer between the kernel's
	 * initial stack and the last stack frame we use from the firmware,
	 * do the rest of the boot from a C helper function.
	 */
	call	check_if_starfire
	 nop
	call	per_cpu_patch
	 nop
	call	sun4v_patch
	 nop

#ifdef CONFIG_SMP
	call	hard_smp_processor_id
	 nop
	cmp	%o0, NR_CPUS
	blu,pt	%xcc, 1f
	 nop
	call	boot_cpu_id_too_large
	 nop
	/* Not reached... */

1:
#else
	mov	0, %o0
#endif
	sth	%o0, [%g6 + TI_CPU]

	call	prom_init_report
	 nop

	/* Off we go.... */
	call	start_kernel
	call	start_early_boot
	 nop
	/* Not reached... */

+0 −1
Original line number Diff line number Diff line
@@ -109,7 +109,6 @@ hv_cpu_startup:
	sllx		%g5, THREAD_SHIFT, %g5
	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
	add		%g6, %g5, %sp
	mov		0, %fp

	call		init_irqwork_curcpu
	 nop
Loading