Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 10e66760 authored by Vitaly Kuznetsov's avatar Vitaly Kuznetsov Committed by Ingo Molnar
Browse files

x86/smpboot: Unbreak CPU0 hotplug



A hang on CPU0 onlining after a preceding offlining is observed. Trace
shows that CPU0 is stuck in check_tsc_sync_target() waiting for source
CPU to run check_tsc_sync_source() but this never happens. Source CPU,
in its turn, is stuck on synchronize_sched() which is called from
native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler().

So it's a classic ABBA deadlock, due to the use of synchronize_sched() in
unregister_nmi_handler().

Fix the bug by moving unregister_nmi_handler() from do_boot_cpu() to
native_cpu_up() after cpu onlining is done.

Signed-off-by: default avatarVitaly Kuznetsov <vkuznets@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170803105818.9934-1-vkuznets@redhat.com


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent e93c1730
Loading
Loading
Loading
Loading
+17 −13
Original line number Diff line number Diff line
@@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 * Returns zero if CPU booted OK, else error code from
 * ->wakeup_secondary_cpu.
 */
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
		       int *cpu0_nmi_registered)
{
	volatile u32 *trampoline_status =
		(volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	int cpu0_nmi_registered = 0;
	unsigned long timeout;

	idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
						     &cpu0_nmi_registered);
						     cpu0_nmi_registered);

	if (!boot_error) {
		/*
@@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
		 */
		smpboot_restore_warm_reset_vector();
	}
	/*
	 * Clean up the nmi handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return boot_error;
}
@@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	int cpu0_nmi_registered = 0;
	unsigned long flags;
	int err;
	int err, ret = 0;

	WARN_ON(irqs_disabled());

@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)

	common_cpu_up(cpu, tidle);

	err = do_boot_cpu(apicid, cpu, tidle);
	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
	if (err) {
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
		return -EIO;
		ret = -EIO;
		goto unreg_nmi;
	}

	/*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
		touch_nmi_watchdog();
	}

	return 0;
unreg_nmi:
	/*
	 * Clean up the nmi handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return ret;
}

/**