Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e1367daf authored by Li Shaohua's avatar Li Shaohua Committed by Linus Torvalds
Browse files

[PATCH] cpu state clean after hot remove



Clean CPU states in order to reuse smp boot code for CPU hotplug.

Signed-off-by: default avatarLi <Shaohua&lt;shaohua.li@intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 0bb3184d
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -651,3 +651,15 @@ void __devinit cpu_init(void)
	clear_used_math();
	mxcsr_feature_mask_init();
}

#ifdef CONFIG_HOTPLUG_CPU
void __devinit cpu_uninit(void)
{
	int cpu = raw_smp_processor_id();
	cpu_clear(cpu, cpu_initialized);

	/* lazy TLB state */
	per_cpu(cpu_tlbstate, cpu).state = 0;
	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}
#endif
+5 −0
Original line number Diff line number Diff line
@@ -156,6 +156,11 @@ void irq_ctx_init(int cpu)
		cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}

void irq_ctx_exit(int cpu)
{
	hardirq_ctx[cpu] = NULL;
}

extern asmlinkage void __do_softirq(void);

asmlinkage void do_softirq(void)
+9 −11
Original line number Diff line number Diff line
@@ -152,21 +152,19 @@ static void poll_idle (void)
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	/* This must be done before dead CPU ack */
	cpu_exit_clear();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	/* We shouldn't have to disable interrupts while dead, but
	 * some interrupts just don't seem to go away, and this makes
	 * it "work" for testing purposes. */
	/* Death loop */
	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
		cpu_relax();

	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
	local_irq_disable();
	__flush_tlb_all();
	cpu_set(smp_processor_id(), cpu_online_map);
	enable_APIC_timer();
	local_irq_enable();
	while (1)
		__asm__ __volatile__("hlt":::"memory");
}
#else
static inline void play_dead(void)
+144 −31
Original line number Diff line number Diff line
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
static cpumask_t smp_commenced_mask;

/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
 * is no way to resync one AP against BP. TBD: for prescott and above, we
 * should use IA64's algorithm
 */
static int __devinitdata tsc_sync_disabled;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void)
	/*
	 *      Synchronize the TSC with the BP
	 */
	if (cpu_has_tsc && cpu_khz)
	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
		synchronize_tsc_ap();
}

@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused)
	lock_ipi_call_lock();
	cpu_set(smp_processor_id(), cpu_online_map);
	unlock_ipi_call_lock();
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
#endif	/* WAKE_SECONDARY_VIA_INIT */

extern cpumask_t cpu_initialized;
static inline int alloc_cpu_id(void)
{
	cpumask_t	tmp_map;
	int cpu;
	cpus_complement(tmp_map, cpu_present_map);
	cpu = first_cpu(tmp_map);
	if (cpu >= NR_CPUS)
		return -ENODEV;
	return cpu;
}

#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
static inline struct task_struct * alloc_idle_task(int cpu)
{
	struct task_struct *idle;

static int __devinit do_boot_cpu(int apicid)
	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
		/* initialize thread_struct.  we really want to avoid destroy
		 * idle tread
		 */
		idle->thread.esp = (unsigned long)(((struct pt_regs *)
			(THREAD_SIZE + (unsigned long) idle->thread_info)) - 1);
		init_idle(idle, cpu);
		return idle;
	}
	idle = fork_idle(cpu);

	if (!IS_ERR(idle))
		cpu_idle_tasks[cpu] = idle;
	return idle;
}
#else
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif

static int __devinit do_boot_cpu(int apicid, int cpu)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid)
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout, cpu;
	int timeout;
	unsigned long start_eip;
	unsigned short nmi_high = 0, nmi_low = 0;

	cpu = ++cpucount;
	++cpucount;

	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
	 */
	idle = fork_idle(cpu);
	idle = alloc_idle_task(cpu);
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);
	idle->thread.eip = (unsigned long) start_secondary;
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid)
			inquire_remote_apic(apicid);
		}
	}
	x86_cpu_to_apicid[cpu] = apicid;

	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	} else {
		x86_cpu_to_apicid[cpu] = apicid;
		cpu_set(cpu, cpu_present_map);
	}

	/* mark "stuck" area as not stuck */
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid)
	return boot_error;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_exit_clear(void)
{
	int cpu = raw_smp_processor_id();

	idle_task_exit();

	cpucount --;
	cpu_uninit();
	irq_ctx_exit(cpu);

	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);
	cpu_clear(cpu, cpu_present_map);

	cpu_clear(cpu, smp_commenced_mask);
	unmap_cpu_to_logical_apicid(cpu);
}

struct warm_boot_cpu_info {
	struct completion *complete;
	int apicid;
	int cpu;
};

static void __devinit do_warm_boot_cpu(void *p)
{
	struct warm_boot_cpu_info *info = p;
	do_boot_cpu(info->apicid, info->cpu);
	complete(info->complete);
}

int __devinit smp_prepare_cpu(int cpu)
{
	DECLARE_COMPLETION(done);
	struct warm_boot_cpu_info info;
	struct work_struct task;
	int	apicid, ret;

	lock_cpu_hotplug();
	apicid = x86_cpu_to_apicid[cpu];
	if (apicid == BAD_APICID) {
		ret = -ENODEV;
		goto exit;
	}

	info.complete = &done;
	info.apicid = apicid;
	info.cpu = cpu;
	INIT_WORK(&task, do_warm_boot_cpu, &info);

	tsc_sync_disabled = 1;

	/* init low mem mapping */
	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
			sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
	flush_tlb_all();
	schedule_work(&task);
	wait_for_completion(&done);

	tsc_sync_disabled = 0;
	zap_low_mappings();
	ret = 0;
exit:
	unlock_cpu_hotplug();
	return ret;
}
#endif

static void smp_tune_scheduling (void)
{
	unsigned long cachesize;       /* kB   */
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
		if (max_cpus <= cpucount+1)
			continue;

		if (do_boot_cpu(apicid))
		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void)
{
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), cpu_callout_map);
	cpu_set(smp_processor_id(), cpu_present_map);
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}

#ifdef CONFIG_HOTPLUG_CPU

/* must be called with the cpucontrol mutex held */
static int __devinit cpu_enable(unsigned int cpu)
static void
remove_siblinginfo(int cpu)
{
	/* get the target out of its holding state */
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	wmb();

	/* wait for the processor to ack it. timeout? */
	while (!cpu_online(cpu))
		cpu_relax();
	int sibling;

	fixup_irqs(cpu_online_map);
	/* counter the disable in fixup_irqs() */
	local_irq_enable();
	return 0;
	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
		cpu_clear(cpu, cpu_sibling_map[sibling]);
	for_each_cpu_mask(sibling, cpu_core_map[cpu])
		cpu_clear(cpu, cpu_core_map[sibling]);
	cpus_clear(cpu_sibling_map[cpu]);
	cpus_clear(cpu_core_map[cpu]);
	phys_proc_id[cpu] = BAD_APICID;
	cpu_core_id[cpu] = BAD_APICID;
}

int __cpu_disable(void)
@@ -1193,6 +1307,8 @@ int __cpu_disable(void)
	mdelay(1);
	local_irq_disable();

	remove_siblinginfo(cpu);

	cpu_clear(cpu, map);
	fixup_irqs(map);
	/* It's now safe to remove this processor from the online map */
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu)

	for (i = 0; i < 10; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk ("CPU %d is now offline\n", cpu);
			return;
		}
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ/10);
	}
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu)
		return -EIO;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/* Already up, and in cpu_quiescent now? */
	if (cpu_isset(cpu, smp_commenced_mask)) {
		cpu_enable(cpu);
		return 0;
	}
#endif

	local_irq_enable();
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);
	while (!cpu_isset(cpu, cpu_online_map))
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
	setup_ioapic_dest();
#endif
	zap_low_mappings();
#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}

void __init smp_intr_init(void)
+7 −1
Original line number Diff line number Diff line
@@ -16,6 +16,10 @@ struct sysdev_class cpu_sysdev_class = {
EXPORT_SYMBOL(cpu_sysdev_class);

#ifdef CONFIG_HOTPLUG_CPU
#ifndef __HAVE_ARCH_SMP_PREPARE_CPU
#define smp_prepare_cpu(cpu) (0)
#endif

static ssize_t show_online(struct sys_device *dev, char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -36,6 +40,8 @@ static ssize_t store_online(struct sys_device *dev, const char *buf,
			kobject_hotplug(&dev->kobj, KOBJ_OFFLINE);
		break;
	case '1':
		ret = smp_prepare_cpu(cpu->sysdev.id);
		if (ret == 0)
			ret = cpu_up(cpu->sysdev.id);
		break;
	default:
Loading