Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0437e109 authored by Ingo Molnar
Browse files

sched: zap the migration init / cache-hot balancing code



the SMP load-balancer uses the boot-time migration-cost estimation
code to attempt to improve the quality of balancing. The reason for
this code is that the discrete priority queues do not preserve
the order of scheduling accurately, so the load-balancer skips
tasks that were running on a CPU 'recently'.

this code is fundamentally fragile: the boot-time migration cost detector
doesn't really work on systems that have large L3 caches, it caused boot
delays on large systems and the whole cache-hot concept made the
balancing code pretty nondeterministic as well.

(and hey, i wrote most of it, so i can say it out loud that it sucks ;-)

under CFS the same purpose of cache affinity can be achieved without
any cache-hot special-case: tasks are sorted in the 'timeline'
tree and the SMP balancer picks tasks from the left side of the
tree, thus the most cache-cold task is balanced automatically.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 0e6aca43
Loading
Loading
Loading
Loading
+0 −43
Original line number Diff line number Diff line
@@ -1014,49 +1014,6 @@ and is between 256 and 4096 characters. It is defined in the file

	mga=		[HW,DRM]

	migration_cost=
			[KNL,SMP] debug: override scheduler migration costs
			Format: <level-1-usecs>,<level-2-usecs>,...
			This debugging option can be used to override the
			default scheduler migration cost matrix. The numbers
			are indexed by 'CPU domain distance'.
			E.g. migration_cost=1000,2000,3000 on an SMT NUMA
			box will set up an intra-core migration cost of
			1 msec, an inter-core migration cost of 2 msecs,
			and an inter-node migration cost of 3 msecs.

			WARNING: using the wrong values here can break
			scheduler performance, so it's only for scheduler
			development purposes, not production environments.

	migration_debug=
			[KNL,SMP] migration cost auto-detect verbosity
			Format=<0|1|2>
			If a system's migration matrix reported at bootup
			seems erroneous then this option can be used to
			increase verbosity of the detection process.
			We default to 0 (no extra messages), 1 will print
			some more information, and 2 will be really
			verbose (probably only useful if you also have a
			serial console attached to the system).

	migration_factor=
			[KNL,SMP] multiply/divide migration costs by a factor
			Format=<percent>
			This debug option can be used to proportionally
			increase or decrease the auto-detected migration
			costs for all entries of the migration matrix.
			E.g. migration_factor=150 will increase migration
			costs by 50%. (and thus the scheduler will be less
			eager to migrate cache-hot tasks)
			migration_factor=80 will decrease migration costs
			by 20%. (thus the scheduler will be more eager to
			migrate tasks)

			WARNING: using the wrong values here can break
			scheduler performance, so it's only for scheduler
			development purposes, not production environments.

	mousedev.tap_time=
			[MOUSE] Maximum time between finger touching and
			leaving touchpad surface for touch to be considered
+0 −12
Original line number Diff line number Diff line
@@ -941,17 +941,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
}
#endif

static void smp_tune_scheduling(void)
{
	if (cpu_khz) {
		/* cache size in kB */
		long cachesize = boot_cpu_data.x86_cache_size;

		if (cachesize > 0)
			max_cache_size = cachesize * 1024;
	}
}

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */
@@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	current_thread_info()->cpu = 0;
	smp_tune_scheduling();

	set_cpu_sibling_map(0);

+0 −6
Original line number Diff line number Diff line
@@ -805,7 +805,6 @@ static void __cpuinit
get_max_cacheline_size (void)
{
	unsigned long line_size, max = 1;
	unsigned int cache_size = 0;
	u64 l, levels, unique_caches;
        pal_cache_config_info_t cci;
        s64 status;
@@ -835,8 +834,6 @@ get_max_cacheline_size (void)
		line_size = 1 << cci.pcci_line_size;
		if (line_size > max)
			max = line_size;
		if (cache_size < cci.pcci_cache_size)
			cache_size = cci.pcci_cache_size;
		if (!cci.pcci_unified) {
			status = ia64_pal_cache_config_info(l,
						    /* cache_type (instruction)= */ 1,
@@ -853,9 +850,6 @@ get_max_cacheline_size (void)
			ia64_i_cache_stride_shift = cci.pcci_stride;
	}
  out:
#ifdef CONFIG_SMP
	max_cache_size = max(max_cache_size, cache_size);
#endif
	if (max > ia64_max_cacheline_size)
		ia64_max_cacheline_size = max;
}
+0 −11
Original line number Diff line number Diff line
@@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
EXPORT_SYMBOL(phys_cpu_present_map);
EXPORT_SYMBOL(cpu_online_map);

/* This happens early in bootup, can't really do it better */
static void smp_tune_scheduling (void)
{
	struct cache_desc *cd = &current_cpu_data.scache;
	unsigned long cachesize = cd->linesz * cd->sets * cd->ways;

	if (cachesize > max_cache_size)
		max_cache_size = cachesize;
}

extern void __init calibrate_delay(void);
extern ATTRIB_NORET void cpu_idle(void);

@@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
	init_new_context(current, &init_mm);
	current_thread_info()->cpu = 0;
	smp_tune_scheduling();
	plat_prepare_cpus(max_cpus);
#ifndef CONFIG_HOTPLUG_CPU
	cpu_present_map = cpu_possible_map;
+0 −10
Original line number Diff line number Diff line
@@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id)
	cpu_data(id).prom_node = cpu_node;
	cpu_data(id).mid = cpu_get_hwmid(cpu_node);

	/* this is required to tune the scheduler correctly */
	/* is it possible to have CPUs with different cache sizes? */
	if (id == boot_cpu_id) {
		int cache_line,cache_nlines;
		cache_line = 0x20;
		cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
		cache_nlines = 0x8000;
		cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
		max_cache_size = cache_line * cache_nlines;
	}
	if (cpu_data(id).mid < 0)
		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
}
Loading