Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 95ffa243 authored by Yinghai Lu's avatar Yinghai Lu Committed by Thomas Gleixner
Browse files

x86: mtrr cleanup for converting continuous to discrete layout, v8



Some BIOSes like to use a continuous MTRR layout, so the X driver cannot add
WB entries for graphics cards when 4G or more RAM is installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

mtrr_gran_size= could be used to set the smallest mtrr block size, to avoid running out of MTRRs

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 alignment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
v8: fix hole_basek calculation, which broke when next_basek was removed;
    use gran_sizek when basek is 0.

need to apply
	[PATCH] x86: fix trimming e820 with MTRR holes.
right after this one.

Signed-off-by: default avatarYinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 0dbfafa5
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -599,6 +599,20 @@ and is between 256 and 4096 characters. It is defined in the file
			See drivers/char/README.epca and
			Documentation/digiepca.txt.

	disable_mtrr_cleanup [X86]
	enable_mtrr_cleanup [X86]
			The kernel tries to adjust MTRR layout from continuous
			to discrete, to make X server driver able to add WB
			entry later. This parameter enables/disables that.

	mtrr_chunk_size=nn[KMG] [X86]
			Used for mtrr cleanup. It is the largest continuous chunk
			that could hold holes (aka. UC entries).

	mtrr_gran_size=nn[KMG] [X86]
			Used for mtrr cleanup. It is the granularity of an mtrr
			block. A big value can prevent small alignments from
			using up MTRRs.

	disable_mtrr_trim [X86, Intel and AMD only]
			By default the kernel will trim any uncacheable
			memory out of your available memory pool based on
+26 −0
Original line number Diff line number Diff line
@@ -1092,6 +1092,32 @@ config MTRR

	  See <file:Documentation/mtrr.txt> for more information.

config MTRR_SANITIZER
	def_bool y
	prompt "MTRR cleanup support"
	depends on MTRR
	help
	  Convert MTRR layout from continuous to discrete, so some X driver
	  could add WB entries.

	  Say N here if you see bootup problems (boot crash, boot hang,
	  spontaneous reboots).

	  Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
	  could be used to set the largest mtrr entry size for a continuous
	  block to hold holes (aka. UC entries)

	  If unsure, say Y.

config MTRR_SANITIZER_ENABLE_DEFAULT
	def_bool y
	prompt "Enable MTRR cleanup by default"
	depends on MTRR_SANITIZER
	help
	  Enable mtrr cleanup by default

	  If unsure, say Y.

config X86_PAT
	bool
	prompt "x86 PAT support"
+23 −9
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
static u64 tom2;
u64 mtrr_tom2;

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
		}
	}

	if (tom2) {
		if (start >= (1ULL<<32) && (end < tom2))
	if (mtrr_tom2) {
		if (start >= (1ULL<<32) && (end < mtrr_tom2))
			return MTRR_TYPE_WRBACK;
	}

@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}

/*
 * Store the base/mask register pair for variable range @index in the
 * cached mtrr_state.var_ranges table.  Only the in-memory copy is written
 * here; no MSR access happens in this function.
 */
void fill_mtrr_var_range(unsigned int index,
		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
{
	struct mtrr_var_range *entry = &mtrr_state.var_ranges[index];

	entry->base_lo = base_lo;
	entry->base_hi = base_hi;
	entry->mask_lo = mask_lo;
	entry->mask_hi = mask_hi;
}

static void
get_fixed_ranges(mtrr_type * frs)
{
@@ -216,10 +230,10 @@ void __init get_mtrr_state(void)
		unsigned low, high;
		/* TOP_MEM2 */
		rdmsr(MSR_K8_TOP_MEM2, low, high);
		tom2 = high;
		tom2 <<= 32;
		tom2 |= low;
		tom2 &= 0xffffff8000000ULL;
		mtrr_tom2 = high;
		mtrr_tom2 <<= 32;
		mtrr_tom2 |= low;
		mtrr_tom2 &= 0xffffff8000000ULL;
	}
	if (mtrr_show) {
		int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
			else
				printk(KERN_INFO "MTRR %u disabled\n", i);
		}
		if (tom2) {
		if (mtrr_tom2) {
			printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
					  tom2, tom2>>20);
					  mtrr_tom2, mtrr_tom2>>20);
		}
	}
	mtrr_state_set = 1;
+462 −5
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,452 @@ static struct sysdev_driver mtrr_sysdev_driver = {
	.resume		= mtrr_restore,
};

/*
 * enable_mtrr_cleanup selects whether the MTRR cleanup pass runs at boot:
 *    1  cleanup is enabled
 *    0  cleanup is compiled in but disabled
 *   -1  cleanup support is not configured (CONFIG_MTRR_SANITIZER unset);
 *       the command line handlers below refuse to change this value
 * mtrr_cleanup() only proceeds when the value is >= 1.
 */
#ifdef CONFIG_MTRR_SANITIZER

#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
static int enable_mtrr_cleanup __initdata = 1;
#else
static int enable_mtrr_cleanup __initdata;
#endif

#else

static int enable_mtrr_cleanup __initdata = -1;

#endif

/*
 * Handler for the "disable_mtrr_cleanup" boot parameter: turn the cleanup
 * pass off.  A value of -1 (support not configured) is left untouched.
 * Always returns 0.
 */
static int __init disable_mtrr_cleanup_setup(char *str)
{
	if (enable_mtrr_cleanup == -1)
		return 0;

	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

/*
 * Handler for the "enable_mtrr_cleanup" boot parameter: turn the cleanup
 * pass on.  A value of -1 (support not configured) is left untouched.
 * Always returns 0.
 */
static int __init enable_mtrr_cleanup_setup(char *str)
{
	if (enable_mtrr_cleanup != -1)
		enable_mtrr_cleanup = 1;
	return 0;
}
/*
 * The parameter name must be spelled "enable_mtrr_cleanup"; the previous
 * "enble_mtrr_cleanup" spelling meant the option given on the command line
 * never reached this handler.
 */
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

/* Capacity of the scratch range tables used by the MTRR cleanup code. */
#define RANGE_NUM 256

/*
 * One inclusive [start, end] span.  The cleanup code treats an entry whose
 * end is 0 as an unused slot.
 */
struct res_range {
	unsigned long start;	/* first unit covered by the span */
	unsigned long end;	/* last unit covered (inclusive); 0 == empty slot */
};

/*
 * Insert the inclusive span [start, end] into @range, which currently holds
 * @nr_range entries.
 *
 * When @merge is non-zero, the first live entry (end != 0) that overlaps or
 * directly adjoins the new span is widened to cover both, and the entry count
 * is returned unchanged.  Otherwise the span is appended, unless the table
 * already holds RANGE_NUM entries, in which case it is silently dropped.
 *
 * Returns the new number of entries.
 */
static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
			      unsigned long end, int merge)
{
	int idx;

	if (merge) {
		for (idx = 0; idx < nr_range; idx++) {
			struct res_range *cur = &range[idx];
			unsigned long overlap_lo, overlap_hi;

			/* skip unused slots */
			if (cur->end == 0)
				continue;

			/* disjoint and not even adjacent: cannot merge */
			overlap_lo = max(cur->start, start);
			overlap_hi = min(cur->end, end);
			if (overlap_lo > overlap_hi + 1)
				continue;

			/* grow the existing entry to the union of both spans */
			cur->start = min(cur->start, start);
			cur->end = max(cur->end, end);
			return nr_range;
		}
	}

	/* no merge candidate: append if there is still room */
	if (nr_range >= RANGE_NUM)
		return nr_range;

	range[nr_range].start = start;
	range[nr_range].end = end;

	return nr_range + 1;
}
/*
 * Remove the inclusive span [start, end] from every live entry (end != 0)
 * among the RANGE_NUM slots of @range.
 *
 * An entry fully covered by the span is cleared, an entry overlapped on one
 * side is trimmed, and an entry that strictly contains the span is split:
 * the lower part stays in place and the upper part goes into the first free
 * slot.  If no free slot exists an error is logged and the upper part is
 * lost.
 */
static void __init subtract_range(struct res_range *range, unsigned long start,
				unsigned long end)
{
	int slot;

	for (slot = 0; slot < RANGE_NUM; slot++) {
		struct res_range *cur = &range[slot];
		int spare;

		if (cur->end == 0)
			continue;

		if (start <= cur->start && end >= cur->end) {
			/* entry is swallowed entirely */
			cur->start = 0;
			cur->end = 0;
		} else if (start <= cur->start && end < cur->end && cur->start < end + 1) {
			/* span eats the front of the entry */
			cur->start = end + 1;
		} else if (start > cur->start && end >= cur->end && cur->end > start - 1) {
			/* span eats the tail of the entry */
			cur->end = start - 1;
		} else if (start > cur->start && end < cur->end) {
			/* span lies strictly inside: split, upper half needs a free slot */
			for (spare = 0; spare < RANGE_NUM; spare++)
				if (range[spare].end == 0)
					break;

			if (spare < RANGE_NUM) {
				range[spare].end = cur->end;
				range[spare].start = end + 1;
			} else {
				printk(KERN_ERR "run of slot in ranges\n");
			}
			cur->end = start - 1;
		}
	}
}

/*
 * sort() comparator ordering struct res_range entries by ascending start.
 *
 * Returns a negative value, zero or a positive value when @x1 starts below,
 * at, or above @x2.  The result is derived from explicit comparisons rather
 * than from "start1 - start2": that difference is a long, and truncating it
 * to the int return type can flip the sign or yield 0 for unequal values on
 * 64-bit builds.
 */
static int __init cmp_range(const void *x1, const void *x2)
{
	const struct res_range *r1 = x1;
	const struct res_range *r2 = x2;

	if (r1->start < r2->start)
		return -1;
	if (r1->start > r2->start)
		return 1;
	return 0;
}

/*
 * Working state carried along while a list of ranges is converted into
 * variable MTRR entries.  All sizes and addresses are in KB.
 */
struct var_mtrr_state {
	unsigned long range_startk;	/* start of the range being accumulated */
	unsigned long range_sizek;	/* size of the range being accumulated */
	unsigned long chunk_sizek;	/* chunk size, from mtrr_chunk_size */
	unsigned long gran_sizek;	/* granularity, from mtrr_gran_size */
	unsigned int reg;		/* next variable MTRR index to fill */
	unsigned int address_bits;	/* address width used to build the mask */
};

/*
 * Build the base/mask register values for variable MTRR @reg and store them
 * through fill_mtrr_var_range().
 *
 * @basek and @sizek are in KB.  A zero @sizek clears the entry (all four
 * register halves are written as 0).  @type is OR-ed into the low bits of
 * the base, and @address_bits bounds the upper half of the mask.
 */
static void __init set_var_mtrr(
	unsigned int reg, unsigned long basek, unsigned long sizek,
	unsigned char type, unsigned address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	unsigned upper_mask;

	if (sizek == 0) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	/* mask bits that exist above bit 31 for this address width */
	upper_mask = (1u << (address_bits - 32u)) - 1u;

	/* KB -> byte address, split into 32-bit halves; type goes in the low bits */
	base_hi = basek >> 22;
	base_lo = (basek << 10) | type;

	/*
	 * 0x800 is OR-ed into the low mask word in both cases.
	 * NOTE(review): presumably the "valid" bit of the mask MSR -- confirm
	 * against the CPU manual.
	 */
	if (sizek >= 4*1024*1024) {
		/* 4GB or more: only the upper half of the mask carries size bits */
		mask_hi = upper_mask & (~((sizek >> 22) - 1));
		mask_lo = 0x800;
	} else {
		mask_hi = upper_mask;
		mask_lo = ~((sizek << 10) - 1) | 0x800;
	}

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

/*
 * Cover the span starting at @range_startk with length @range_sizek (both in
 * KB) with variable MTRR entries of memory type @type, beginning at register
 * index @reg.
 *
 * Each pass emits the largest power-of-two block that is both aligned at the
 * current start and no larger than the remaining size.  The loop stops when
 * the span is covered or the variable registers run out.
 *
 * Returns the index of the next unused register.
 */
static unsigned int __init range_to_mtrr(unsigned int reg,
	unsigned long range_startk, unsigned long range_sizek,
	unsigned char type, unsigned address_bits)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;
		/* Compute the maximum size I can make a range */
		if (range_startk)
			max_align = ffs(range_startk) - 1;
		else
			max_align = 32;
		align = fls(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		/*
		 * Shift an unsigned long, not an int: align can reach 31, and
		 * "1 << 31" overflows int and sign-extends into sizek.
		 */
		sizek = 1UL << align;
		printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n",
			reg, range_startk >> 10, sizek >> 10,
			(type == MTRR_TYPE_UNCACHABLE)?"UC":
			    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
			);
		set_var_mtrr(reg++, range_startk, sizek, type, address_bits);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}

/*
 * Emit variable MTRR entries for the range accumulated in @state
 * (state->range_startk / state->range_sizek, in KB).
 *
 * @basek is the start (in KB) of the range that follows, or 0 when there is
 * none (the caller passes 0 for the last range).
 *
 * The range is first trimmed to state->gran_sizek granularity.  It is then
 * either written as plain WB entries, or -- when rounding its size up to a
 * multiple of state->chunk_sizek is cheap enough -- as WB entries that
 * overshoot the range plus a UC "hole" entry covering the overshoot.
 * state->reg is advanced past every register consumed.
 */
static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* align with gran size, prevent small block used up MTRRs */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	/* the aligned start already lies beyond the next range: nothing to emit */
	if ((range_basek > basek) && basek)
		return;
	range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek);

	/* the round-up may overshoot the original end; step back one granule at a time */
	while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return;
	}
	state->range_startk = range_basek;
	state->range_sizek = range_sizek;

	/* try to append some small hole */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
	/*
	 * No hole handling if the size is already a multiple of the chunk size,
	 * or if the start of the last chunk lies beyond the next range: write
	 * the range as it is.
	 */
	if ((range0_sizek == state->range_sizek) ||
	    ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) {
			printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10);
			state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
		return;
	}


	/* range0: the leading part made of whole chunks */
	range0_sizek -= chunk_sizek;
	printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10);
	state->reg = range_to_mtrr(state->reg, range0_basek,
			range0_sizek, MTRR_TYPE_WRBACK, state->address_bits);

	/*
	 * The remainder occupies part of one more chunk.  If the unused tail of
	 * that chunk is smaller than half a chunk, map the whole chunk as WB and
	 * cover the tail with a UC hole; otherwise map only the remainder.
	 */
	range_basek = range0_basek + range0_sizek;
	range_sizek = chunk_sizek;
	if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) {
		hole_sizek = range_sizek - (state->range_sizek - range0_sizek);
		hole_basek = range_basek + range_sizek - hole_sizek;
	} else
		range_sizek = state->range_sizek - range0_sizek;

	printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10);
	state->reg = range_to_mtrr(state->reg, range_basek,
			range_sizek, MTRR_TYPE_WRBACK, state->address_bits);
	if (hole_sizek) {
		printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits);
	}
}

/*
 * Feed one range, given as a page frame number and a page count, into the
 * conversion state.
 *
 * A range that starts at or below 1MB, or that directly follows the range
 * currently accumulated in @state, simply extends that range.  Otherwise the
 * accumulated range (if any) is written out through
 * range_to_mtrr_with_hole() and the new range becomes the accumulated one.
 * Nothing is done once all variable registers are used.
 */
static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn)
{
	unsigned long basek, sizek;

	if (state->reg >= num_var_ranges)
		return;

	/* pages -> KB */
	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* mergeable with the pending range: just stretch its end */
	if (basek <= 1024 || basek == state->range_startk + state->range_sizek) {
		state->range_sizek = (basek + sizek) - state->range_startk;
		return;
	}

	/* flush the pending range before starting a new one */
	if (state->range_sizek)
		range_to_mtrr_with_hole(state, basek);

	/* the new range is now the pending one */
	state->range_startk = basek;
	state->range_sizek = sizek;
}

/* minimum size of mtrr block that can take hole */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

/*
 * Handler for the "mtrr_chunk_size" boot parameter.  The value is parsed
 * with memparse() and stored in mtrr_chunk_size.
 * Returns -EINVAL when no value was supplied, 0 otherwise.
 */
static int __init parse_mtrr_chunk_size_opt(char *p)
{
	char *rest;

	if (p == NULL)
		return -EINVAL;

	mtrr_chunk_size = memparse(p, &rest);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* granularity of an mtrr block */
static u64 mtrr_gran_size __initdata = (64ULL<<20);

/*
 * Handler for the "mtrr_gran_size" boot parameter.  The value is parsed
 * with memparse() and stored in mtrr_gran_size.
 * Returns -EINVAL when no value was supplied, 0 otherwise.
 */
static int __init parse_mtrr_gran_size_opt(char *p)
{
	char *rest;

	if (p == NULL)
		return -EINVAL;

	mtrr_gran_size = memparse(p, &rest);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

/*
 * Convert the first @nr_range entries of @range into variable MTRR state.
 *
 * Every entry is passed to set_var_mtrr_range() as (start, length), the last
 * pending range is flushed with range_to_mtrr_with_hole(), and all remaining
 * variable registers are cleared.  Chunk size and granularity come from
 * mtrr_chunk_size / mtrr_gran_size, converted to KB.
 */
static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits)
{
	struct var_mtrr_state st = {
		.range_startk	= 0,
		.range_sizek	= 0,
		.chunk_sizek	= mtrr_chunk_size >> 10,
		.gran_sizek	= mtrr_gran_size >> 10,
		.reg		= 0,
		.address_bits	= address_bits,
	};
	int idx;

	/* Write the range etc */
	for (idx = 0; idx < nr_range; idx++) {
		unsigned long length = range[idx].end - range[idx].start + 1;

		set_var_mtrr_range(&st, range[idx].start, length);
	}

	/* Write the last range */
	range_to_mtrr_with_hole(&st, 0);
	printk(KERN_INFO "DONE variable MTRRs\n");

	/* Clear out the extra MTRR's */
	for (; st.reg < num_var_ranges; st.reg++)
		set_var_mtrr(st.reg, 0, 0, 0, st.address_bits);
}

/*
 * Build the list of write-back spans described by the current variable
 * MTRRs.
 *
 * All WB entries are merged into @range (which already holds @nr_range
 * entries and has room for RANGE_NUM).  Every non-empty UC entry is then
 * subtracted, followed by the span starting at @extra_remove_base with
 * length @extra_remove_size when that length is non-zero.  Finally the
 * surviving entries are packed to the front of the table and sorted by
 * start.
 *
 * Returns the number of live entries, which occupy range[0..n-1].
 */
static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size)
{
	unsigned long i, base, size;
	mtrr_type type;

	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		if (type != MTRR_TYPE_WRBACK)
			continue;
		nr_range = add_range(range, nr_range, base, base + size - 1, 1);
	}
	printk(KERN_INFO "After WB checking\n");
	for (i = 0; i < nr_range; i++)
		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);

	/* take out UC ranges */
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		if (type != MTRR_TYPE_UNCACHABLE)
			continue;
		if (!size)
			continue;
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,  extra_remove_base + extra_remove_size  - 1);

	/*
	 * get new range num
	 *
	 * subtract_range() may have emptied slots in the middle of the table
	 * or placed a split-off piece in a later slot, so live entries are not
	 * necessarily contiguous.  Pack them to the front (keeping their
	 * order) while counting; otherwise the printing and sorting below,
	 * which look only at the first nr_range slots, would drop live
	 * entries and keep empty ones.
	 */
	nr_range = 0;
	for (i = 0; i < RANGE_NUM; i++) {
		if (!range[i].end)
			continue;
		if (i != nr_range) {
			range[nr_range] = range[i];
			range[i].start = 0;
			range[i].end = 0;
		}
		nr_range++;
	}
	printk(KERN_INFO "After UC checking\n");
	for (i = 0; i < nr_range; i++)
		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);

	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
	printk(KERN_INFO "After sorting\n");
	for (i = 0; i < nr_range; i++)
		printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1);

	return nr_range;
}

/*
 * Try to rewrite the cached variable MTRR state from the layout found at
 * boot into one built from the write-back spans only.
 *
 * The conversion is attempted only when all of the following hold:
 *  - is_cpu(INTEL) is true and enable_mtrr_cleanup is >= 1,
 *  - the default memory type read from MTRRdefType_MSR is UC,
 *  - at least one variable entry is UC,
 *  - every non-empty variable entry is either WB or UC.
 *
 * When mtrr_tom2 is set, the span from 4GB up to mtrr_tom2 is additionally
 * removed from the WB spans before conversion.
 *
 * @address_bits is passed on for building the MTRR masks.
 *
 * Returns 1 when the variable range state was rewritten, 0 when nothing was
 * changed.
 */
static int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long i, base, size, def, dummy;
	mtrr_type type;
	/*
	 * Scratch table; static __initdata instead of an automatic variable
	 * so that RANGE_NUM entries do not have to fit on the stack.
	 */
	static struct res_range range[RANGE_NUM] __initdata;
	int nr_range;
	unsigned long extra_remove_base, extra_remove_size;

	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];

	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;
	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* check entries number */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		if (type >= MTRR_NUM_TYPES)
			continue;
		/* empty entries are counted in the extra last bucket */
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* check if we got UC entries */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
		num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	/*
	 * Both values start at 0 so that nothing uninitialised is passed on
	 * when mtrr_tom2 is not set.
	 */
	extra_remove_base = 0;
	extra_remove_size = 0;
	if (mtrr_tom2) {
		extra_remove_base = 1 << (32 - PAGE_SHIFT);
		extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base;
	}
	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size);

	/* convert ranges to var ranges state */
	x86_setup_var_mtrrs(range, nr_range, address_bits);

	return 1;

}

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
@@ -729,18 +1176,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 */
void __init mtrr_bp_init(void)
{
	u32 phys_addr;
	init_ifs();

	phys_addr = 32;

	if (cpu_has_mtrr) {
		mtrr_if = &generic_mtrr_ops;
		size_or_mask = 0xff000000;	/* 36 bits */
		size_and_mask = 0x00f00000;
		phys_addr = 36;

		/* This is an AMD specific MSR, but we assume(hope?) that
		   Intel will implement it to when they extend the address
		   bus of the Xeon. */
		if (cpuid_eax(0x80000000) >= 0x80000008) {
			u32 phys_addr;
			phys_addr = cpuid_eax(0x80000008) & 0xff;
			/* CPUID workaround for Intel 0F33/0F34 CPU */
			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1208,7 @@ void __init mtrr_bp_init(void)
			   don't support PAE */
			size_or_mask = 0xfff00000;	/* 32 bits */
			size_and_mask = 0;
			phys_addr = 32;
		}
	} else {
		switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1242,13 @@ void __init mtrr_bp_init(void)
	if (mtrr_if) {
		set_num_var_ranges();
		init_table();
		if (use_intel())
		if (use_intel()) {
			get_mtrr_state();

			if (mtrr_cleanup(phys_addr))
				mtrr_if->set_all();

		}
	}
}

@@ -829,9 +1285,10 @@ static int __init mtrr_init_finialize(void)
{
	if (!mtrr_if)
		return 0;
	if (use_intel())
	if (use_intel()) {
		if (enable_mtrr_cleanup < 1)
			mtrr_state_warn();
	else {
	} else {
		/* The CPUs haven't MTRR and seem to not support SMP. They have
		 * specific drivers, we use a tricky method to support
		 * suspend/resume for them.
+3 −0
Original line number Diff line number Diff line
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);

void fill_mtrr_var_range(unsigned int index,
		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);

extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
#define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)

extern unsigned int num_var_ranges;
extern u64 mtrr_tom2;

void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);