Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9adcc4a1 authored by Ingo Molnar
Browse files

Merge branch 'x86/numa' into x86/urgent



Merge reason: Topic is ready for upstream.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 30285c6f d906f0eb
Loading
Loading
Loading
Loading
+5 −6
Original line number Original line Diff line number Diff line
@@ -185,17 +185,16 @@ struct bootnode;


#ifdef CONFIG_ACPI_NUMA
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
extern int acpi_numa;
extern int acpi_get_nodes(struct bootnode *physnodes);
extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
				unsigned long end);
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)

#ifdef CONFIG_NUMA_EMU
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
				   int num_nodes);
				   int num_nodes);
#else
/* Stub for when CONFIG_NUMA_EMU is disabled: nothing to fake. */
static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
				   int num_nodes)
{
}
#endif
#endif
#endif /* CONFIG_ACPI_NUMA */


#define acpi_unlazy_tlb(x)	leave_mm(x)
#define acpi_unlazy_tlb(x)	leave_mm(x)


+5 −1
Original line number Original line Diff line number Diff line
@@ -9,10 +9,14 @@ struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern void amd_flush_garts(void);
extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
extern int amd_scan_nodes(void);


#ifdef CONFIG_NUMA_EMU
extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
extern void amd_get_nodes(struct bootnode *nodes);
#endif

struct amd_northbridge {
struct amd_northbridge {
	struct pci_dev *misc;
	struct pci_dev *misc;
};
};
+1 −1
Original line number Original line Diff line number Diff line
@@ -38,7 +38,7 @@ extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);


#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE	((u64)64 << 20)
#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
#endif /* CONFIG_NUMA_EMU */
#endif /* CONFIG_NUMA_EMU */
#else
#else
+72 −14
Original line number Original line Diff line number Diff line
@@ -27,6 +27,7 @@
#include <asm/amd_nb.h>
#include <asm/amd_nb.h>


static struct bootnode __initdata nodes[8];
static struct bootnode __initdata nodes[8];
static unsigned char __initdata nodeids[8];
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;


static __init int find_northbridge(void)
static __init int find_northbridge(void)
@@ -68,19 +69,6 @@ static __init void early_get_boot_cpu_id(void)
#endif
#endif
}
}


/*
 * Copy each parsed physical node's address range into physnodes[],
 * packed from index 0, and return the number of entries filled.
 * (Removed by this commit in favor of the sparse, void variant below.)
 */
int __init amd_get_nodes(struct bootnode *physnodes)
{
	int i;
	int ret = 0;

	for_each_node_mask(i, nodes_parsed) {
		physnodes[ret].start = nodes[i].start;
		physnodes[ret].end = nodes[i].end;
		ret++;
	}
	return ret;
}

int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
{
{
	unsigned long start = PFN_PHYS(start_pfn);
	unsigned long start = PFN_PHYS(start_pfn);
@@ -113,7 +101,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
		base = read_pci_config(0, nb, 1, 0x40 + i*8);
		base = read_pci_config(0, nb, 1, 0x40 + i*8);
		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
		limit = read_pci_config(0, nb, 1, 0x44 + i*8);


		nodeid = limit & 7;
		nodeids[i] = nodeid = limit & 7;
		if ((base & 3) == 0) {
		if ((base & 3) == 0) {
			if (i < numnodes)
			if (i < numnodes)
				pr_info("Skipping disabled node %d\n", i);
				pr_info("Skipping disabled node %d\n", i);
@@ -193,6 +181,76 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
	return 0;
	return 0;
}
}


#ifdef CONFIG_NUMA_EMU
static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

/*
 * Export the parsed physical nodes into physnodes[], keeping each entry
 * at its own node index (slots for unparsed nodes are left untouched, so
 * the caller is expected to have zeroed the array beforehand).
 */
void __init amd_get_nodes(struct bootnode *physnodes)
{
	int i;

	for_each_node_mask(i, nodes_parsed) {
		physnodes[i].start = nodes[i].start;
		physnodes[i].end = nodes[i].end;
	}
}

/*
 * Return the index of the physical node whose [start, end) range contains
 * addr, or NUMA_NO_NODE if none of the (up to 8) parsed nodes match.
 */
static int __init find_node_by_addr(unsigned long addr)
{
	int ret = NUMA_NO_NODE;
	int i;

	for (i = 0; i < 8; i++)
		if (addr >= nodes[i].start && addr < nodes[i].end) {
			ret = i;
			break;
		}
	return ret;
}

/*
 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
 * setup to represent the physical topology but reflect the emulated
 * environment.  For each emulated node, the real node which it appears on is
 * found and a fake pxm to nid mapping is created which mirrors the actual
 * locality.  node_distance() then represents the correct distances between
 * emulated nodes by using the fake acpi mappings to pxms.
 */
void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
{
	unsigned int bits;
	unsigned int cores;
	unsigned int apicid_base = 0;
	int i;

	bits = boot_cpu_data.x86_coreid_bits;
	cores = 1 << bits;	/* number of consecutive apicids per node */
	early_get_boot_cpu_id();
	if (boot_cpu_physical_apicid > 0)
		apicid_base = boot_cpu_physical_apicid;

	for (i = 0; i < nr_nodes; i++) {
		int index;
		int nid;
		int j;

		/* Physical node hosting the start of this emulated node. */
		nid = find_node_by_addr(nodes[i].start);
		if (nid == NUMA_NO_NODE)
			continue;

		/* First apicid on that physical node (hw node id << bits). */
		index = nodeids[nid] << bits;
		/*
		 * Map all of that node's apicids to this emulated node, but
		 * only if the slots are still unclaimed (first claim wins).
		 */
		if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
			for (j = apicid_base; j < cores + apicid_base; j++)
				fake_apicid_to_node[index + j] = i;
#ifdef CONFIG_ACPI_NUMA
		__acpi_map_pxm_to_node(nid, i);
#endif
	}
	/* Publish the fake mapping as the real apicid_to_node table. */
	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
}
#endif /* CONFIG_NUMA_EMU */

int __init amd_scan_nodes(void)
int __init amd_scan_nodes(void)
{
{
	unsigned int bits;
	unsigned int bits;
+124 −33
Original line number Original line Diff line number Diff line
@@ -260,30 +260,30 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
static char *cmdline __initdata;
static char *cmdline __initdata;


static int __init setup_physnodes(unsigned long start, unsigned long end,
static int __init setup_physnodes(unsigned long start, unsigned long end,
					int acpi, int amd)
					int acpi, int amd)
{
{
	int nr_nodes = 0;
	int ret = 0;
	int ret = 0;
	int i;
	int i;


	memset(physnodes, 0, sizeof(physnodes));
#ifdef CONFIG_ACPI_NUMA
#ifdef CONFIG_ACPI_NUMA
	if (acpi)
	if (acpi)
		nr_nodes = acpi_get_nodes(physnodes);
		acpi_get_nodes(physnodes, start, end);
#endif
#endif
#ifdef CONFIG_AMD_NUMA
#ifdef CONFIG_AMD_NUMA
	if (amd)
	if (amd)
		nr_nodes = amd_get_nodes(physnodes);
		amd_get_nodes(physnodes);
#endif
#endif
	/*
	/*
	 * Basic sanity checking on the physical node map: there may be errors
	 * Basic sanity checking on the physical node map: there may be errors
	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
	 * kernel parameter is used.
	 * kernel parameter is used.
	 */
	 */
	for (i = 0; i < nr_nodes; i++) {
	for (i = 0; i < MAX_NUMNODES; i++) {
		if (physnodes[i].start == physnodes[i].end)
		if (physnodes[i].start == physnodes[i].end)
			continue;
			continue;
		if (physnodes[i].start > end) {
		if (physnodes[i].start > end) {
@@ -298,17 +298,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
			physnodes[i].start = start;
			physnodes[i].start = start;
		if (physnodes[i].end > end)
		if (physnodes[i].end > end)
			physnodes[i].end = end;
			physnodes[i].end = end;
	}

	/*
	 * Remove all nodes that have no memory or were truncated because of the
	 * limited address range.
	 */
	for (i = 0; i < nr_nodes; i++) {
		if (physnodes[i].start == physnodes[i].end)
			continue;
		physnodes[ret].start = physnodes[i].start;
		physnodes[ret].end = physnodes[i].end;
		ret++;
		ret++;
	}
	}


@@ -324,6 +313,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
	return ret;
	return ret;
}
}


/*
 * Set up cpu->node mappings for the emulated topology: delegate to the
 * ACPI or AMD implementation (they are mutually exclusive sources), or
 * bind every cpu to node 0 when no physical NUMA information exists.
 */
static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
{
	int i;

	/* Physical topology must come from at most one source. */
	BUG_ON(acpi && amd);
#ifdef CONFIG_ACPI_NUMA
	if (acpi)
		acpi_fake_nodes(nodes, nr_nodes);
#endif
#ifdef CONFIG_AMD_NUMA
	if (amd)
		amd_fake_nodes(nodes, nr_nodes);
#endif
	if (!acpi && !amd)
		for (i = 0; i < nr_cpu_ids; i++)
			numa_set_node(i, 0);
}

/*
/*
 * Setups up nid to range from addr to addr + size.  If the end
 * Setups up nid to range from addr to addr + size.  If the end
 * boundary is greater than max_addr, then max_addr is used instead.
 * boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +359,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
 * to max_addr.  The return value is the number of nodes allocated.
 * to max_addr.  The return value is the number of nodes allocated.
 */
 */
static int __init split_nodes_interleave(u64 addr, u64 max_addr,
static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
						int nr_phys_nodes, int nr_nodes)
{
{
	nodemask_t physnode_mask = NODE_MASK_NONE;
	nodemask_t physnode_mask = NODE_MASK_NONE;
	u64 size;
	u64 size;
@@ -384,7 +390,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
		return -1;
		return -1;
	}
	}


	for (i = 0; i < nr_phys_nodes; i++)
	for (i = 0; i < MAX_NUMNODES; i++)
		if (physnodes[i].start != physnodes[i].end)
		if (physnodes[i].start != physnodes[i].end)
			node_set(i, physnode_mask);
			node_set(i, physnode_mask);


@@ -553,11 +559,9 @@ static int __init numa_emulation(unsigned long start_pfn,
{
{
	u64 addr = start_pfn << PAGE_SHIFT;
	u64 addr = start_pfn << PAGE_SHIFT;
	u64 max_addr = last_pfn << PAGE_SHIFT;
	u64 max_addr = last_pfn << PAGE_SHIFT;
	int num_phys_nodes;
	int num_nodes;
	int num_nodes;
	int i;
	int i;


	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
	/*
	/*
	 * If the numa=fake command-line contains a 'M' or 'G', it represents
	 * If the numa=fake command-line contains a 'M' or 'G', it represents
	 * the fixed node size.  Otherwise, if it is just a single number N,
	 * the fixed node size.  Otherwise, if it is just a single number N,
@@ -572,7 +576,7 @@ static int __init numa_emulation(unsigned long start_pfn,
		unsigned long n;
		unsigned long n;


		n = simple_strtoul(cmdline, NULL, 0);
		n = simple_strtoul(cmdline, NULL, 0);
		num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
		num_nodes = split_nodes_interleave(addr, max_addr, n);
	}
	}


	if (num_nodes < 0)
	if (num_nodes < 0)
@@ -595,7 +599,8 @@ static int __init numa_emulation(unsigned long start_pfn,
						nodes[i].end >> PAGE_SHIFT);
						nodes[i].end >> PAGE_SHIFT);
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	}
	}
	acpi_fake_nodes(nodes, num_nodes);
	setup_physnodes(addr, max_addr, acpi, amd);
	fake_physnodes(acpi, amd, num_nodes);
	numa_init_array();
	numa_init_array();
	return 0;
	return 0;
}
}
@@ -610,8 +615,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
	nodes_clear(node_online_map);
	nodes_clear(node_online_map);


#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_NUMA_EMU
	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
			acpi, amd);
	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
		return;
		return;
	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
			acpi, amd);
	nodes_clear(node_possible_map);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
	nodes_clear(node_online_map);
#endif
#endif
@@ -767,6 +776,7 @@ void __cpuinit numa_clear_node(int cpu)


#ifndef CONFIG_DEBUG_PER_CPU_MAPS
#ifndef CONFIG_DEBUG_PER_CPU_MAPS


#ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
void __cpuinit numa_add_cpu(int cpu)
{
{
	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -776,34 +786,115 @@ void __cpuinit numa_remove_cpu(int cpu)
{
{
	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
}
#else
void __cpuinit numa_add_cpu(int cpu)
{
	unsigned long addr;
	u16 apicid;
	int physnid;
	int nid = NUMA_NO_NODE;


#else /* CONFIG_DEBUG_PER_CPU_MAPS */
	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
	if (apicid != BAD_APICID)
		nid = apicid_to_node[apicid];
	if (nid == NUMA_NO_NODE)
		nid = early_cpu_to_node(cpu);
	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));


	/*
	/*
 * --------- debug versions of the numa functions ---------
	 * Use the starting address of the emulated node to find which physical
	 * node it is allocated on.
	 */
	 */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
	addr = node_start_pfn(nid) << PAGE_SHIFT;
	for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
		if (addr >= physnodes[physnid].start &&
		    addr < physnodes[physnid].end)
			break;

	/*
	 * Map the cpu to each emulated node that is allocated on the physical
	 * node of the cpu's apic id.
	 */
	for_each_online_node(nid) {
		addr = node_start_pfn(nid) << PAGE_SHIFT;
		if (addr >= physnodes[physnid].start &&
		    addr < physnodes[physnid].end)
			cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
	}
}

/*
 * NUMA-emulation variant: a cpu may have been added to several emulated
 * nodes' masks, so clear it from every online node rather than just one.
 */
void __cpuinit numa_remove_cpu(int cpu)
{
	int i;

	for_each_online_node(i)
		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
}
#endif /* !CONFIG_NUMA_EMU */

#else /* CONFIG_DEBUG_PER_CPU_MAPS */
static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
{
{
	int node = early_cpu_to_node(cpu);
	int node = early_cpu_to_node(cpu);
	struct cpumask *mask;
	struct cpumask *mask;
	char buf[64];
	char buf[64];


	mask = node_to_cpumask_map[node];
	mask = node_to_cpumask_map[node];
	if (mask == NULL) {
	if (!mask) {
		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
		pr_err("node_to_cpumask_map[%i] NULL\n", node);
		dump_stack();
		dump_stack();
		return;
		return NULL;
	}
	}


	cpulist_scnprintf(buf, sizeof(buf), mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		enable ? "numa_add_cpu" : "numa_remove_cpu",
		cpu, node, buf);
	return mask;
}

/*
 * --------- debug versions of the numa functions ---------
 */
#ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	struct cpumask *mask;

	mask = debug_cpumask_set_cpu(cpu, enable);
	if (!mask)
		return;

	if (enable)
	if (enable)
		cpumask_set_cpu(cpu, mask);
		cpumask_set_cpu(cpu, mask);
	else
	else
		cpumask_clear_cpu(cpu, mask);
		cpumask_clear_cpu(cpu, mask);
}
#else
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	int node = early_cpu_to_node(cpu);
	struct cpumask *mask;
	int i;


	cpulist_scnprintf(buf, sizeof(buf), mask);
	for_each_online_node(i) {
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		unsigned long addr;
		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);

		addr = node_start_pfn(i) << PAGE_SHIFT;
		if (addr < physnodes[node].start ||
					addr >= physnodes[node].end)
			continue;
		mask = debug_cpumask_set_cpu(cpu, enable);
		if (!mask)
			return;

		if (enable)
			cpumask_set_cpu(cpu, mask);
		else
			cpumask_clear_cpu(cpu, mask);
	}
	}
}
#endif /* CONFIG_NUMA_EMU */


void __cpuinit numa_add_cpu(int cpu)
void __cpuinit numa_add_cpu(int cpu)
{
{
Loading