Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6ec6e0d9 authored by Suresh Siddha, committed by Ingo Molnar
Browse files

srat, x86: add support for nodes spanning other nodes



For example, if the physical address layout on a two-node system with 8 GB
memory is something like:
node 0: 0-2GB, 4-6GB
node 1: 2-4GB, 6-8GB

Current kernels fail to boot/detect this NUMA topology.

ACPI SRAT tables can expose such a topology, which needs to be supported.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 8705a49c
Loading
Loading
Loading
Loading
+9 −0
Original line number Original line Diff line number Diff line
@@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA
	help
	help
	  Enable ACPI SRAT based node topology detection.
	  Enable ACPI SRAT based node topology detection.


# Some NUMA nodes have memory ranges that span
# other nodes.  Even though a pfn is valid and
# between a node's start and end pfns, it may not
# reside on that node.  See memmap_init_zone()
# for details.
config NODES_SPAN_OTHER_NODES
	def_bool y
	depends on X86_64_ACPI_NUMA

config NUMA_EMU
config NUMA_EMU
	bool "NUMA emulation"
	bool "NUMA emulation"
	depends on X86_64 && NUMA
	depends on X86_64 && NUMA
+1 −1
Original line number Original line Diff line number Diff line
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
	if (!found)
	if (!found)
		return -1;
		return -1;


	memnode_shift = compute_hash_shift(nodes, 8);
	memnode_shift = compute_hash_shift(nodes, 8, NULL);
	if (memnode_shift < 0) {
	if (memnode_shift < 0) {
		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
		return -1;
		return -1;
+11 −5
Original line number Original line Diff line number Diff line
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size;
 * -1 if node overlap or lost ram (shift too big)
 * -1 if node overlap or lost ram (shift too big)
 */
 */
static int __init populate_memnodemap(const struct bootnode *nodes,
static int __init populate_memnodemap(const struct bootnode *nodes,
				      int numnodes, int shift)
				      int numnodes, int shift, int *nodeids)
{
{
	unsigned long addr, end;
	unsigned long addr, end;
	int i, res = -1;
	int i, res = -1;
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
		do {
		do {
			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
				return -1;
				return -1;

			if (!nodeids)
				memnodemap[addr >> shift] = i;
				memnodemap[addr >> shift] = i;
			else
				memnodemap[addr >> shift] = nodeids[i];

			addr += (1UL << shift);
			addr += (1UL << shift);
		} while (addr < end);
		} while (addr < end);
		res = 1;
		res = 1;
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
	return i;
	return i;
}
}


int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
			      int *nodeids)
{
{
	int shift;
	int shift;


@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
		shift);
		shift);


	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
	if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
		printk(KERN_INFO "Your memory is not aligned you need to "
		printk(KERN_INFO "Your memory is not aligned you need to "
		       "rebuild your kernel with a bigger NODEMAPSIZE "
		       "rebuild your kernel with a bigger NODEMAPSIZE "
		       "shift=%d\n", shift);
		       "shift=%d\n", shift);
@@ -462,7 +468,7 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
		}
		}
	}
	}
out:
out:
	memnode_shift = compute_hash_shift(nodes, num_nodes);
	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
	if (memnode_shift < 0) {
	if (memnode_shift < 0) {
		memnode_shift = 0;
		memnode_shift = 0;
		printk(KERN_ERR "No NUMA hash function found.  NUMA emulation "
		printk(KERN_ERR "No NUMA hash function found.  NUMA emulation "
+21 −11
Original line number Original line Diff line number Diff line
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;
int hotadd_percent __initdata = 0;


static int num_node_memblks __initdata;
static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;

/* Too small nodes confuse the VM badly. Usually they result
/* Too small nodes confuse the VM badly. Usually they result
   from BIOS bugs. */
   from BIOS bugs. */
#define NODE_MIN_SIZE (4*1024*1024)
#define NODE_MIN_SIZE (4*1024*1024)
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm)
	return acpi_map_pxm_to_node(pxm);
	return acpi_map_pxm_to_node(pxm);
}
}


static __init int conflicting_nodes(unsigned long start, unsigned long end)
static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
{
	int i;
	int i;
	for_each_node_mask(i, nodes_parsed) {
	for (i = 0; i < num_node_memblks; i++) {
		struct bootnode *nd = &nodes[i];
		struct bootnode *nd = &node_memblk_range[i];
		if (nd->start == nd->end)
		if (nd->start == nd->end)
			continue;
			continue;
		if (nd->end > start && nd->start < end)
		if (nd->end > start && nd->start < end)
			return i;
			return memblk_nodeid[i];
		if (nd->end == end && nd->start == start)
		if (nd->end == end && nd->start == start)
			return i;
			return memblk_nodeid[i];
	}
	}
	return -1;
	return -1;
}
}
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
		bad_srat();
		bad_srat();
		return;
		return;
	}
	}
	i = conflicting_nodes(start, end);
	i = conflicting_memblks(start, end);
	if (i == node) {
	if (i == node) {
		printk(KERN_WARNING
		printk(KERN_WARNING
		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
			nd->end = end;
			nd->end = end;
	}
	}


	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
	       nd->start, nd->end);
	       start, end);
	e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
	e820_register_active_regions(node, start >> PAGE_SHIFT,
						nd->end >> PAGE_SHIFT);
				     end >> PAGE_SHIFT);
	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
						nd->end >> PAGE_SHIFT);
						nd->end >> PAGE_SHIFT);


@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
		if ((nd->start | nd->end) == 0)
		if ((nd->start | nd->end) == 0)
			node_clear(node, nodes_parsed);
			node_clear(node, nodes_parsed);
	}
	}

	node_memblk_range[num_node_memblks].start = start;
	node_memblk_range[num_node_memblks].end = end;
	memblk_nodeid[num_node_memblks] = node;
	num_node_memblks++;
}
}


/* Sanity check to catch more bad SRATs (they are amazingly common).
/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
		return -1;
		return -1;
	}
	}


	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
					   memblk_nodeid);
	if (memnode_shift < 0) {
	if (memnode_shift < 0) {
		printk(KERN_ERR
		printk(KERN_ERR
		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
+2 −1
Original line number Original line Diff line number Diff line
@@ -9,7 +9,8 @@ struct bootnode {
	u64 end;
	u64 end;
};
};


extern int compute_hash_shift(struct bootnode *nodes, int numnodes);
extern int compute_hash_shift(struct bootnode *nodes, int numblks,
			      int *nodeids);


#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))