Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef396ec9 authored by Tejun Heo
Browse files

x86-64, NUMA: Factor out memblk handling into numa_{add|register}_memblk()



Factor out memblk handling from srat_64.c into two functions in
numa_64.c.  This patch doesn't introduce any behavior change.  The
next patch will make all init methods use these functions.

- v2: Fixed build failure on 32bit due to misplaced NR_NODE_MEMBLKS.
      Reported by Ingo.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
parent 19095548
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -187,7 +187,6 @@ struct bootnode;
extern int acpi_numa;
extern int x86_acpi_numa_init(void);
extern int acpi_scan_nodes(void);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)

#ifdef CONFIG_NUMA_EMU
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
+3 −0
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@
#include <asm/apicdef.h>

#ifdef CONFIG_NUMA

#define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)

/*
 * __apicid_to_node[] stores the raw mapping between physical apicid and
 * node and is used to initialize cpu_to_node mapping.
+2 −0
Original line number Diff line number Diff line
@@ -32,6 +32,8 @@ extern nodemask_t mem_nodes_parsed __initdata;
extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;

extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern int __init numa_register_memblks(void);

#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
+109 −0
Original line number Diff line number Diff line
@@ -33,6 +33,10 @@ struct memnode memnode;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;

static int num_node_memblks __initdata;
static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;

struct bootnode numa_nodes[MAX_NUMNODES] __initdata;

/*
@@ -184,6 +188,43 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
	return NULL;
}

/*
 * Scan the recorded memblks for one that collides with start-end.
 *
 * Returns the node id of the first colliding memblk, or -1 when no
 * recorded memblk collides with the given range.
 */
static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
	int idx = 0;

	while (idx < num_node_memblks) {
		struct bootnode *blk = node_memblk_range + idx;
		int empty = (blk->start == blk->end);
		int overlaps = (blk->end > start && blk->start < end);
		int identical = (blk->start == start && blk->end == end);

		/* zero-length entries never count as a conflict */
		if (!empty && (overlaps || identical))
			return memblk_nodeid[idx];
		idx++;
	}
	return -1;
}

/*
 * numa_add_memblk - record a memory block as belonging to a NUMA node
 * @nid: node id the block belongs to
 * @start: start address of the block
 * @end: end address of the block
 *
 * Appends the block to node_memblk_range[] / memblk_nodeid[].  A block that
 * collides with one already recorded for the same node only triggers a
 * warning and is still added; a collision with a block recorded for a
 * different node is rejected.
 *
 * Returns 0 on success, -EINVAL if the block collides with another node's
 * block or if the memblk table is already full.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	int i;

	/*
	 * Both tables have the same fixed number of slots; refuse to write
	 * past the end of them instead of corrupting adjacent init data.
	 */
	if (num_node_memblks >=
	    (int)(sizeof(node_memblk_range) / sizeof(node_memblk_range[0]))) {
		printk(KERN_ERR "NUMA: too many memblk ranges\n");
		return -EINVAL;
	}

	i = conflicting_memblks(start, end);
	if (i == nid) {
		/* same-node overlap: complain, but keep the block */
		printk(KERN_WARNING "NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
		       nid, start, end, numa_nodes[i].start, numa_nodes[i].end);
	} else if (i >= 0) {
		/* overlap with a different node: the caller must handle this */
		printk(KERN_ERR "NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
		       nid, start, end, i,
		       numa_nodes[i].start, numa_nodes[i].end);
		return -EINVAL;
	}

	node_memblk_range[num_node_memblks].start = start;
	node_memblk_range[num_node_memblks].end = end;
	memblk_nodeid[num_node_memblks] = nid;
	num_node_memblks++;
	return 0;
}

static __init void cutoff_node(int i, unsigned long start, unsigned long end)
{
	struct bootnode *nd = &numa_nodes[i];
@@ -246,6 +287,71 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
	node_set_online(nodeid);
}

/*
 * numa_register_memblks - merge and register the recorded memblks
 *
 * Works on node_memblk_range[] / memblk_nodeid[] in three steps:
 *  1. merges pairs of memblks that belong to the same node when the span
 *     between them does not overlap a memblk of any other node;
 *  2. computes memnode_shift from the resulting table via
 *     compute_hash_shift();
 *  3. passes every remaining memblk, as a page frame range, to
 *     memblock_x86_register_active_regions().
 *
 * Returns 0 on success, -EINVAL if compute_hash_shift() returns a
 * negative value (in which case nothing is registered).
 */
int __init numa_register_memblks(void)
{
	int i;

	/*
	 * Join together blocks on the same node, holes between
	 * which don't overlap with memory on other nodes.
	 */
	for (i = 0; i < num_node_memblks; ++i) {
		int j, k;

		for (j = i + 1; j < num_node_memblks; ++j) {
			unsigned long start, end;

			/* only blocks of the same node are merge candidates */
			if (memblk_nodeid[i] != memblk_nodeid[j])
				continue;
			/*
			 * start/end first describe the span between the two
			 * blocks: from the lower of the two ends up to the
			 * higher of the two starts.
			 */
			start = min(node_memblk_range[i].end,
			            node_memblk_range[j].end);
			end = max(node_memblk_range[i].start,
			          node_memblk_range[j].start);
			/* look for a block of another node overlapping that span */
			for (k = 0; k < num_node_memblks; ++k) {
				if (memblk_nodeid[i] == memblk_nodeid[k])
					continue;
				if (start < node_memblk_range[k].end &&
				    end > node_memblk_range[k].start)
					break;
			}
			/* early loop exit means a foreign block is in the way */
			if (k < num_node_memblks)
				continue;
			/* start/end now become the union of blocks i and j */
			start = min(node_memblk_range[i].start,
			            node_memblk_range[j].start);
			end = max(node_memblk_range[i].end,
			          node_memblk_range[j].end);
			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
			       memblk_nodeid[i],
			       node_memblk_range[i].start,
			       node_memblk_range[i].end,
			       node_memblk_range[j].start,
			       node_memblk_range[j].end,
			       start, end);
			node_memblk_range[i].start = start;
			node_memblk_range[i].end = end;
			/*
			 * Drop entry j from both tables: shrink the count, then
			 * shift the k entries that followed j down by one slot.
			 */
			k = --num_node_memblks - j;
			memmove(memblk_nodeid + j, memblk_nodeid + j+1,
				k * sizeof(*memblk_nodeid));
			memmove(node_memblk_range + j, node_memblk_range + j+1,
				k * sizeof(*node_memblk_range));
			/* slot j holds a new entry now; revisit it after ++j */
			--j;
		}
	}

	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
					   memblk_nodeid);
	if (memnode_shift < 0) {
		printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
		return -EINVAL;
	}

	/* register each remaining block, converting addresses to page frames */
	for (i = 0; i < num_node_memblks; i++)
		memblock_x86_register_active_regions(memblk_nodeid[i],
				node_memblk_range[i].start >> PAGE_SHIFT,
				node_memblk_range[i].end >> PAGE_SHIFT);
	return 0;
}

#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -653,6 +759,9 @@ void __init initmem_init(void)
		nodes_clear(mem_nodes_parsed);
		nodes_clear(node_possible_map);
		nodes_clear(node_online_map);
		num_node_memblks = 0;
		memset(node_memblk_range, 0, sizeof(node_memblk_range));
		memset(memblk_nodeid, 0, sizeof(memblk_nodeid));
		memset(numa_nodes, 0, sizeof(numa_nodes));

		if (numa_init[i]() < 0)
+3 −93
Original line number Diff line number Diff line
@@ -30,30 +30,11 @@ static struct acpi_table_slit *acpi_slit;

static struct bootnode nodes_add[MAX_NUMNODES];

static int num_node_memblks __initdata;
static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;

/* Return the node id that acpi_map_pxm_to_node() yields for @pxm. */
static __init int setup_node(int pxm)
{
	int node = acpi_map_pxm_to_node(pxm);

	return node;
}

static __init int conflicting_memblks(unsigned long start, unsigned long end)
{
	int i;
	for (i = 0; i < num_node_memblks; i++) {
		struct bootnode *nd = &node_memblk_range[i];
		if (nd->start == nd->end)
			continue;
		if (nd->end > start && nd->start < end)
			return memblk_nodeid[i];
		if (nd->end == end && nd->start == start)
			return memblk_nodeid[i];
	}
	return -1;
}

static __init void bad_srat(void)
{
	int i;
@@ -233,7 +214,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
	struct bootnode *nd;
	unsigned long start, end;
	int node, pxm;
	int i;

	if (srat_disabled())
		return;
@@ -255,16 +235,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
		bad_srat();
		return;
	}
	i = conflicting_memblks(start, end);
	if (i == node) {
		printk(KERN_WARNING
		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
		       pxm, start, end, numa_nodes[i].start, numa_nodes[i].end);
	} else if (i >= 0) {
		printk(KERN_ERR
		       "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
		       pxm, start, end, node_to_pxm(i),
		       numa_nodes[i].start, numa_nodes[i].end);

	if (numa_add_memblk(node, start, end) < 0) {
		bad_srat();
		return;
	}
@@ -285,11 +257,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
		}
	} else
		update_nodes_add(node, start, end);

	node_memblk_range[num_node_memblks].start = start;
	node_memblk_range[num_node_memblks].end = end;
	memblk_nodeid[num_node_memblks] = node;
	num_node_memblks++;
}

/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -341,68 +308,11 @@ int __init acpi_scan_nodes(void)
	if (acpi_numa <= 0)
		return -1;

	/*
	 * Join together blocks on the same node, holes between
	 * which don't overlap with memory on other nodes.
	 */
	for (i = 0; i < num_node_memblks; ++i) {
		int j, k;

		for (j = i + 1; j < num_node_memblks; ++j) {
			unsigned long start, end;

			if (memblk_nodeid[i] != memblk_nodeid[j])
				continue;
			start = min(node_memblk_range[i].end,
			            node_memblk_range[j].end);
			end = max(node_memblk_range[i].start,
			          node_memblk_range[j].start);
			for (k = 0; k < num_node_memblks; ++k) {
				if (memblk_nodeid[i] == memblk_nodeid[k])
					continue;
				if (start < node_memblk_range[k].end &&
				    end > node_memblk_range[k].start)
					break;
			}
			if (k < num_node_memblks)
				continue;
			start = min(node_memblk_range[i].start,
			            node_memblk_range[j].start);
			end = max(node_memblk_range[i].end,
			          node_memblk_range[j].end);
			printk(KERN_INFO "SRAT: Node %d "
			       "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
			       memblk_nodeid[i],
			       node_memblk_range[i].start,
			       node_memblk_range[i].end,
			       node_memblk_range[j].start,
			       node_memblk_range[j].end,
			       start, end);
			node_memblk_range[i].start = start;
			node_memblk_range[i].end = end;
			k = --num_node_memblks - j;
			memmove(memblk_nodeid + j, memblk_nodeid + j+1,
				k * sizeof(*memblk_nodeid));
			memmove(node_memblk_range + j, node_memblk_range + j+1,
				k * sizeof(*node_memblk_range));
			--j;
		}
	}

	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
					   memblk_nodeid);
	if (memnode_shift < 0) {
		printk(KERN_ERR
		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
	if (numa_register_memblks() < 0) {
		bad_srat();
		return -1;
	}

	for (i = 0; i < num_node_memblks; i++)
		memblock_x86_register_active_regions(memblk_nodeid[i],
				node_memblk_range[i].start >> PAGE_SHIFT,
				node_memblk_range[i].end >> PAGE_SHIFT);

	/* for out of order entries in SRAT */
	sort_node_map();
	if (!nodes_cover_memory(numa_nodes)) {