Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 91556237 authored by Tejun Heo
Browse files

x86-64, NUMA: Kill numa_nodes[]



numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo.  Each entry is simply min/max range of all the memblks
for the node.  This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.

Kill numa_nodes[] and always use numa_meminfo instead.

* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
  now operates on numa_meminfo and returns bool.

* setup_node_bootmem() needs min/max range.  Compute the range on the
  fly.  setup_node_bootmem() invocation is restructured to use outer
  loop instead of hardcoding the double invocations.

* find_node_by_addr() now operates on numa_meminfo.

* setup_physnodes() builds physnodes[] from memblks.  This will go
  away when emulation code is updated to use struct numa_meminfo.

This patch also makes the following misc changes.

* Clearing of nodes_add[] is converted to memset().

* numa_add_memblk() in amd_numa_init() is moved down a bit for
  consistency.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
parent a844ef46
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,

extern nodemask_t cpu_nodes_parsed __initdata;
extern nodemask_t mem_nodes_parsed __initdata;
extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;

extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+1 −5
Original line number Diff line number Diff line
@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
		pr_info("Node %d MemBase %016lx Limit %016lx\n",
			nodeid, base, limit);

		numa_nodes[nodeid].start = base;
		numa_nodes[nodeid].end = limit;
		numa_add_memblk(nodeid, base, limit);

		prevbase = base;

		numa_add_memblk(nodeid, base, limit);
		node_set(nodeid, mem_nodes_parsed);
		node_set(nodeid, cpu_nodes_parsed);
	}
+48 −34
Original line number Diff line number Diff line
@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;

static struct numa_meminfo numa_meminfo __initdata;

struct bootnode numa_nodes[MAX_NUMNODES] __initdata;

/*
 * Given a shift value, try to populate memnodemap[]
 * Returns :
@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 * Sanity check to catch more bad NUMA configurations (they are amazingly
 * common).  Make sure the nodes cover all memory.
 */
static int __init nodes_cover_memory(const struct bootnode *nodes)
static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{
	unsigned long numaram, e820ram;
	int i;

	numaram = 0;
	for_each_node_mask(i, mem_nodes_parsed) {
		unsigned long s = nodes[i].start >> PAGE_SHIFT;
		unsigned long e = nodes[i].end >> PAGE_SHIFT;
	for (i = 0; i < mi->nr_blks; i++) {
		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
		numaram += e - s;
		numaram -= __absent_pages_in_range(i, s, e);
		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
		if ((long)numaram < 0)
			numaram = 0;
	}
@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
		       (numaram << PAGE_SHIFT) >> 20,
		       (e820ram << PAGE_SHIFT) >> 20);
		return 0;
		return false;
	}
	return 1;
	return true;
}

static int __init numa_register_memblks(struct numa_meminfo *mi)
{
	int i;
	int i, j, nid;

	/* Account for nodes with cpus and no memory */
	nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)

	/* for out of order entries */
	sort_node_map();
	if (!nodes_cover_memory(numa_nodes))
	if (!numa_meminfo_cover_memory(mi))
		return -EINVAL;

	init_memory_mapping_high();

	/* Finally register nodes. */
	for_each_node_mask(i, node_possible_map)
		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);

	/*
	 * Try again in case setup_node_bootmem missed one due to missing
	 * bootmem.
	 * Finally register nodes.  Do it twice in case setup_node_bootmem
	 * missed one due to missing bootmem.
	 */
	for_each_node_mask(i, node_possible_map)
		if (!node_online(i))
			setup_node_bootmem(i, numa_nodes[i].start,
					   numa_nodes[i].end);
	for (i = 0; i < 2; i++) {
		for_each_node_mask(nid, node_possible_map) {
			u64 start = (u64)max_pfn << PAGE_SHIFT;
			u64 end = 0;

			if (node_online(nid))
				continue;

			for (j = 0; j < mi->nr_blks; j++) {
				if (nid != mi->blk[j].nid)
					continue;
				start = min(mi->blk[j].start, start);
				end = max(mi->blk[j].end, end);
			}

			if (start < end)
				setup_node_bootmem(nid, start, end);
		}
	}

	return 0;
}
@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)

int __init find_node_by_addr(unsigned long addr)
{
	int ret = NUMA_NO_NODE;
	const struct numa_meminfo *mi = &numa_meminfo;
	int i;

	for_each_node_mask(i, mem_nodes_parsed) {
	for (i = 0; i < mi->nr_blks; i++) {
		/*
		 * Find the real node that this emulated node appears on.  For
		 * the sake of simplicity, we only use a real node's starting
		 * address to determine which emulated node it appears on.
		 */
		if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
			ret = i;
			break;
		}
		if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
			return mi->blk[i].nid;
	}
	return ret;
	return NUMA_NO_NODE;
}

static int __init setup_physnodes(unsigned long start, unsigned long end)
{
	const struct numa_meminfo *mi = &numa_meminfo;
	int ret = 0;
	int i;

	memset(physnodes, 0, sizeof(physnodes));

	for_each_node_mask(i, mem_nodes_parsed) {
		physnodes[i].start = numa_nodes[i].start;
		physnodes[i].end = numa_nodes[i].end;
	for (i = 0; i < mi->nr_blks; i++) {
		int nid = mi->blk[i].nid;

		if (physnodes[nid].start == physnodes[nid].end) {
			physnodes[nid].start = mi->blk[i].start;
			physnodes[nid].end = mi->blk[i].end;
		} else {
			physnodes[nid].start = min(physnodes[nid].start,
						   mi->blk[i].start);
			physnodes[nid].end = max(physnodes[nid].end,
						 mi->blk[i].end);
		}
	}

	/*
@@ -809,8 +826,6 @@ static int dummy_numa_init(void)
	node_set(0, cpu_nodes_parsed);
	node_set(0, mem_nodes_parsed);
	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
	numa_nodes[0].start = 0;
	numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;

	return 0;
}
@@ -841,7 +856,6 @@ void __init initmem_init(void)
		nodes_clear(node_possible_map);
		nodes_clear(node_online_map);
		memset(&numa_meminfo, 0, sizeof(numa_meminfo));
		memset(numa_nodes, 0, sizeof(numa_nodes));
		remove_all_active_ranges();

		if (numa_init[i]() < 0)
+4 −18
Original line number Diff line number Diff line
@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)

static __init void bad_srat(void)
{
	int i;
	printk(KERN_ERR "SRAT: SRAT not used.\n");
	acpi_numa = -1;
	for (i = 0; i < MAX_NUMNODES; i++) {
		numa_nodes[i].start = numa_nodes[i].end = 0;
		nodes_add[i].start = nodes_add[i].end = 0;
	}
	memset(nodes_add, 0, sizeof(nodes_add));
}

static __init inline int srat_disabled(void)
@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
	struct bootnode *nd;
	unsigned long start, end;
	int node, pxm;

@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
	       start, end);

	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
		nd = &numa_nodes[node];
		if (!node_test_and_set(node, mem_nodes_parsed)) {
			nd->start = start;
			nd->end = end;
		} else {
			if (start < nd->start)
				nd->start = start;
			if (nd->end < end)
				nd->end = end;
		}
	} else
	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
		node_set(node, mem_nodes_parsed);
	else
		update_nodes_add(node, start, end);
}