Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c88aea7a authored by Tejun Heo
Browse files

x86-64, NUMA: Make emulation code build numa_meminfo and share the registration path



NUMA emulation code built nodes[] array and had its own registration
path to set up the emulated nodes.  Update it such that it generates
emulated numa_meminfo and returns control to initmem_init() and shares
the same registration path with non-emulated cases.

Because {acpi|amd}_fake_nodes() expect nodes[] parameter,
fake_physnodes() now generates nodes[] from numa_meminfo.  This will
go away with further updates.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
parent 9d073cae
Loading
Loading
Loading
Loading
+86 −87
Original line number Original line Diff line number Diff line
@@ -541,7 +541,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)


#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __initdata;


static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
@@ -626,9 +625,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end)
	return ret;
	return ret;
}
}


static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
static void __init fake_physnodes(int acpi, int amd,
				  const struct numa_meminfo *ei)
{
{
	int i;
	static struct bootnode nodes[MAX_NUMNODES] __initdata;
	int i, nr_nodes = 0;

	for (i = 0; i < ei->nr_blks; i++) {
		int nid = ei->blk[i].nid;

		if (nodes[nid].start == nodes[nid].end) {
			nodes[nid].start = ei->blk[i].start;
			nodes[nid].end = ei->blk[i].end;
			nr_nodes++;
		} else {
			nodes[nid].start = min(ei->blk[i].start, nodes[nid].start);
			nodes[nid].end = max(ei->blk[i].end, nodes[nid].end);
		}
	}


	BUG_ON(acpi && amd);
	BUG_ON(acpi && amd);
#ifdef CONFIG_ACPI_NUMA
#ifdef CONFIG_ACPI_NUMA
@@ -645,45 +659,44 @@ static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
}
}


/*
/*
 * Setups up nid to range from addr to addr + size.  If the end
 * Sets up nid to range from @start to @end.  The return value is -errno if
 * boundary is greater than max_addr, then max_addr is used instead.
 * something went wrong, 0 otherwise.
 * The return value is 0 if there is additional memory left for
 * allocation past addr and -1 otherwise.  addr is adjusted to be at
 * the end of the node.
 */
 */
static int __init setup_node_range(int nid, int physnid,
static int __init emu_setup_memblk(struct numa_meminfo *ei,
				   u64 *addr, u64 size, u64 max_addr)
				   int nid, int physnid, u64 start, u64 end)
{
{
	int ret = 0;
	struct numa_memblk *eb = &ei->blk[ei->nr_blks];
	nodes[nid].start = *addr;

	*addr += size;
	if (ei->nr_blks >= NR_NODE_MEMBLKS) {
	if (*addr >= max_addr) {
		pr_err("NUMA: Too many emulated memblks, failing emulation\n");
		*addr = max_addr;
		return -EINVAL;
		ret = -1;
	}
	}
	nodes[nid].end = *addr;

	node_set(nid, node_possible_map);
	ei->nr_blks++;
	eb->start = start;
	eb->end = end;
	eb->nid = nid;


	if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
	if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
		emu_nid_to_phys[nid] = physnid;
		emu_nid_to_phys[nid] = physnid;


	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
	       nodes[nid].start, nodes[nid].end,
	       eb->start, eb->end, (eb->end - eb->start) >> 20);
	       (nodes[nid].end - nodes[nid].start) >> 20);
	return 0;
	return ret;
}
}


/*
/*
 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
 * to max_addr.  The return value is the number of nodes allocated.
 * to max_addr.  The return value is the number of nodes allocated.
 */
 */
static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
static int __init split_nodes_interleave(struct numa_meminfo *ei,
					 u64 addr, u64 max_addr, int nr_nodes)
{
{
	nodemask_t physnode_mask = NODE_MASK_NONE;
	nodemask_t physnode_mask = NODE_MASK_NONE;
	u64 size;
	u64 size;
	int big;
	int big;
	int ret = 0;
	int nid = 0;
	int i;
	int i, ret;


	if (nr_nodes <= 0)
	if (nr_nodes <= 0)
		return -1;
		return -1;
@@ -721,7 +734,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
			u64 end = physnodes[i].start + size;
			u64 end = physnodes[i].start + size;
			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);


			if (ret < big)
			if (nid < big)
				end += FAKE_NODE_MIN_SIZE;
				end += FAKE_NODE_MIN_SIZE;


			/*
			/*
@@ -760,16 +773,21 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
			 * happen as a result of rounding down each node's size
			 * happen as a result of rounding down each node's size
			 * to FAKE_NODE_MIN_SIZE.
			 * to FAKE_NODE_MIN_SIZE.
			 */
			 */
			if (nodes_weight(physnode_mask) + ret >= nr_nodes)
			if (nodes_weight(physnode_mask) + nid >= nr_nodes)
				end = physnodes[i].end;
				end = physnodes[i].end;


			if (setup_node_range(ret++, i, &physnodes[i].start,
			ret = emu_setup_memblk(ei, nid++, i,
						end - physnodes[i].start,
					       physnodes[i].start,
						physnodes[i].end) < 0)
					       min(end, physnodes[i].end));
			if (ret < 0)
				return ret;

			physnodes[i].start = min(end, physnodes[i].end);
			if (physnodes[i].start == physnodes[i].end)
				node_clear(i, physnode_mask);
				node_clear(i, physnode_mask);
		}
		}
	}
	}
	return ret;
	return 0;
}
}


/*
/*
@@ -794,12 +812,13 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
 * `addr' to `max_addr'.  The return value is the number of nodes allocated.
 * `addr' to `max_addr'.  The return value is the number of nodes allocated.
 */
 */
static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
					      u64 addr, u64 max_addr, u64 size)
{
{
	nodemask_t physnode_mask = NODE_MASK_NONE;
	nodemask_t physnode_mask = NODE_MASK_NONE;
	u64 min_size;
	u64 min_size;
	int ret = 0;
	int nid = 0;
	int i;
	int i, ret;


	if (!size)
	if (!size)
		return -1;
		return -1;
@@ -854,30 +873,31 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
			    memblock_x86_hole_size(end, physnodes[i].end) < size)
			    memblock_x86_hole_size(end, physnodes[i].end) < size)
				end = physnodes[i].end;
				end = physnodes[i].end;


			/*
			ret = emu_setup_memblk(ei, nid++, i,
			 * Setup the fake node that will be allocated as bootmem
					       physnodes[i].start,
			 * later.  If setup_node_range() returns non-zero, there
					       min(end, physnodes[i].end));
			 * is no more memory available on this physical node.
			if (ret < 0)
			 */
				return ret;
			if (setup_node_range(ret++, i, &physnodes[i].start,

						end - physnodes[i].start,
			physnodes[i].start = min(end, physnodes[i].end);
						physnodes[i].end) < 0)
			if (physnodes[i].start == physnodes[i].end)
				node_clear(i, physnode_mask);
				node_clear(i, physnode_mask);
		}
		}
	}
	}
	return ret;
	return 0;
}
}


/*
/*
 * Sets up the system RAM area from start_pfn to last_pfn according to the
 * Sets up the system RAM area from start_pfn to last_pfn according to the
 * numa=fake command-line option.
 * numa=fake command-line option.
 */
 */
static int __init numa_emulation(int acpi, int amd)
static bool __init numa_emulation(int acpi, int amd)
{
{
	static struct numa_meminfo ei __initdata;
	static struct numa_meminfo ei __initdata;
	const u64 max_addr = max_pfn << PAGE_SHIFT;
	const u64 max_addr = max_pfn << PAGE_SHIFT;
	int num_nodes;
	int i, ret;
	int i;

	memset(&ei, 0, sizeof(ei));


	for (i = 0; i < MAX_NUMNODES; i++)
	for (i = 0; i < MAX_NUMNODES; i++)
		emu_nid_to_phys[i] = NUMA_NO_NODE;
		emu_nid_to_phys[i] = NUMA_NO_NODE;
@@ -891,52 +911,33 @@ static int __init numa_emulation(int acpi, int amd)
		u64 size;
		u64 size;


		size = memparse(emu_cmdline, &emu_cmdline);
		size = memparse(emu_cmdline, &emu_cmdline);
		num_nodes = split_nodes_size_interleave(0, max_addr, size);
		ret = split_nodes_size_interleave(&ei, 0, max_addr, size);
	} else {
	} else {
		unsigned long n;
		unsigned long n;


		n = simple_strtoul(emu_cmdline, NULL, 0);
		n = simple_strtoul(emu_cmdline, NULL, 0);
		num_nodes = split_nodes_interleave(0, max_addr, n);
		ret = split_nodes_interleave(&ei, 0, max_addr, n);
	}

	if (ret < 0)
		return false;

	if (numa_cleanup_meminfo(&ei) < 0) {
		pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
		return false;
	}
	}


	if (num_nodes < 0)
	/* commit */
		return num_nodes;
	numa_meminfo = ei;


	/* make sure all emulated nodes are mapped to a physical node */
	/* make sure all emulated nodes are mapped to a physical node */
	for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
	for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
		if (emu_nid_to_phys[i] == NUMA_NO_NODE)
		if (emu_nid_to_phys[i] == NUMA_NO_NODE)
			emu_nid_to_phys[i] = 0;
			emu_nid_to_phys[i] = 0;


	ei.nr_blks = num_nodes;
	fake_physnodes(acpi, amd, &ei);
	for (i = 0; i < ei.nr_blks; i++) {
		ei.blk[i].start = nodes[i].start;
		ei.blk[i].end = nodes[i].end;
		ei.blk[i].nid = i;
	}

	memnode_shift = compute_hash_shift(&ei);
	if (memnode_shift < 0) {
		memnode_shift = 0;
		printk(KERN_ERR "No NUMA hash function found.  NUMA emulation "
		       "disabled.\n");
		return -1;
	}

	/*
	 * We need to vacate all active ranges that may have been registered for
	 * the e820 memory map.
	 */
	remove_all_active_ranges();
	for_each_node_mask(i, node_possible_map)
		memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
						nodes[i].end >> PAGE_SHIFT);
	init_memory_mapping_high();
	for_each_node_mask(i, node_possible_map)
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	fake_physnodes(acpi, amd, num_nodes);
	numa_init_array();
	numa_emu_dist = true;
	numa_emu_dist = true;
	return 0;
	return true;
}
}
#endif /* CONFIG_NUMA_EMU */
#endif /* CONFIG_NUMA_EMU */


@@ -988,15 +989,13 @@ void __init initmem_init(void)
			continue;
			continue;
#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_NUMA_EMU
		setup_physnodes(0, max_pfn << PAGE_SHIFT);
		setup_physnodes(0, max_pfn << PAGE_SHIFT);
		if (emu_cmdline && !numa_emulation(i == 0, i == 1))
		/*
			return;
		 * If requested, try emulation.  If emulation is not used,

		 * build identity emu_nid_to_phys[] for numa_add_cpu()
		/* not emulating, build identity mapping for numa_add_cpu() */
		 */
		if (!emu_cmdline || !numa_emulation(i == 0, i == 1))
			for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
			for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
				emu_nid_to_phys[j] = j;
				emu_nid_to_phys[j] = j;

		nodes_clear(node_possible_map);
		nodes_clear(node_online_map);
#endif
#endif
		if (numa_register_memblks(&numa_meminfo) < 0)
		if (numa_register_memblks(&numa_meminfo) < 0)
			continue;
			continue;