
Commit 45fb6cea authored by Anton Blanchard, committed by Paul Mackerras

[PATCH] ppc64: Convert NUMA to sparsemem (3)



Convert to sparsemem and remove all the discontigmem code in the
process. This has a few advantages:

- The old numa_memory_lookup_table can go away
- All the arch specific discontigmem magic can go away

We also remove the triple pass over the memory properties and instead create a
list of per-node extents that we iterate through. A final cleanup would be to
change our lmb code to store extents per node; we could then reuse that
information in the NUMA code.
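
As an illustration, here is a minimal, standalone sketch of that per-node
extents list (the names and shapes mirror init_node_data[], add_region() and
early_pfn_to_nid() from the diff below; the fixed MAX_REGIONS value and the
dropped dbg()/printk() reporting are simplifications for this sketch):

#define MAX_REGIONS	256	/* the patch sizes this as MAX_LMB_REGIONS*2 */

static struct {
	unsigned long start_pfn;	/* first page frame of the extent */
	unsigned long end_pfn;		/* one past the last; 0 ends the list */
	int nid;			/* node this extent belongs to */
} init_node_data[MAX_REGIONS];

/* Record [start_pfn, start_pfn + pages) on node nid, merging with an
 * adjacent extent of the same node when the ranges touch. */
static void add_region(unsigned int nid, unsigned long start_pfn,
		       unsigned long pages)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++) {
		if (init_node_data[i].nid != (int)nid)
			continue;
		if (init_node_data[i].end_pfn == start_pfn) {
			init_node_data[i].end_pfn += pages;	/* extend up */
			return;
		}
		if (init_node_data[i].start_pfn == start_pfn + pages) {
			init_node_data[i].start_pfn -= pages;	/* extend down */
			return;
		}
	}

	if (i >= MAX_REGIONS - 1)	/* keep the zero terminator intact */
		return;

	init_node_data[i].start_pfn = start_pfn;
	init_node_data[i].end_pfn = start_pfn + pages;
	init_node_data[i].nid = nid;
}

/* Answer "which node owns this pfn?" by walking the extents; this is what
 * replaces the old numa_memory_lookup_table[] byte array. */
static int early_pfn_to_nid(unsigned long pfn)
{
	unsigned int i;

	for (i = 0; init_node_data[i].end_pfn; i++)
		if (init_node_data[i].start_pfn <= pfn &&
		    pfn < init_node_data[i].end_pfn)
			return init_node_data[i].nid;

	return -1;	/* pfn not covered by any extent */
}

The lookup becomes a linear walk rather than a byte-array index, but in the
patch it is __init code that only runs at boot, and the extent list is tiny
compared with the old table's one byte per 16MB of address space.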

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
parent 3e66c4de
+3 −8
@@ -581,17 +581,12 @@ config ARCH_FLATMEM_ENABLE
 	def_bool y
 	depends on PPC64 && !NUMA
 
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool y
-	depends on SMP && PPC_PSERIES
-
-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
 
-config ARCH_SPARSEMEM_ENABLE
+config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
+	depends on SMP && PPC_PSERIES
 
 source "mm/Kconfig"
 
+159 −206
@@ -17,9 +17,8 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <asm/sparsemem.h>
 #include <asm/lmb.h>
-#include <asm/machdep.h>
-#include <asm/abs_addr.h>
 #include <asm/system.h>
 #include <asm/smp.h>
 
@@ -28,43 +27,114 @@ static int numa_enabled = 1;
 static int numa_debug;
 #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
 
-#ifdef DEBUG_NUMA
-#define ARRAY_INITIALISER -1
-#else
-#define ARRAY_INITIALISER 0
-#endif
-
-int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
-	ARRAY_INITIALISER};
-char *numa_memory_lookup_table;
+int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
-
 struct pglist_data *node_data[MAX_NUMNODES];
-bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_data);
+
+static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static int min_common_depth;
 
 /*
- * We need somewhere to store start/span for each node until we have
+ * We need somewhere to store start/end/node for each region until we have
  * allocated the real node_data structures.
  */
+#define MAX_REGIONS	(MAX_LMB_REGIONS*2)
 static struct {
-	unsigned long node_start_pfn;
-	unsigned long node_end_pfn;
-	unsigned long node_present_pages;
-} init_node_data[MAX_NUMNODES] __initdata;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+} init_node_data[MAX_REGIONS] __initdata;
 
-EXPORT_SYMBOL(node_data);
-EXPORT_SYMBOL(numa_cpu_lookup_table);
-EXPORT_SYMBOL(numa_memory_lookup_table);
-EXPORT_SYMBOL(numa_cpumask_lookup_table);
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+	unsigned int i;
+
+	for (i = 0; init_node_data[i].end_pfn; i++) {
+		unsigned long start_pfn = init_node_data[i].start_pfn;
+		unsigned long end_pfn = init_node_data[i].end_pfn;
+
+		if ((start_pfn <= pfn) && (pfn < end_pfn))
+			return init_node_data[i].nid;
+	}
+
+	return -1;
+}
+
+void __init add_region(unsigned int nid, unsigned long start_pfn,
+		       unsigned long pages)
+{
+	unsigned int i;
+
+	dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
+		nid, start_pfn, pages);
+
+	for (i = 0; init_node_data[i].end_pfn; i++) {
+		if (init_node_data[i].nid != nid)
+			continue;
+		if (init_node_data[i].end_pfn == start_pfn) {
+			init_node_data[i].end_pfn += pages;
+			return;
+		}
+		if (init_node_data[i].start_pfn == (start_pfn + pages)) {
+			init_node_data[i].start_pfn -= pages;
+			return;
+		}
+	}
+
+	/*
+	 * Leave last entry NULL so we dont iterate off the end (we use
+	 * entry.end_pfn to terminate the walk).
+	 */
+	if (i >= (MAX_REGIONS - 1)) {
+		printk(KERN_ERR "WARNING: too many memory regions in "
+				"numa code, truncating\n");
+		return;
+	}
+
+	init_node_data[i].start_pfn = start_pfn;
+	init_node_data[i].end_pfn = start_pfn + pages;
+	init_node_data[i].nid = nid;
+}
+
+/* We assume init_node_data has no overlapping regions */
+void __init get_region(unsigned int nid, unsigned long *start_pfn,
+		       unsigned long *end_pfn, unsigned long *pages_present)
+{
+	unsigned int i;
+
+	*start_pfn = -1UL;
+	*end_pfn = *pages_present = 0;
+
+	for (i = 0; init_node_data[i].end_pfn; i++) {
+		if (init_node_data[i].nid != nid)
+			continue;
+
+		*pages_present += init_node_data[i].end_pfn -
+			init_node_data[i].start_pfn;
+
+		if (init_node_data[i].start_pfn < *start_pfn)
+			*start_pfn = init_node_data[i].start_pfn;
+
+		if (init_node_data[i].end_pfn > *end_pfn)
+			*end_pfn = init_node_data[i].end_pfn;
+	}
+
+	/* We didnt find a matching region, return start/end as 0 */
+	if (*start_pfn == -1UL)
+		start_pfn = 0;
+}
 
 static inline void map_cpu_to_node(int cpu, int node)
 {
 	numa_cpu_lookup_table[cpu] = node;
-	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) {
+
+	if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
 		cpu_set(cpu, numa_cpumask_lookup_table[node]);
-	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 static void unmap_cpu_from_node(unsigned long cpu)
@@ -82,7 +152,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static struct device_node * __devinit find_cpu_node(unsigned int cpu)
+static struct device_node *find_cpu_node(unsigned int cpu)
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
@@ -209,7 +279,7 @@ static int __init get_mem_size_cells(void)
 	return rc;
 }
 
-static unsigned long read_n_cells(int n, unsigned int **buf)
+static unsigned long __init read_n_cells(int n, unsigned int **buf)
 {
 	unsigned long result = 0;
 
@@ -291,7 +361,8 @@ static int cpu_numa_callback(struct notifier_block *nfb,
  * or zero. If the returned value of size is 0 the region should be
  * discarded as it lies wholy above the memory limit.
  */
-static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size)
+static unsigned long __init numa_enforce_memory_limit(unsigned long start,
+						      unsigned long size)
 {
 	/*
 	 * We use lmb_end_of_DRAM() in here instead of memory_limit because
@@ -316,8 +387,7 @@ static int __init parse_numa_properties(void)
 	struct device_node *cpu = NULL;
 	struct device_node *memory = NULL;
 	int addr_cells, size_cells;
-	int max_domain = 0;
-	long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
+	int max_domain;
 	unsigned long i;
 
 	if (numa_enabled == 0) {
@@ -325,13 +395,6 @@ static int __init parse_numa_properties(void)
 		return -1;
 	}
 
-	numa_memory_lookup_table =
-		(char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
-	memset(numa_memory_lookup_table, 0, entries * sizeof(char));
-
-	for (i = 0; i < entries ; i++)
-		numa_memory_lookup_table[i] = ARRAY_INITIALISER;
-
 	min_common_depth = find_min_common_depth();
 
 	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
@@ -383,9 +446,6 @@ static int __init parse_numa_properties(void)
 		start = read_n_cells(addr_cells, &memcell_buf);
 		size = read_n_cells(size_cells, &memcell_buf);
 
-		start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
-		size = _ALIGN_UP(size, MEMORY_INCREMENT);
-
 		numa_domain = of_node_numa_domain(memory);
 
 		if (numa_domain >= MAX_NUMNODES) {
@@ -406,37 +466,8 @@ static int __init parse_numa_properties(void)
 				continue;
 		}
 
-		/*
-		 * Initialize new node struct, or add to an existing one.
-		 */
-		if (init_node_data[numa_domain].node_end_pfn) {
-			if ((start / PAGE_SIZE) <
-			    init_node_data[numa_domain].node_start_pfn)
-				init_node_data[numa_domain].node_start_pfn =
-					start / PAGE_SIZE;
-			if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) >
-			    init_node_data[numa_domain].node_end_pfn)
-				init_node_data[numa_domain].node_end_pfn =
-					(start / PAGE_SIZE) +
-					(size / PAGE_SIZE);
-
-			init_node_data[numa_domain].node_present_pages +=
-				size / PAGE_SIZE;
-		} else {
-			node_set_online(numa_domain);
-
-			init_node_data[numa_domain].node_start_pfn =
-				start / PAGE_SIZE;
-			init_node_data[numa_domain].node_end_pfn =
-				init_node_data[numa_domain].node_start_pfn +
-				size / PAGE_SIZE;
-			init_node_data[numa_domain].node_present_pages =
-				size / PAGE_SIZE;
-		}
-
-		for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
-			numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
-				numa_domain;
+		add_region(numa_domain, start >> PAGE_SHIFT,
+			   size >> PAGE_SHIFT);
 
 		if (--ranges)
 			goto new_range;
@@ -452,32 +483,15 @@ static void __init setup_nonnuma(void)
 {
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long total_ram = lmb_phys_mem_size();
-	unsigned long i;
 
 	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
 	printk(KERN_INFO "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);
 
-	if (!numa_memory_lookup_table) {
-		long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
-		numa_memory_lookup_table =
-			(char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
-		memset(numa_memory_lookup_table, 0, entries * sizeof(char));
-		for (i = 0; i < entries ; i++)
-			numa_memory_lookup_table[i] = ARRAY_INITIALISER;
-	}
-
 	map_cpu_to_node(boot_cpuid, 0);
-
+	add_region(0, 0, lmb_end_of_DRAM() >> PAGE_SHIFT);
 	node_set_online(0);
-
-	init_node_data[0].node_start_pfn = 0;
-	init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE;
-	init_node_data[0].node_present_pages = total_ram / PAGE_SIZE;
-
-	for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
-		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
 }
 
 static void __init dump_numa_topology(void)
@@ -495,8 +509,9 @@ static void __init dump_numa_topology(void)
 
 		count = 0;
 
-		for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) {
-			if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) {
+		for (i = 0; i < lmb_end_of_DRAM();
+		     i += (1 << SECTION_SIZE_BITS)) {
+			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
 				if (count == 0)
 					printk(" 0x%lx", i);
 				++count;
@@ -521,10 +536,12 @@ static void __init dump_numa_topology(void)
  *
  * Returns the physical address of the memory.
  */
-static unsigned long careful_allocation(int nid, unsigned long size,
-					unsigned long align, unsigned long end)
+static void __init *careful_allocation(int nid, unsigned long size,
+				       unsigned long align,
+				       unsigned long end_pfn)
 {
-	unsigned long ret = lmb_alloc_base(size, align, end);
+	int new_nid;
+	unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
 	if (!ret)
@@ -538,28 +555,27 @@ static unsigned long careful_allocation(int nid, unsigned long size,
 	 * If the memory came from a previously allocated node, we must
 	 * retry with the bootmem allocator.
 	 */
-	if (pa_to_nid(ret) < nid) {
-		nid = pa_to_nid(ret);
-		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid),
+	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+	if (new_nid < nid) {
+		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
 				size, align, 0);
 
 		if (!ret)
 			panic("numa.c: cannot allocate %lu bytes on node %d",
-			      size, nid);
+			      size, new_nid);
 
-		ret = virt_to_abs(ret);
+		ret = __pa(ret);
 
 		dbg("alloc_bootmem %lx %lx\n", ret, size);
 	}
 
-	return ret;
+	return (void *)ret;
 }
 
 void __init do_init_bootmem(void)
 {
 	int nid;
-	int addr_cells, size_cells;
-	struct device_node *memory = NULL;
+	unsigned int i;
 	static struct notifier_block ppc64_numa_nb = {
 		.notifier_call = cpu_numa_callback,
 		.priority = 1 /* Must run before sched domains notifier. */
@@ -577,99 +593,66 @@ void __init do_init_bootmem(void)
 	register_cpu_notifier(&ppc64_numa_nb);
 
 	for_each_online_node(nid) {
-		unsigned long start_paddr, end_paddr;
-		int i;
+		unsigned long start_pfn, end_pfn, pages_present;
 		unsigned long bootmem_paddr;
 		unsigned long bootmap_pages;
 
-		start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE;
-		end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE;
+		get_region(nid, &start_pfn, &end_pfn, &pages_present);
 
 		/* Allocate the node structure node local if possible */
-		NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid,
+		NODE_DATA(nid) = careful_allocation(nid,
 					sizeof(struct pglist_data),
-					SMP_CACHE_BYTES, end_paddr);
-		NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid));
+					SMP_CACHE_BYTES, end_pfn);
+		NODE_DATA(nid) = __va(NODE_DATA(nid));
 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
 		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
 
 		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
-		NODE_DATA(nid)->node_start_pfn =
-			init_node_data[nid].node_start_pfn;
-		NODE_DATA(nid)->node_spanned_pages =
-			end_paddr - start_paddr;
+		NODE_DATA(nid)->node_start_pfn = start_pfn;
+		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
 		if (NODE_DATA(nid)->node_spanned_pages == 0)
 			continue;
 
-		dbg("start_paddr = %lx\n", start_paddr);
-		dbg("end_paddr = %lx\n", end_paddr);
+		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
+		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
-		bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT);
-
-		bootmem_paddr = careful_allocation(nid,
+		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+		bootmem_paddr = (unsigned long)careful_allocation(nid,
 					bootmap_pages << PAGE_SHIFT,
-				PAGE_SIZE, end_paddr);
-		memset(abs_to_virt(bootmem_paddr), 0,
-		       bootmap_pages << PAGE_SHIFT);
+					PAGE_SIZE, end_pfn);
+		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
 
 		dbg("bootmap_paddr = %lx\n", bootmem_paddr);
 
 		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
-				  start_paddr >> PAGE_SHIFT,
-				  end_paddr >> PAGE_SHIFT);
+				  start_pfn, end_pfn);
 
-		/*
-		 * We need to do another scan of all memory sections to
-		 * associate memory with the correct node.
-		 */
-		addr_cells = get_mem_addr_cells();
-		size_cells = get_mem_size_cells();
-		memory = NULL;
-		while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
-			unsigned long mem_start, mem_size;
-			int numa_domain, ranges;
-			unsigned int *memcell_buf;
-			unsigned int len;
-
-			memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
-			if (!memcell_buf || len <= 0)
-				continue;
-
-			ranges = memory->n_addrs;	/* ranges in cell */
-new_range:
-			mem_start = read_n_cells(addr_cells, &memcell_buf);
-			mem_size = read_n_cells(size_cells, &memcell_buf);
-			if (numa_enabled) {
-				numa_domain = of_node_numa_domain(memory);
-				if (numa_domain  >= MAX_NUMNODES)
-					numa_domain = 0;
-			} else
-				numa_domain =  0;
+		/* Add free regions on this node */
+		for (i = 0; init_node_data[i].end_pfn; i++) {
+			unsigned long start, end;
 
-			if (numa_domain != nid)
+			if (init_node_data[i].nid != nid)
 				continue;
 
-			mem_size = numa_enforce_memory_limit(mem_start, mem_size);
-			if (mem_size) {
-				dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
-				free_bootmem_node(NODE_DATA(nid), mem_start, mem_size);
-			}
+			start = init_node_data[i].start_pfn << PAGE_SHIFT;
+			end = init_node_data[i].end_pfn << PAGE_SHIFT;
 
-			if (--ranges)		/* process all ranges in cell */
-				goto new_range;
+			dbg("free_bootmem %lx %lx\n", start, end - start);
+			free_bootmem_node(NODE_DATA(nid), start, end - start);
 		}
 
-		/*
-		 * Mark reserved regions on this node
-		 */
+		/* Mark reserved regions on this node */
 		for (i = 0; i < lmb.reserved.cnt; i++) {
 			unsigned long physbase = lmb.reserved.region[i].base;
 			unsigned long size = lmb.reserved.region[i].size;
+			unsigned long start_paddr = start_pfn << PAGE_SHIFT;
+			unsigned long end_paddr = end_pfn << PAGE_SHIFT;
 
-			if (pa_to_nid(physbase) != nid &&
-			    pa_to_nid(physbase+size-1) != nid)
+			if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
+			    early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
 				continue;
 
 			if (physbase < end_paddr &&
@@ -689,46 +672,19 @@ void __init do_init_bootmem(void)
 						     size);
 			}
 		}
-		/*
-		 * This loop may look famaliar, but we have to do it again
-		 * after marking our reserved memory to mark memory present
-		 * for sparsemem.
-		 */
-		addr_cells = get_mem_addr_cells();
-		size_cells = get_mem_size_cells();
-		memory = NULL;
-		while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
-			unsigned long mem_start, mem_size;
-			int numa_domain, ranges;
-			unsigned int *memcell_buf;
-			unsigned int len;
-
-			memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
-			if (!memcell_buf || len <= 0)
-				continue;
-
-			ranges = memory->n_addrs;	/* ranges in cell */
-new_range2:
-			mem_start = read_n_cells(addr_cells, &memcell_buf);
-			mem_size = read_n_cells(size_cells, &memcell_buf);
-			if (numa_enabled) {
-				numa_domain = of_node_numa_domain(memory);
-				if (numa_domain  >= MAX_NUMNODES)
-					numa_domain = 0;
-			} else
-				numa_domain =  0;
+
+		/* Add regions into sparsemem */
+		for (i = 0; init_node_data[i].end_pfn; i++) {
+			unsigned long start, end;
 
-			if (numa_domain != nid)
+			if (init_node_data[i].nid != nid)
 				continue;
 
-			mem_size = numa_enforce_memory_limit(mem_start, mem_size);
-			memory_present(numa_domain, mem_start >> PAGE_SHIFT,
-				       (mem_start + mem_size) >> PAGE_SHIFT);
+			start = init_node_data[i].start_pfn;
+			end = init_node_data[i].end_pfn;
 
-			if (--ranges)		/* process all ranges in cell */
-				goto new_range2;
+			memory_present(nid, start, end);
 		}
-
 	}
 }
@@ -742,21 +698,18 @@
 	memset(zholes_size, 0, sizeof(zholes_size));
 
 	for_each_online_node(nid) {
-		unsigned long start_pfn;
-		unsigned long end_pfn;
+		unsigned long start_pfn, end_pfn, pages_present;
 
-		start_pfn = init_node_data[nid].node_start_pfn;
-		end_pfn = init_node_data[nid].node_end_pfn;
+		get_region(nid, &start_pfn, &end_pfn, &pages_present);
 
 		zones_size[ZONE_DMA] = end_pfn - start_pfn;
-		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
-			init_node_data[nid].node_present_pages;
+		zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
 
 		dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
 		    zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
 
-		free_area_init_node(nid, NODE_DATA(nid), zones_size,
-							start_pfn, zholes_size);
+		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
+				    zholes_size);
 	}
 }
+3 −8
@@ -279,17 +279,12 @@ config ARCH_FLATMEM_ENABLE
 	def_bool y
 	depends on !NUMA
 
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool y
-	depends on SMP && PPC_PSERIES
-
-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
 
-config ARCH_SPARSEMEM_ENABLE
+config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
+	depends on NUMA
 
 source "mm/Kconfig"
 
+1 −9
@@ -9,15 +9,7 @@
 
 static inline int cpu_to_node(int cpu)
 {
-	int node;
-
-	node = numa_cpu_lookup_table[cpu];
-
-#ifdef DEBUG_NUMA
-	BUG_ON(node == -1);
-#endif
-
-	return node;
+	return numa_cpu_lookup_table[cpu];
 }
 
 #define parent_node(node)	(node)
+3 −60
@@ -8,15 +8,14 @@
 #define _ASM_MMZONE_H_
 
 #include <linux/config.h>
-#include <asm/smp.h>
 
-/* generic non-linear memory support:
+/*
+ * generic non-linear memory support:
  *
  * 1) we will not split memory into more chunks than will fit into the
  *    flags field of the struct page
  */
 
-
 #ifdef CONFIG_NEED_MULTIPLE_NODES
 
 extern struct pglist_data *node_data[];
@@ -30,41 +29,11 @@ extern struct pglist_data *node_data[];
  */
 
 extern int numa_cpu_lookup_table[];
-extern char *numa_memory_lookup_table;
 extern cpumask_t numa_cpumask_lookup_table[];
 #ifdef CONFIG_MEMORY_HOTPLUG
 extern unsigned long max_pfn;
 #endif
 
-/* 16MB regions */
-#define MEMORY_INCREMENT_SHIFT 24
-#define MEMORY_INCREMENT (1UL << MEMORY_INCREMENT_SHIFT)
-
-/* NUMA debugging, will not work on a DLPAR machine */
-#undef DEBUG_NUMA
-
-static inline int pa_to_nid(unsigned long pa)
-{
-	int nid;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* kludge hot added sections default to node 0 */
-	if (pa >= (max_pfn << PAGE_SHIFT))
-		return 0;
-#endif
-	nid = numa_memory_lookup_table[pa >> MEMORY_INCREMENT_SHIFT];
-
-#ifdef DEBUG_NUMA
-	/* the physical address passed in is not in the map for the system */
-	if (nid == -1) {
-		printk("bad address: %lx\n", pa);
-		BUG();
-	}
-#endif
-
-	return nid;
-}
-
 /*
  * Following are macros that each numa implmentation must define.
  */
@@ -72,36 +41,10 @@ static inline int pa_to_nid(unsigned long pa)
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
 
-#ifdef CONFIG_DISCONTIGMEM
-
-#define node_localnr(pfn, nid)	((pfn) - NODE_DATA(nid)->node_start_pfn)
-
-#define pfn_to_nid(pfn)		pa_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
-
-/* Written this way to avoid evaluating arguments twice */
-#define discontigmem_pfn_to_page(pfn) \
-({ \
-	unsigned long __tmp = pfn; \
-	(NODE_DATA(pfn_to_nid(__tmp))->node_mem_map + \
-	 node_localnr(__tmp, pfn_to_nid(__tmp))); \
-})
-
-#define discontigmem_page_to_pfn(p) \
-({ \
-	struct page *__tmp = p; \
-	(((__tmp) - page_zone(__tmp)->zone_mem_map) + \
-	 page_zone(__tmp)->zone_start_pfn); \
-})
-
-/* XXX fix for discontiguous physical memory */
-#define discontigmem_pfn_valid(pfn)		((pfn) < num_physpages)
-
-#endif /* CONFIG_DISCONTIGMEM */
-
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
 #ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-#define early_pfn_to_nid(pfn)  pa_to_nid(((unsigned long)pfn) << PAGE_SHIFT)
+extern int __init early_pfn_to_nid(unsigned long pfn);
 #endif
 
 #endif /* _ASM_MMZONE_H_ */
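
For callers, the interface change in this last file is pa_to_nid(physical
address) becoming early_pfn_to_nid(page frame number). A hypothetical caller
holding a physical address pa (not part of this patch) would now write:

	int nid = early_pfn_to_nid(pa >> PAGE_SHIFT);	/* was: pa_to_nid(pa) */

which is exactly how careful_allocation() and the reserved-region scan in
numa.c are converted above.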