Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22adb358 authored by David S. Miller
Browse files

[SPARC64]: Eliminate NR_CPUS limitations.



Cheetah systems can have cpuids as large as 1023, although physical
systems don't have that many cpus.

Only three limitations existed in the kernel preventing arbitrary
NR_CPUS values:

1) dcache dirty cpu state stored in page->flags on
   D-cache aliasing platforms.  With some build time
   calculations and some build-time BUG checks on
   page->flags layout, this one was easily solved.

2) The cheetah XCALL delivery code could only handle
   a cpumask with up to 32 cpus set.  Some simple looping
   logic clears that up too.

3) thread_info->cpu was a u8, easily changed to a u16.

There are a few spots in the kernel that still put NR_CPUS
sized arrays on the kernel stack, but that's not a sparc64
specific problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5cbc3073
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -147,10 +147,10 @@ config SMP
	  If you don't know what to do here, say N.

config NR_CPUS
	int "Maximum number of CPUs (2-64)"
	range 2 64
	int "Maximum number of CPUs (2-1024)"
	range 2 1024
	depends on SMP
	default "32"
	default "64"

source "drivers/cpufreq/Kconfig"

+1 −1
Original line number Diff line number Diff line
@@ -523,7 +523,7 @@ tlb_fixup_done:
#else
	mov	0, %o0
#endif
	stb	%o0, [%g6 + TI_CPU]
	sth	%o0, [%g6 + TI_CPU]

	/* Off we go.... */
	call	start_kernel
+18 −1
Original line number Diff line number Diff line
@@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jbus;
	int nack_busy_id, is_jbus, need_more;

	if (cpus_empty(mask))
		return;
@@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

@@ -444,6 +445,10 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
				: /* no outputs */
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

@@ -460,6 +465,16 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
			if (dispatch_stat == 0UL) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, cnt = 0;
					for_each_cpu_mask(i, mask) {
						cpu_clear(i, mask);
						cnt++;
						if (cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
@@ -497,6 +512,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
				if ((dispatch_stat & check_mask) == 0)
					cpu_clear(i, mask);
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
+16 −6
Original line number Diff line number Diff line
@@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page)
}

#define PG_dcache_dirty		PG_arch_1
#define PG_dcache_cpu_shift	24UL
#define PG_dcache_cpu_mask	(256UL - 1UL)

#if NR_CPUS > 256
#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
#endif
#define PG_dcache_cpu_shift	32UL
#define PG_dcache_cpu_mask	\
	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)

#define dcache_dirty_cpu(page) \
	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
@@ -1349,6 +1346,19 @@ void __init paging_init(void)
	unsigned long end_pfn, pages_avail, shift, phys_base;
	unsigned long real_end, i;

	/* These build-time checks make sure that the dcache_dirty_cpu()
	 * page->flags usage will work.
	 *
	 * When a page gets marked as dcache-dirty, we store the
	 * cpu number starting at bit 32 in the page->flags.  Also,
	 * functions like clear_dcache_dirty_cpu use the cpu mask
	 * in 13-bit signed-immediate instruction fields.
	 */
	BUILD_BUG_ON(FLAGS_RESERVED != 32);
	BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
		     ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
	BUILD_BUG_ON(NR_CPUS > 4096);

	kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
	kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;

+1 −1
Original line number Diff line number Diff line
@@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
 * the calculations done by the macro mid-stream.
 */
#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
	ldub	[THR + TI_CPU], REG1;			\
	lduh	[THR + TI_CPU], REG1;			\
	sethi	%hi(__per_cpu_shift), REG3;		\
	sethi	%hi(__per_cpu_base), REG2;		\
	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
Loading