
Commit a0663a79 authored by David S. Miller, committed by David S. Miller

[SPARC64]: Fix TLB context allocation with SMT style shared TLBs.



The context allocation scheme we use depends upon there being a 1<-->1
mapping from cpu to physical TLB for correctness.  Chips like Niagara
break this assumption.

So what we do is notify all cpus with a cross call when the context
version number changes; if necessary, this makes them allocate
a valid context for the address space they are running in at the time.

Stress tested with make -j1024, make -j2048, and make -j4096 kernel
builds on a 32-strand, 8-core T2000 with 16GB of RAM.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 074d82cf
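
The scheme turns on context values carrying a version stamp in their upper bits: CTX_VALID() compares those bits against the global tlb_context_cache, so a single version bump invalidates every context handed out under the old version at once, and the cross call gives each cpu a chance to notice and re-allocate. The following stand-alone C sketch (not from this commit: toy bit widths, a hypothetical get_new_context() helper, and no allocation bitmap, unlike the real get_new_mmu_context()) shows that invalidation behavior:

#include <stdio.h>

#define CTX_NR_BITS		4	/* toy width; the real value is larger */
#define CTX_NR_MASK		((1UL << CTX_NR_BITS) - 1)
#define CTX_VERSION_MASK	(~CTX_NR_MASK)
#define CTX_FIRST_VERSION	(1UL << CTX_NR_BITS)

/* Low bits: context number.  High bits: version stamp. */
static unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;

/* A context is usable only while its version bits match the global
 * cache; one version bump invalidates every older context at once. */
#define CTX_VALID(ctx) \
	(!(((ctx) ^ tlb_context_cache) & CTX_VERSION_MASK))

static unsigned long get_new_context(void)
{
	unsigned long nr = (tlb_context_cache + 1) & CTX_NR_MASK;

	if (nr == 0) {
		/* Numbers exhausted: start a new version.  This is the
		 * point where the kernel fires the cross call so other
		 * cpus re-validate the context they are running in. */
		tlb_context_cache = (tlb_context_cache & CTX_VERSION_MASK)
				    + CTX_FIRST_VERSION;
		nr = 1;
	}
	tlb_context_cache = (tlb_context_cache & CTX_VERSION_MASK) | nr;
	return tlb_context_cache;
}

int main(void)
{
	unsigned long first = get_new_context();
	unsigned long last = first;
	int i;

	for (i = 0; i < (1 << CTX_NR_BITS); i++)	/* force a wrap */
		last = get_new_context();

	printf("first=%#lx valid=%d\n", first, CTX_VALID(first));
	printf("last=%#lx  valid=%d\n", last, CTX_VALID(last));
	return 0;
}

Run on its own, this prints that the first context fails CTX_VALID() once the toy number space wraps while the newest one passes; in the patch, a cpu that sees the failing check in the cross-call handler re-allocates and reloads its secondary context.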
arch/sparc64/kernel/smp.c +29 −11
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	put_cpu();
 }
 
+static void __smp_receive_signal_mask(cpumask_t mask)
+{
+	smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
+}
+
 void smp_receive_signal(int cpu)
 {
 	cpumask_t mask = cpumask_of_cpu(cpu);
 
-	if (cpu_online(cpu)) {
-		u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
-
-		if (tlb_type == spitfire)
-			spitfire_xcall_deliver(data0, 0, 0, mask);
-		else if (tlb_type == cheetah || tlb_type == cheetah_plus)
-			cheetah_xcall_deliver(data0, 0, 0, mask);
-		else if (tlb_type == hypervisor)
-			hypervisor_xcall_deliver(data0, 0, 0, mask);
-	}
+	if (cpu_online(cpu))
+		__smp_receive_signal_mask(mask);
 }
 
 void smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
-	/* Just return, rtrap takes care of the rest. */
+	struct mm_struct *mm;
+
 	clear_softint(1 << irq);
+
+	/* See if we need to allocate a new TLB context because
+	 * the version of the one we are using is now out of date.
+	 */
+	mm = current->active_mm;
+	if (likely(mm)) {
+		if (unlikely(!CTX_VALID(mm->context))) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&mm->context.lock, flags);
+			get_new_mmu_context(mm);
+			load_secondary_context(mm);
+			spin_unlock_irqrestore(&mm->context.lock, flags);
+		}
+	}
+}
+
+void smp_new_mmu_context_version(void)
+{
+	__smp_receive_signal_mask(cpu_online_map);
 }
 
 void smp_report_regs(void)
arch/sparc64/mm/init.c +8 −1
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
  * let the user have CTX 0 (nucleus) or we ever use a CTX
  * version of zero (and thus NO_CONTEXT would not be caught
  * by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
  */
 void get_new_mmu_context(struct mm_struct *mm)
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
-	
+	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
 				mmu_context_bmap[i + 2] = 0;
 				mmu_context_bmap[i + 3] = 0;
 			}
+			new_version = 1;
 			goto out;
 		}
 	}
@@ -671,6 +675,9 @@ out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
 	spin_unlock(&ctx_alloc_lock);
+
+	if (unlikely(new_version))
+		smp_new_mmu_context_version();
 }
 
 void sparc_ultra_dump_itlb(void)
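
Read in isolation, the allocation path above is a two-pass bitmap scan: search upward from the hint left in tlb_context_cache, then wrap around and search again from context 1 (context 0 stays reserved for the nucleus), and only when both passes fail clear the bitmap and start a new version. A user-space rendering of just the scan (toy sizes; this find_next_zero_bit() is a naive stand-in for the kernel's optimized library version, and exhaustion here returns 0 instead of bumping the version):

#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define CTX_NR_BITS	6		/* toy: 64 contexts */
#define NR_CTX		(1UL << CTX_NR_BITS)

static unsigned long ctx_bmap[(NR_CTX + BITS_PER_LONG - 1) / BITS_PER_LONG];

/* Naive stand-in for the kernel's find_next_zero_bit(): first clear
 * bit at or above 'start', or 'size' if there is none below 'size'. */
static unsigned long find_next_zero_bit(const unsigned long *map,
					unsigned long size,
					unsigned long start)
{
	unsigned long i;

	for (i = start; i < size; i++)
		if (!(map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))))
			return i;
	return size;
}

/* Two-pass scan mirroring get_new_mmu_context(): upward from the
 * hint first, then wrapped around from 1 (context 0 is reserved).
 * Returns 0 when the space is exhausted, i.e. the point where the
 * kernel would clear the bitmap and bump the version instead. */
static unsigned long alloc_ctx(unsigned long hint)
{
	unsigned long new_ctx = find_next_zero_bit(ctx_bmap, NR_CTX, hint);

	if (new_ctx >= NR_CTX) {
		new_ctx = find_next_zero_bit(ctx_bmap, hint, 1);
		if (new_ctx >= hint)
			return 0;
	}
	ctx_bmap[new_ctx / BITS_PER_LONG] |= 1UL << (new_ctx % BITS_PER_LONG);
	return new_ctx;
}

int main(void)
{
	unsigned long hint = 1, ctx;
	int allocated = 0;

	while ((ctx = alloc_ctx(hint)) != 0) {
		hint = (ctx + 1) & (NR_CTX - 1);
		if (hint == 0)
			hint = 1;	/* keep the hint past reserved 0 */
		allocated++;
	}
	printf("allocated %d contexts before exhaustion\n", allocated);
	return 0;
}

With 64 toy contexts this allocates 63 before reporting exhaustion, matching the rule that context 0 is never handed out.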
include/asm-sparc64/mmu.h +1 −0
@@ -102,6 +102,7 @@ extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte
 extern void tsb_flush(unsigned long ent, unsigned long tag);
 
 typedef struct {
+	spinlock_t		lock;
 	unsigned long		sparc64_ctx_val;
 	struct tsb		*tsb;
 	unsigned long		tsb_rss_limit;
include/asm-sparc64/mmu_context.h +12 −13
@@ -19,6 +19,12 @@ extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
 extern void get_new_mmu_context(struct mm_struct *mm);
+#ifdef CONFIG_SMP
+extern void smp_new_mmu_context_version(void);
+#else
+#define smp_new_mmu_context_version() do { } while (0)
+#endif
+
 extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
 
@@ -58,21 +64,17 @@ extern void smp_tsb_sync(struct mm_struct *mm);

 extern void __flush_tlb_mm(unsigned long, unsigned long);
 
-/* Switch the current MM context. */
+/* Switch the current MM context.  Interrupts are disabled.  */
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long ctx_valid;
 	int cpu;
 
-	/* Note: page_table_lock is used here to serialize switch_mm
-	 * and activate_mm, and their calls to get_new_mmu_context.
-	 * This use of page_table_lock is unrelated to its other uses.
-	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.lock);
 	ctx_valid = CTX_VALID(mm->context);
 	if (!ctx_valid)
 		get_new_mmu_context(mm);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.lock);
 
 	if (!ctx_valid || (old_mm != mm)) {
 		load_secondary_context(mm);
@@ -98,19 +100,16 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 /* Activate a new MM instance for the current task. */
 static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
 {
+	unsigned long flags;
 	int cpu;
 
-	/* Note: page_table_lock is used here to serialize switch_mm
-	 * and activate_mm, and their calls to get_new_mmu_context.
-	 * This use of page_table_lock is unrelated to its other uses.
-	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock_irqsave(&mm->context.lock, flags);
 	if (!CTX_VALID(mm->context))
 		get_new_mmu_context(mm);
 	cpu = smp_processor_id();
 	if (!cpu_isset(cpu, mm->cpu_vm_mask))
 		cpu_set(cpu, mm->cpu_vm_mask);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
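
The move from page_table_lock to mm->context.lock carries a new locking rule: smp_receive_signal_client() now takes the same lock from interrupt context, so any holder that can be interrupted must use the irqsave variant. That is why activate_mm() above switches to spin_lock_irqsave() while switch_mm(), which the updated comment documents as running with interrupts already disabled, keeps plain spin_lock(). A loose user-space analogy (illustrative only, not kernel API), with POSIX signal masking standing in for local interrupt disabling:

#include <pthread.h>
#include <signal.h>
#include <stdio.h>

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile sig_atomic_t ctx_version;

/* Stand-in for smp_receive_signal_client(): an asynchronous event
 * that updates the same state the main path guards. */
static void cross_call(int sig)
{
	(void)sig;
	ctx_version++;
}

int main(void)
{
	sigset_t blocked, saved;

	signal(SIGUSR1, cross_call);

	/* Analog of spin_lock_irqsave(&mm->context.lock, flags):
	 * mask the async event, then take the lock. */
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGUSR1);
	pthread_sigmask(SIG_BLOCK, &blocked, &saved);
	pthread_mutex_lock(&ctx_lock);

	raise(SIGUSR1);		/* "cross call" arrives mid critical section */
	printf("inside critical section, version still %d\n",
	       (int)ctx_version);

	pthread_mutex_unlock(&ctx_lock);
	/* Analog of spin_unlock_irqrestore(): the pending event is
	 * delivered only once the mask is restored. */
	pthread_sigmask(SIG_SETMASK, &saved, NULL);

	printf("after restore, version %d\n", (int)ctx_version);
	return 0;
}

The raise() lands while the "interrupt" is masked, so the handler runs only after the critical section ends, which is exactly the ordering the irqsave pairing guarantees for the context lock.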