Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 64f31d58 authored by Martin Schwidefsky
Browse files

s390/mm: simplify the TLB flushing code



ptep_flush_lazy and pmdp_flush_lazy use mm->context.attach_count to
decide between a lazy TLB flush vs an immediate TLB flush. The field
contains two 16-bit counters, the number of CPUs that have the mm
attached and can create TLB entries for it and the number of CPUs in
the middle of a page table update.

The __tlb_flush_asce, ptep_flush_direct and pmdp_flush_direct functions
use the attach counter and a mask check with mm_cpumask(mm) to decide
between a local flush of the current CPU and a global flush.

For all these functions the decision between lazy vs immediate and
local vs global TLB flush can be based on CPU masks. There are two
masks: the mm->context.cpu_attach_mask with the CPUs that are actively
using the mm, and the mm_cpumask(mm) with the CPUs that have used the
mm since the last full flush. The decision between lazy vs immediate
flush is based on the mm->context.cpu_attach_mask; to decide between
local vs global flush the mm_cpumask(mm) is used.

With this patch all checks will use the CPU masks; the old counter
mm->context.attach_count with its two 16-bit values is turned into a
single counter mm->context.flush_count that keeps track of the number
of CPUs with incomplete page table updates. The sole user of this
counter is finish_arch_post_lock_switch() which waits for the end of
all page table updates.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 7dd96816
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@

typedef struct {
	cpumask_t cpu_attach_mask;
	atomic_t attach_count;
	atomic_t flush_count;
	unsigned int flush_mm;
	spinlock_t list_lock;
	struct list_head pgtable_list;
+5 −10
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk,
	INIT_LIST_HEAD(&mm->context.pgtable_list);
	INIT_LIST_HEAD(&mm->context.gmap_list);
	cpumask_clear(&mm->context.cpu_attach_mask);
	atomic_set(&mm->context.attach_count, 0);
	atomic_set(&mm->context.flush_count, 0);
	mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
	mm->context.alloc_pgste = page_table_allocate_pgste;
@@ -90,14 +90,11 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	S390_lowcore.user_asce = next->context.asce;
	if (prev == next)
		return;
	if (MACHINE_HAS_TLB_LC)
	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
	cpumask_set_cpu(cpu, mm_cpumask(next));
	/* Clear old ASCE by loading the kernel ASCE. */
	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
	atomic_inc(&next->context.attach_count);
	atomic_dec(&prev->context.attach_count);
	if (MACHINE_HAS_TLB_LC)
	cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}

@@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void)
	load_kernel_asce();
	if (mm) {
		preempt_disable();
		while (atomic_read(&mm->context.attach_count) >> 16)
		while (atomic_read(&mm->context.flush_count))
			cpu_relax();

		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
		if (mm->context.flush_mm)
			__tlb_flush_mm(mm);
		preempt_enable();
@@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev,
                               struct mm_struct *next)
{
	switch_mm(prev, next, current);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
	set_user_asce(next);
}

+5 −8
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ static inline void __tlb_flush_global(void)
static inline void __tlb_flush_full(struct mm_struct *mm)
{
	preempt_disable();
	atomic_add(0x10000, &mm->context.attach_count);
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
		/* Local TLB flush */
		__tlb_flush_local();
@@ -69,7 +69,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
			cpumask_copy(mm_cpumask(mm),
				     &mm->context.cpu_attach_mask);
	}
	atomic_sub(0x10000, &mm->context.attach_count);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}

@@ -78,12 +78,9 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 */
static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
{
	int active, count;

	preempt_disable();
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
		__tlb_flush_idte_local(asce);
	} else {
@@ -96,7 +93,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
			cpumask_copy(mm_cpumask(mm),
				     &mm->context.cpu_attach_mask);
	}
	atomic_sub(0x10000, &mm->context.attach_count);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}

+2 −6
Original line number Diff line number Diff line
@@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
	struct lowcore *lc = pcpu->lowcore;

	if (MACHINE_HAS_TLB_LC)
	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
	atomic_inc(&init_mm.context.attach_count);
	lc->cpu_nr = cpu;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->percpu_offset = __per_cpu_offset[cpu];
@@ -876,9 +874,7 @@ void __cpu_die(unsigned int cpu)
	while (!pcpu_stopped(pcpu))
		cpu_relax();
	pcpu_free_lowcore(pcpu);
	atomic_dec(&init_mm.context.attach_count);
	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
	if (MACHINE_HAS_TLB_LC)
	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}

+1 −3
Original line number Diff line number Diff line
@@ -118,10 +118,8 @@ void mark_rodata_ro(void)

void __init mem_init(void)
{
	if (MACHINE_HAS_TLB_LC)
	cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
	cpumask_set_cpu(0, mm_cpumask(&init_mm));
	atomic_set(&init_mm.context.attach_count, 1);

	set_max_mapnr(max_low_pfn);
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
Loading