Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9939ddaf authored by Tejun Heo, committed by Ingo Molnar
Browse files

x86: merge 64 and 32 SMP percpu handling



Now that pda is allocated as part of percpu, percpu doesn't need to be
accessed through pda.  Unify x86_64 SMP percpu access with x86_32 SMP
one.  Other than the segment register, operand size and the base of
percpu symbols, they behave identically now.

This patch replaces now unnecessary pda->data_offset with a dummy
field which is necessary to keep stack_canary at its place.  This
patch also moves per_cpu_offset initialization out of init_gdt() into
setup_per_cpu_areas().  Note that this change also necessitates
explicit per_cpu_offset initializations in voyager_smp.c.

With this change, x86_OP_percpu()'s are as efficient on x86_64 as on
x86_32 and also x86_64 can use assembly PER_CPU macros.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 1a51e3a0
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -11,8 +11,7 @@
/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
	struct task_struct *pcurrent;	/* 0  Current process */
	unsigned long data_offset;	/* 8 Per cpu data offset from linker
					   address */
	unsigned long dummy;
	unsigned long kernelstack;	/* 16 top of kernel stack for current */
	unsigned long oldrsp;		/* 24 user rsp for system call */
	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
+39 −88
Original line number Diff line number Diff line
#ifndef _ASM_X86_PERCPU_H
#define _ASM_X86_PERCPU_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_X86_64
extern void load_pda_offset(int cpu);
#define __percpu_seg		gs
#define __percpu_mov_op		movq
#else
static inline void load_pda_offset(int cpu) { }
#endif
#endif

#ifdef CONFIG_X86_64
#include <linux/compiler.h>

/* Same as asm-generic/percpu.h, except that we store the per cpu offset
   in the PDA. Longer term the PDA and every per cpu variable
   should be just put into a single section and referenced directly
   from %gs */

#ifdef CONFIG_SMP
#include <asm/pda.h>

#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
#define __my_cpu_offset read_pda(data_offset)

#define per_cpu_offset(x) (__per_cpu_offset(x))

#define __percpu_seg		fs
#define __percpu_mov_op		movl
#endif
#include <asm-generic/percpu.h>

DECLARE_PER_CPU(struct x8664_pda, pda);

/*
 * These are supposed to be implemented as a single instruction which
 * operates on the per-cpu data base segment.  x86-64 doesn't have
 * that yet, so this is a fairly inefficient workaround for the
 * meantime.  The single instruction is atomic with respect to
 * preemption and interrupts, so we need to explicitly disable
 * interrupts here to achieve the same effect.  However, because it
 * can be used from within interrupt-disable/enable, we can't actually
 * disable interrupts; disabling preemption is enough.
 */
#define x86_read_percpu(var)						\
	({								\
		typeof(per_cpu_var(var)) __tmp;				\
		preempt_disable();					\
		__tmp = __get_cpu_var(var);				\
		preempt_enable();					\
		__tmp;							\
	})

#define x86_write_percpu(var, val)					\
	do {								\
		preempt_disable();					\
		__get_cpu_var(var) = (val);				\
		preempt_enable();					\
	} while(0)

#else /* CONFIG_X86_64 */

#ifdef __ASSEMBLY__

@@ -74,41 +25,25 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
 */
#ifdef CONFIG_SMP
#define PER_CPU(var, reg)						\
	movl %fs:per_cpu__##this_cpu_off, reg;		\
	__percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg;	\
	lea per_cpu__##var(reg), reg
#define PER_CPU_VAR(var)	%fs:per_cpu__##var
#define PER_CPU_VAR(var)	%__percpu_seg:per_cpu__##var
#else /* ! SMP */
#define PER_CPU(var, reg)						\
	movl $per_cpu__##var, reg
	__percpu_mov_op $per_cpu__##var, reg
#define PER_CPU_VAR(var)	per_cpu__##var
#endif	/* SMP */

#else /* ...!ASSEMBLY */

/*
 * PER_CPU finds an address of a per-cpu variable.
 *
 * Args:
 *    var - variable name
 *    cpu - 32bit register containing the current CPU number
 *
 * The resulting address is stored in the "cpu" argument.
 *
 * Example:
 *    PER_CPU(cpu_gdt_descr, %ebx)
 */
#ifdef CONFIG_SMP
#include <linux/stringify.h>

#ifdef CONFIG_SMP
#define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset		x86_read_percpu(this_cpu_off)

/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
#define __percpu_seg "%%fs:"

#else  /* !SMP */

#define __percpu_seg ""

#endif	/* SMP */
#else
#define __percpu_seg_str
#endif

#include <asm-generic/percpu.h>

@@ -128,20 +63,25 @@ do { \
	}						\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b %1,"__percpu_seg"%0"		\
		asm(op "b %1,"__percpu_seg_str"%0"	\
		    : "+m" (var)			\
		    : "ri" ((T__)val));			\
		break;					\
	case 2:						\
		asm(op "w %1,"__percpu_seg"%0"		\
		asm(op "w %1,"__percpu_seg_str"%0"	\
		    : "+m" (var)			\
		    : "ri" ((T__)val));			\
		break;					\
	case 4:						\
		asm(op "l %1,"__percpu_seg"%0"		\
		asm(op "l %1,"__percpu_seg_str"%0"	\
		    : "+m" (var)			\
		    : "ri" ((T__)val));			\
		break;					\
	case 8:						\
		asm(op "q %1,"__percpu_seg_str"%0"	\
		    : "+m" (var)			\
		    : "r" ((T__)val));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
} while (0)
@@ -151,17 +91,22 @@ do { \
	typeof(var) ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_seg"%1,%0"		\
		asm(op "b "__percpu_seg_str"%1,%0"	\
		    : "=r" (ret__)			\
		    : "m" (var));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_seg"%1,%0"		\
		asm(op "w "__percpu_seg_str"%1,%0"	\
		    : "=r" (ret__)			\
		    : "m" (var));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_seg"%1,%0"		\
		asm(op "l "__percpu_seg_str"%1,%0"	\
		    : "=r" (ret__)			\
		    : "m" (var));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_seg_str"%1,%0"	\
		    : "=r" (ret__)			\
		    : "m" (var));			\
		break;					\
@@ -175,8 +120,14 @@ do { \
#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)

#ifdef CONFIG_X86_64
extern void load_pda_offset(int cpu);
#else
static inline void load_pda_offset(int cpu) { }
#endif

#endif /* !__ASSEMBLY__ */
#endif /* !CONFIG_X86_64 */

#ifdef CONFIG_SMP

+0 −1
Original line number Diff line number Diff line
@@ -55,7 +55,6 @@ int main(void)
	ENTRY(irqcount);
	ENTRY(cpunumber);
	ENTRY(irqstackptr);
	ENTRY(data_offset);
	DEFINE(pda_size, sizeof(struct x8664_pda));
	BLANK();
#undef ENTRY
+4 −3
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
@@ -1072,10 +1073,10 @@ ENTRY(\sym)
	TRACE_IRQS_OFF
	movq %rsp,%rdi		/* pt_regs pointer */
	xorl %esi,%esi		/* no error code */
	movq %gs:pda_data_offset, %rbp
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	PER_CPU(init_tss, %rbp)
	subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
	call \do_sym
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
	jmp paranoid_exit	/* %ebx: no swapgs flag */
	CFI_ENDPROC
END(\sym)
+0 −2
Original line number Diff line number Diff line
@@ -38,8 +38,6 @@ void __init x86_64_init_pda(void)
#else
	cpu_pda(0) = &_boot_cpu_pda;
#endif
	cpu_pda(0)->data_offset =
		(unsigned long)(__per_cpu_load - __per_cpu_start);
	pda_init(0);
}

Loading