Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 72eb6a79 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (30 commits)
  gameport: use this_cpu_read instead of lookup
  x86: udelay: Use this_cpu_read to avoid address calculation
  x86: Use this_cpu_inc_return for nmi counter
  x86: Replace uses of current_cpu_data with this_cpu ops
  x86: Use this_cpu_ops to optimize code
  vmstat: User per cpu atomics to avoid interrupt disable / enable
  irq_work: Use per cpu atomics instead of regular atomics
  cpuops: Use cmpxchg for xchg to avoid lock semantics
  x86: this_cpu_cmpxchg and this_cpu_xchg operations
  percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support
  percpu,x86: relocate this_cpu_add_return() and friends
  connector: Use this_cpu operations
  xen: Use this_cpu_inc_return
  taskstats: Use this_cpu_ops
  random: Use this_cpu_inc_return
  fs: Use this_cpu_inc_return in buffer.c
  highmem: Use this_cpu_xx_return() operations
  vmstat: Use this_cpu_inc_return for vm statistics
  x86: Support for this_cpu_add, sub, dec, inc_return
  percpu: Generic support for this_cpu_add, sub, dec, inc_return
  ...

Fixed up conflicts: in arch/x86/kernel/{apic/nmi.c, apic/x2apic_uv_x.c, process.c}
as per Tejun.
parents 23d69b09 55ee4ef3
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -4653,6 +4653,16 @@ S: Maintained
F:	crypto/pcrypt.c
F:	include/crypto/pcrypt.h

PER-CPU MEMORY ALLOCATOR
M:	Tejun Heo <tj@kernel.org>
M:	Christoph Lameter <cl@linux-foundation.org>
L:	linux-kernel@vger.kernel.org
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
S:	Maintained
F:	include/linux/percpu*.h
F:	mm/percpu*.c
F:	arch/*/include/asm/percpu.h

PER-TASK DELAY ACCOUNTING
M:	Balbir Singh <balbir@linux.vnet.ibm.com>
S:	Maintained
+3 −0
Original line number Diff line number Diff line
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
	def_bool X86_64 || (X86_32 && !M386)

config CMPXCHG_LOCAL
	def_bool X86_64 || (X86_32 && !M386)

config X86_L1_CACHE_SHIFT
	int
	default "7" if MPENTIUM4 || MPSC
+1 −1
Original line number Diff line number Diff line
@@ -94,7 +94,7 @@ static inline void hw_breakpoint_disable(void)

static inline int hw_breakpoint_active(void)
{
	return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
	return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
}

extern void aout_dump_debugregs(struct user *dump);
+157 −1
Original line number Diff line number Diff line
@@ -229,6 +229,125 @@ do { \
	}						\
})

/*
 * Add return operation
 */
#define percpu_add_return_op(var, val)					\
({									\
	typeof(var) paro_ret__ = val;					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm("xaddb %0, "__percpu_arg(1)				\
			    : "+q" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 2:								\
		asm("xaddw %0, "__percpu_arg(1)				\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 4:								\
		asm("xaddl %0, "__percpu_arg(1)				\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 8:								\
		asm("xaddq %0, "__percpu_arg(1)				\
			    : "+re" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	paro_ret__ += val;						\
	paro_ret__;							\
})

/*
 * xchg is implemented using cmpxchg without a lock prefix. xchg is
 * expensive due to the implied lock prefix.  The processor cannot prefetch
 * cachelines if xchg is used.
 */
#define percpu_xchg_op(var, nval)					\
({									\
	typeof(var) pxo_ret__;						\
	typeof(var) pxo_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm("\n1:mov "__percpu_arg(1)",%%al"			\
		    "\n\tcmpxchgb %2, "__percpu_arg(1)			\
		    "\n\tjnz 1b"					\
			    : "=a" (pxo_ret__), "+m" (var)		\
			    : "q" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 2:								\
		asm("\n1:mov "__percpu_arg(1)",%%ax"			\
		    "\n\tcmpxchgw %2, "__percpu_arg(1)			\
		    "\n\tjnz 1b"					\
			    : "=a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 4:								\
		asm("\n1:mov "__percpu_arg(1)",%%eax"			\
		    "\n\tcmpxchgl %2, "__percpu_arg(1)			\
		    "\n\tjnz 1b"					\
			    : "=a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 8:								\
		asm("\n1:mov "__percpu_arg(1)",%%rax"			\
		    "\n\tcmpxchgq %2, "__percpu_arg(1)			\
		    "\n\tjnz 1b"					\
			    : "=a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pxo_ret__;							\
})

/*
 * cmpxchg has no such implied lock semantics as a result it is much
 * more efficient for cpu local operations.
 */
#define percpu_cmpxchg_op(var, oval, nval)				\
({									\
	typeof(var) pco_ret__;						\
	typeof(var) pco_old__ = (oval);					\
	typeof(var) pco_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm("cmpxchgb %2, "__percpu_arg(1)			\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "q" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 2:								\
		asm("cmpxchgw %2, "__percpu_arg(1)			\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 4:								\
		asm("cmpxchgl %2, "__percpu_arg(1)			\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 8:								\
		asm("cmpxchgq %2, "__percpu_arg(1)			\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pco_ret__;							\
})

/*
 * percpu_read() makes gcc load the percpu variable every time it is
 * accessed while percpu_read_stable() allows the value to be cached.
@@ -267,6 +386,12 @@ do { \
#define __this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
/*
 * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
 * faster than an xchg with forced lock semantics.
 */
#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#define this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
@@ -286,6 +411,11 @@ do { \
#define this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#define irqsafe_cpu_add_1(pcp, val)	percpu_add_op((pcp), val)
#define irqsafe_cpu_add_2(pcp, val)	percpu_add_op((pcp), val)
@@ -299,6 +429,31 @@ do { \
#define irqsafe_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define __this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define __this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#define this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
#define this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
#define this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
#endif /* !CONFIG_M386 */

/*
 * Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -311,6 +466,7 @@ do { \
#define __this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)

#define this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -318,12 +474,12 @@ do { \
#define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val)		percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)

#define irqsafe_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)

#endif

/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+1 −2
Original line number Diff line number Diff line
@@ -141,10 +141,9 @@ extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
#define current_cpu_data	__get_cpu_var(cpu_info)
#else
#define cpu_info		boot_cpu_data
#define cpu_data(cpu)		boot_cpu_data
#define current_cpu_data	boot_cpu_data
#endif

extern const struct seq_operations cpuinfo_op;
Loading