Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3d7e5fc3 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/asm changes from Ingo Molnar:
 "Main changes:

   - Apply low level mutex optimization on x86-64, by Wedson Almeida
     Filho.

   - Change bitops to be naturally 'long', by H Peter Anvin.

   - Add TSX-NI opcodes support to the x86 (instrumentation) decoder, by
     Masami Hiramatsu.

   - Add clang compatibility adjustments/workarounds, by Jan-Simon
     Möller"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, doc: Update uaccess.h comment to reflect clang changes
  x86, asm: Fix a compilation issue with clang
  x86, asm: Extend definitions of _ASM_* with a raw format
  x86, insn: Add new opcodes as of June, 2013
  x86/ia32/asm: Remove unused argument in macro
  x86, bitops: Change bitops to be native operand size
  x86: Use asm-goto to implement mutex fast path on x86-64
parents 6924a467 f69fa9a9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -452,7 +452,7 @@ ia32_badsys:

	CFI_ENDPROC
	
	.macro PTREGSCALL label, func, arg
	.macro PTREGSCALL label, func
	ALIGN
GLOBAL(\label)
	leaq \func(%rip),%rax
+5 −1
Original line number Diff line number Diff line
@@ -3,21 +3,25 @@

#ifdef __ASSEMBLY__
# define __ASM_FORM(x)	x
# define __ASM_FORM_RAW(x)     x
# define __ASM_FORM_COMMA(x) x,
#else
# define __ASM_FORM(x)	" " #x " "
# define __ASM_FORM_RAW(x)     #x
# define __ASM_FORM_COMMA(x) " " #x ","
#endif

#ifdef CONFIG_X86_32
# define __ASM_SEL(a,b) __ASM_FORM(a)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
# define __ASM_SEL(a,b) __ASM_FORM(b)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif

#define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
					  inst##q##__VA_ARGS__)
#define __ASM_REG(reg)		__ASM_SEL(e##reg, r##reg)
#define __ASM_REG(reg)         __ASM_SEL_RAW(e##reg, r##reg)

#define _ASM_PTR	__ASM_SEL(.long, .quad)
#define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
+27 −19
Original line number Diff line number Diff line
@@ -15,6 +15,14 @@
#include <linux/compiler.h>
#include <asm/alternative.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

#define BIT_64(n)			(U64_C(1) << (n))

/*
@@ -59,7 +67,7 @@
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(unsigned int nr, volatile unsigned long *addr)
set_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
static inline void __set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(int nr, volatile unsigned long *addr)
clear_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr)
 * clear_bit() is atomic and implies release semantics before the memory
 * operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}

static inline void __clear_bit(int nr, volatile unsigned long *addr)
static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
@@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(int nr, volatile unsigned long *addr)
static inline void __change_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile unsigned long *addr)
static inline void change_bit(long nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(int nr, volatile unsigned long *addr)
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}
@@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
 * If two examples of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
}

/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

@@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
	return oldbit;
}

static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr % BITS_PER_LONG)) &
		(addr[nr / BITS_PER_LONG])) != 0;
	return ((1UL << (nr & (BITS_PER_LONG-1))) &
		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
	int oldbit;

+30 −0
Original line number Diff line number Diff line
@@ -16,6 +16,20 @@
 *
 * Atomically decrements @v and calls <fail_fn> if the result is negative.
 */
#ifdef CC_HAVE_ASM_GOTO
static inline void __mutex_fastpath_lock(atomic_t *v,
					 void (*fail_fn)(atomic_t *))
{
	asm volatile goto(LOCK_PREFIX "   decl %0\n"
			  "   jns %l[exit]\n"
			  : : "m" (v->counter)
			  : "memory", "cc"
			  : exit);
	fail_fn(v);
exit:
	return;
}
#else
#define __mutex_fastpath_lock(v, fail_fn)			\
do {								\
	unsigned long dummy;					\
@@ -32,6 +46,7 @@ do { \
		     : "rax", "rsi", "rdx", "rcx",		\
		       "r8", "r9", "r10", "r11", "memory");	\
} while (0)
#endif

/**
 *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
@@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 *
 * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
 */
#ifdef CC_HAVE_ASM_GOTO
static inline void __mutex_fastpath_unlock(atomic_t *v,
					   void (*fail_fn)(atomic_t *))
{
	asm volatile goto(LOCK_PREFIX "   incl %0\n"
			  "   jg %l[exit]\n"
			  : : "m" (v->counter)
			  : "memory", "cc"
			  : exit);
	fail_fn(v);
exit:
	return;
}
#else
#define __mutex_fastpath_unlock(v, fail_fn)			\
do {								\
	unsigned long dummy;					\
@@ -72,6 +101,7 @@ do { \
		     : "rax", "rsi", "rdx", "rcx",		\
		       "r8", "r9", "r10", "r11", "memory");	\
} while (0)
#endif

#define __mutex_slowpath_needs_to_unlock()	1

+12 −12
Original line number Diff line number Diff line
@@ -26,9 +26,9 @@
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void sync_set_bit(int nr, volatile unsigned long *addr)
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("lock; btsl %1,%0"
	asm volatile("lock; bts %1,%0"
		     : "+m" (ADDR)
		     : "Ir" (nr)
		     : "memory");
@@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr)
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("lock; btrl %1,%0"
	asm volatile("lock; btr %1,%0"
		     : "+m" (ADDR)
		     : "Ir" (nr)
		     : "memory");
@@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void sync_change_bit(int nr, volatile unsigned long *addr)
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
	asm volatile("lock; btcl %1,%0"
	asm volatile("lock; btc %1,%0"
		     : "+m" (ADDR)
		     : "Ir" (nr)
		     : "memory");
@@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0"
	asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
		     : "=r" (oldbit), "+m" (ADDR)
		     : "Ir" (nr) : "memory");
	return oldbit;
@@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0"
	asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
		     : "=r" (oldbit), "+m" (ADDR)
		     : "Ir" (nr) : "memory");
	return oldbit;
@@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr)
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0"
	asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
		     : "=r" (oldbit), "+m" (ADDR)
		     : "Ir" (nr) : "memory");
	return oldbit;
Loading