Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2ff07209 authored by Russell King's avatar Russell King
Browse files

Merge branch 'cmpxchg64' of git://git.linaro.org/people/nico/linux into devel-stable

parents b0af8dfd 40fb79c8
Loading
Loading
Loading
Loading
+267 −0
Original line number Diff line number Diff line
Kernel-provided User Helpers
============================

These are segment of kernel provided user code reachable from user space
at a fixed address in kernel memory.  This is used to provide user space
with some operations which require kernel help because of unimplemented
native feature and/or instructions in many ARM CPUs. The idea is for this
code to be executed directly in user mode for best efficiency but which is
too intimate with the kernel counter part to be left to user libraries.
In fact this code might even differ from one CPU to another depending on
the available instruction set, or whether it is a SMP systems. In other
words, the kernel reserves the right to change this code as needed without
warning. Only the entry points and their results as documented here are
guaranteed to be stable.

This is different from (but doesn't preclude) a full blown VDSO
implementation, however a VDSO would prevent some assembly tricks with
constants that allows for efficient branching to those code segments. And
since those code segments only use a few cycles before returning to user
code, the overhead of a VDSO indirect far call would add a measurable
overhead to such minimalistic operations.

User space is expected to bypass those helpers and implement those things
inline (either in the code emitted directly by the compiler, or part of
the implementation of a library call) when optimizing for a recent enough
processor that has the necessary native support, but only if resulting
binaries are already to be incompatible with earlier ARM processors due to
useage of similar native instructions for other things.  In other words
don't make binaries unable to run on earlier processors just for the sake
of not using these kernel helpers if your compiled code is not going to
use new instructions for other purpose.

New helpers may be added over time, so an older kernel may be missing some
helpers present in a newer kernel.  For this reason, programs must check
the value of __kuser_helper_version (see below) before assuming that it is
safe to call any particular helper.  This check should ideally be
performed only once at process startup time, and execution aborted early
if the required helpers are not provided by the kernel version that
process is running on.

kuser_helper_version
--------------------

Location:	0xffff0ffc

Reference declaration:

  extern int32_t __kuser_helper_version;

Definition:

  This field contains the number of helpers being implemented by the
  running kernel.  User space may read this to determine the availability
  of a particular helper.

Usage example:

#define __kuser_helper_version (*(int32_t *)0xffff0ffc)

void check_kuser_version(void)
{
	if (__kuser_helper_version < 2) {
		fprintf(stderr, "can't do atomic operations, kernel too old\n");
		abort();
	}
}

Notes:

  User space may assume that the value of this field never changes
  during the lifetime of any single process.  This means that this
  field can be read once during the initialisation of a library or
  startup phase of a program.

kuser_get_tls
-------------

Location:	0xffff0fe0

Reference prototype:

  void * __kuser_get_tls(void);

Input:

  lr = return address

Output:

  r0 = TLS value

Clobbered registers:

  none

Definition:

  Get the TLS value as previously set via the __ARM_NR_set_tls syscall.

Usage example:

typedef void * (__kuser_get_tls_t)(void);
#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)

void foo()
{
	void *tls = __kuser_get_tls();
	printf("TLS = %p\n", tls);
}

Notes:

  - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).

kuser_cmpxchg
-------------

Location:	0xffff0fc0

Reference prototype:

  int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);

Input:

  r0 = oldval
  r1 = newval
  r2 = ptr
  lr = return address

Output:

  r0 = success code (zero or non-zero)
  C flag = set if r0 == 0, clear if r0 != 0

Clobbered registers:

  r3, ip, flags

Definition:

  Atomically store newval in *ptr only if *ptr is equal to oldval.
  Return zero if *ptr was changed or non-zero if no exchange happened.
  The C flag is also set if *ptr was changed to allow for assembly
  optimization in the calling code.

Usage example:

typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)

int atomic_add(volatile int *ptr, int val)
{
	int old, new;

	do {
		old = *ptr;
		new = old + val;
	} while(__kuser_cmpxchg(old, new, ptr));

	return new;
}

Notes:

  - This routine already includes memory barriers as needed.

  - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).

kuser_memory_barrier
--------------------

Location:	0xffff0fa0

Reference prototype:

  void __kuser_memory_barrier(void);

Input:

  lr = return address

Output:

  none

Clobbered registers:

  none

Definition:

  Apply any needed memory barrier to preserve consistency with data modified
  manually and __kuser_cmpxchg usage.

Usage example:

typedef void (__kuser_dmb_t)(void);
#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)

Notes:

  - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).

kuser_cmpxchg64
---------------

Location:	0xffff0f60

Reference prototype:

  int __kuser_cmpxchg64(const int64_t *oldval,
                        const int64_t *newval,
                        volatile int64_t *ptr);

Input:

  r0 = pointer to oldval
  r1 = pointer to newval
  r2 = pointer to target value
  lr = return address

Output:

  r0 = success code (zero or non-zero)
  C flag = set if r0 == 0, clear if r0 != 0

Clobbered registers:

  r3, lr, flags

Definition:

  Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
  is equal to the 64-bit value pointed by *oldval.  Return zero if *ptr was
  changed or non-zero if no exchange happened.

  The C flag is also set if *ptr was changed to allow for assembly
  optimization in the calling code.

Usage example:

typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
                                  const int64_t *newval,
                                  volatile int64_t *ptr);
#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)

int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
{
	int64_t old, new;

	do {
		old = *ptr;
		new = old + val;
	} while(__kuser_cmpxchg64(&old, &new, ptr));

	return new;
}

Notes:

  - This routine already includes memory barriers as needed.

  - Due to the length of this sequence, this spans 2 conventional kuser
    "slots", therefore 0xffff0f80 is not used as a valid entry point.

  - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
+94 −152
Original line number Diff line number Diff line
@@ -383,7 +383,7 @@ ENDPROC(__pabt_svc)
	.endm

	.macro	kuser_cmpxchg_check
#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
#ifndef CONFIG_MMU
#warning "NPTL on non MMU needs fixing"
#else
@@ -392,7 +392,7 @@ ENDPROC(__pabt_svc)
	@ perform a quick test inline since it should be false
	@ 99.9999% of the time.  The rest is done out of line.
	cmp	r2, #TASK_SIZE
	blhs	kuser_cmpxchg_fixup
	blhs	kuser_cmpxchg64_fixup
#endif
#endif
	.endm
@@ -758,31 +758,12 @@ ENDPROC(__switch_to)
/*
 * User helpers.
 *
 * These are segment of kernel provided user code reachable from user space
 * at a fixed address in kernel memory.  This is used to provide user space
 * with some operations which require kernel help because of unimplemented
 * native feature and/or instructions in many ARM CPUs. The idea is for
 * this code to be executed directly in user mode for best efficiency but
 * which is too intimate with the kernel counter part to be left to user
 * libraries.  In fact this code might even differ from one CPU to another
 * depending on the available  instruction set and restrictions like on
 * SMP systems.  In other words, the kernel reserves the right to change
 * this code as needed without warning. Only the entry points and their
 * results are guaranteed to be stable.
 *
 * Each segment is 32-byte aligned and will be moved to the top of the high
 * vector page.  New segments (if ever needed) must be added in front of
 * existing ones.  This mechanism should be used only for things that are
 * really small and justified, and not be abused freely.
 *
 * User space is expected to implement those things inline when optimizing
 * for a processor that has the necessary native support, but only if such
 * resulting binaries are already to be incompatible with earlier ARM
 * processors due to the use of unsupported instructions other than what
 * is provided here.  In other words don't make binaries unable to run on
 * earlier processors just for the sake of not using these kernel helpers
 * if your compiled code is not going to use the new instructions for other
 * purpose.
 * See Documentation/arm/kernel_user_helpers.txt for formal definitions.
 */
 THUMB(	.arm	)

@@ -799,96 +780,103 @@ ENDPROC(__switch_to)
__kuser_helper_start:

/*
 * Reference prototype:
 *
 *	void __kernel_memory_barrier(void)
 *
 * Input:
 *
 *	lr = return address
 *
 * Output:
 *
 *	none
 *
 * Clobbered:
 *
 *	none
 *
 * Definition and user space usage example:
 *
 *	typedef void (__kernel_dmb_t)(void);
 *	#define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
 *
 * Apply any needed memory barrier to preserve consistency with data modified
 * manually and __kuser_cmpxchg usage.
 *
 * This could be used as follows:
 *
 * #define __kernel_dmb() \
 *         asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \
 *	        : : : "r0", "lr","cc" )
 * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular
 * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point.
 */

__kuser_memory_barrier:				@ 0xffff0fa0
__kuser_cmpxchg64:				@ 0xffff0f60

#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)

	/*
	 * Poor you.  No fast solution possible...
	 * The kernel itself must perform the operation.
	 * A special ghost syscall is used for that (see traps.c).
	 */
	stmfd	sp!, {r7, lr}
	ldr	r7, 1f			@ it's 20 bits
	swi	__ARM_NR_cmpxchg64
	ldmfd	sp!, {r7, pc}
1:	.word	__ARM_NR_cmpxchg64

#elif defined(CONFIG_CPU_32v6K)

	stmfd	sp!, {r4, r5, r6, r7}
	ldrd	r4, r5, [r0]			@ load old val
	ldrd	r6, r7, [r1]			@ load new val
	smp_dmb	arm
	usr_ret	lr
1:	ldrexd	r0, r1, [r2]			@ load current val
	eors	r3, r0, r4			@ compare with oldval (1)
	eoreqs	r3, r1, r5			@ compare with oldval (2)
	strexdeq r3, r6, r7, [r2]		@ store newval if eq
	teqeq	r3, #1				@ success?
	beq	1b				@ if no then retry
	smp_dmb	arm
	rsbs	r0, r3, #0			@ set returned val and C flag
	ldmfd	sp!, {r4, r5, r6, r7}
	bx	lr

	.align	5
#elif !defined(CONFIG_SMP)

#ifdef CONFIG_MMU

	/*
 * Reference prototype:
 *
 *	int __kernel_cmpxchg(int oldval, int newval, int *ptr)
 *
 * Input:
 *
 *	r0 = oldval
 *	r1 = newval
 *	r2 = ptr
 *	lr = return address
 *
 * Output:
 *
 *	r0 = returned value (zero or non-zero)
 *	C flag = set if r0 == 0, clear if r0 != 0
 *
 * Clobbered:
 *
 *	r3, ip, flags
 *
 * Definition and user space usage example:
 *
 *	typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
 *	#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
 *
 * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
 * Return zero if *ptr was changed or non-zero if no exchange happened.
 * The C flag is also set if *ptr was changed to allow for assembly
 * optimization in the calling code.
 *
 * Notes:
 *
 *    - This routine already includes memory barriers as needed.
 *
 * For example, a user space atomic_add implementation could look like this:
 *
 * #define atomic_add(ptr, val) \
 *	({ register unsigned int *__ptr asm("r2") = (ptr); \
 *	   register unsigned int __result asm("r1"); \
 *	   asm volatile ( \
 *	       "1: @ atomic_add\n\t" \
 *	       "ldr	r0, [r2]\n\t" \
 *	       "mov	r3, #0xffff0fff\n\t" \
 *	       "add	lr, pc, #4\n\t" \
 *	       "add	r1, r0, %2\n\t" \
 *	       "add	pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
 *	       "bcc	1b" \
 *	       : "=&r" (__result) \
 *	       : "r" (__ptr), "rIL" (val) \
 *	       : "r0","r3","ip","lr","cc","memory" ); \
 *	   __result; })
	 * The only thing that can break atomicity in this cmpxchg64
	 * implementation is either an IRQ or a data abort exception
	 * causing another process/thread to be scheduled in the middle of
	 * the critical sequence.  The same strategy as for cmpxchg is used.
	 */
	stmfd	sp!, {r4, r5, r6, lr}
	ldmia	r0, {r4, r5}			@ load old val
	ldmia	r1, {r6, lr}			@ load new val
1:	ldmia	r2, {r0, r1}			@ load current val
	eors	r3, r0, r4			@ compare with oldval (1)
	eoreqs	r3, r1, r5			@ compare with oldval (2)
2:	stmeqia	r2, {r6, lr}			@ store newval if eq
	rsbs	r0, r3, #0			@ set return val and C flag
	ldmfd	sp!, {r4, r5, r6, pc}

	.text
kuser_cmpxchg64_fixup:
	@ Called from kuser_cmpxchg_fixup.
	@ r2 = address of interrupted insn (must be preserved).
	@ sp = saved regs. r7 and r8 are clobbered.
	@ 1b = first critical insn, 2b = last critical insn.
	@ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
	mov	r7, #0xffff0fff
	sub	r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
	subs	r8, r2, r7
	rsbcss	r8, r8, #(2b - 1b)
	strcs	r7, [sp, #S_PC]
#if __LINUX_ARM_ARCH__ < 6
	bcc	kuser_cmpxchg32_fixup
#endif
	mov	pc, lr
	.previous

#else
#warning "NPTL on non MMU needs fixing"
	mov	r0, #-1
	adds	r0, r0, #0
	usr_ret	lr
#endif

#else
#error "incoherent kernel configuration"
#endif

	/* pad to next slot */
	.rept	(16 - (. - __kuser_cmpxchg64)/4)
	.word	0
	.endr

	.align	5

__kuser_memory_barrier:				@ 0xffff0fa0
	smp_dmb	arm
	usr_ret	lr

	.align	5

__kuser_cmpxchg:				@ 0xffff0fc0

@@ -925,7 +913,7 @@ __kuser_cmpxchg: @ 0xffff0fc0
	usr_ret	lr

	.text
kuser_cmpxchg_fixup:
kuser_cmpxchg32_fixup:
	@ Called from kuser_cmpxchg_check macro.
	@ r2 = address of interrupted insn (must be preserved).
	@ sp = saved regs. r7 and r8 are clobbered.
@@ -963,39 +951,6 @@ kuser_cmpxchg_fixup:

	.align	5

/*
 * Reference prototype:
 *
 *	int __kernel_get_tls(void)
 *
 * Input:
 *
 *	lr = return address
 *
 * Output:
 *
 *	r0 = TLS value
 *
 * Clobbered:
 *
 *	none
 *
 * Definition and user space usage example:
 *
 *	typedef int (__kernel_get_tls_t)(void);
 *	#define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
 *
 * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
 *
 * This could be used as follows:
 *
 * #define __kernel_get_tls() \
 *	({ register unsigned int __val asm("r0"); \
 *         asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
 *	        : "=r" (__val) : : "lr","cc" ); \
 *	   __val; })
 */

__kuser_get_tls:				@ 0xffff0fe0
	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
	usr_ret	lr
@@ -1004,19 +959,6 @@ __kuser_get_tls: @ 0xffff0fe0
	.word	0			@ 0xffff0ff0 software TLS value, then
	.endr				@ pad up to __kuser_helper_version

/*
 * Reference declaration:
 *
 *	extern unsigned int __kernel_helper_version;
 *
 * Definition and user space usage example:
 *
 *	#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
 *
 * User space may read this to determine the curent number of helpers
 * available.
 */

__kuser_helper_version:				@ 0xffff0ffc
	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)