Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9a2533c3 authored by Linus Torvalds
Browse files
Pull ARM fixes from Russell King:
 "This fixes various issues found during July"

* 'fixes' of git://git.linaro.org/people/rmk/linux-arm:
  ARM: 7479/1: mm: avoid NULL dereference when flushing gate_vma with VIVT caches
  ARM: Fix undefined instruction exception handling
  ARM: 7480/1: only call smp_send_stop() on SMP
  ARM: 7478/1: errata: extend workaround for erratum #720789
  ARM: 7477/1: vfp: Always save VFP state in vfp_pm_suspend on UP
  ARM: 7476/1: vfp: only clear vfp state for current cpu in vfp_pm_suspend
  ARM: 7468/1: ftrace: Trace function entry before updating index
  ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+
  ARM: 7466/1: disable interrupt before spinning endlessly
  ARM: 7465/1: Handle >4GB memory sizes in device tree and mem=size@start option
parents d4fdc325 b74253f7
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm)
static inline void
vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
	struct mm_struct *mm = vma->vm_mm;

	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
					vma->vm_flags);
}
@@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
static inline void
vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
	struct mm_struct *mm = vma->vm_mm;

	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
		unsigned long addr = user_addr & PAGE_MASK;
		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
	}
+4 −115
Original line number Diff line number Diff line
@@ -7,121 +7,10 @@
 */
#ifndef _ASM_MUTEX_H
#define _ASM_MUTEX_H

#if __LINUX_ARM_ARCH__ < 6
/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
# include <asm-generic/mutex-xchg.h>
#else

/*
 * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
 * atomic decrement (it is not a reliable atomic decrement but it satisfies
 * the defined semantics for our purpose, while being smaller and faster
 * than a real atomic decrement or atomic swap).  The idea is to attempt
 * decrementing the lock value only once.  If once decremented it isn't zero,
 * or if its store-back fails due to a dispute on the exclusive store, we
 * simply bail out immediately through the slow path where the lock will be
 * reattempted until it succeeds.
 *
 * @count:   the mutex counter; only count->counter is accessed here.
 * @fail_fn: slow-path handler, called with @count when the single
 *           decrement attempt does not yield zero or is not stored.
 */
static inline void
__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
	int __ex_flag, __res;

	/*
	 * One-shot sequence with no retry loop: exclusive load of the
	 * counter into __res, subtract 1, then an exclusive store of the
	 * result whose status is written to __ex_flag.
	 */
	__asm__ (

		"ldrex	%0, [%2]	\n\t"
		"sub	%0, %0, #1	\n\t"
		"strex	%1, %0, [%2]	"

		: "=&r" (__res), "=&r" (__ex_flag)
		: "r" (&(count)->counter)
		: "cc","memory" );

	/*
	 * Fold the store status into the decremented value so a single
	 * comparison covers both "result was not zero" and "store failed".
	 */
	__res |= __ex_flag;
	if (unlikely(__res != 0))
		fail_fn(count);
}

/*
 * Same single-attempt decrement as __mutex_fastpath_lock(), but reports a
 * result to the caller.
 *
 * @count:   the mutex counter; only count->counter is accessed here.
 * @fail_fn: slow-path handler, called with @count when the decremented
 *           value is not zero or the exclusive store is not performed.
 *
 * Returns 0 when the fast path took the lock, otherwise whatever
 * @fail_fn returns.
 */
static inline int
__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
{
	int __ex_flag, __res;

	/*
	 * Exclusive load into __res, subtract 1, exclusive store back with
	 * the store status written to __ex_flag.  No retry is attempted.
	 */
	__asm__ (

		"ldrex	%0, [%2]	\n\t"
		"sub	%0, %0, #1	\n\t"
		"strex	%1, %0, [%2]	"

		: "=&r" (__res), "=&r" (__ex_flag)
		: "r" (&(count)->counter)
		: "cc","memory" );

	/* Non-zero here means either a non-zero result or a failed store. */
	__res |= __ex_flag;
	if (unlikely(__res != 0))
		__res = fail_fn(count);
	return __res;
}

/*
 * Same trick is used for the unlock fast path. However the original value,
 * rather than the result, is used to test for success in order to have
 * better generated assembly.
 *
 * @count:   the mutex counter; only count->counter is accessed here.
 * @fail_fn: slow-path handler, called with @count when the value loaded
 *           before the increment was not zero or the exclusive store was
 *           not performed.
 */
static inline void
__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
	int __ex_flag, __res, __orig;

	/*
	 * Exclusive load of the counter into __orig, compute __orig + 1
	 * into __res, then exclusive store of __res with the store status
	 * written to __ex_flag.  Attempted once only.
	 */
	__asm__ (

		"ldrex	%0, [%3]	\n\t"
		"add	%1, %0, #1	\n\t"
		"strex	%2, %1, [%3]	"

		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
		: "r" (&(count)->counter)
		: "cc","memory" );

	/*
	 * Test the pre-increment value (combined with the store status),
	 * not __res: the fast path is only taken when the counter was 0.
	 */
	__orig |= __ex_flag;
	if (unlikely(__orig != 0))
		fail_fn(count);
}

/*
 * If the unlock was done on a contended lock, or if the unlock simply fails
 * then the mutex remains locked.
 * On pre-ARMv6 hardware this results in a swp-based implementation,
 * which is the most efficient. For ARMv6+, we emit a pair of exclusive
 * accesses instead.
 */
#define __mutex_slowpath_needs_to_unlock()	1

/*
 * For __mutex_fastpath_trylock we use another construct which could be
 * described as a "single value cmpxchg".
 *
 * This provides the needed trylock semantics like cmpxchg would, but it is
 * lighter and less generic than a true cmpxchg implementation.
 *
 * @count:   the mutex counter; only count->counter is accessed here.
 * @fail_fn: not referenced by this implementation.
 *
 * Returns the value left in __orig by the asm sequence: 1 when the counter
 * was 1 and the store of 0 went through, 0 when the decrement went
 * negative.
 */
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
	int __ex_flag, __res, __orig;

	/*
	 * Step by step:
	 *   ldrex   - exclusive load of the counter into __orig
	 *   subs    - __res = __orig - 1, setting the condition flags
	 *   strexeq - only if __res is zero, try to store it back; the
	 *             store status lands in __ex_flag
	 *   movlt   - if the subtraction went negative, report 0
	 *   cmpeq   - only on the zero path, compare the store status to 0
	 *   bgt     - loop back to the ldrex when that comparison is
	 *             "greater than", i.e. the store status was non-zero
	 */
	__asm__ (

		"1: ldrex	%0, [%3]	\n\t"
		"subs		%1, %0, #1	\n\t"
		"strexeq	%2, %1, [%3]	\n\t"
		"movlt		%0, #0		\n\t"
		"cmpeq		%2, #0		\n\t"
		"bgt		1b		"

		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
		: "r" (&count->counter)
		: "cc", "memory" );

	return __orig;
}

#endif
#include <asm-generic/mutex-xchg.h>
#endif
+2 −2
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn }

struct membank {
	phys_addr_t start;
	unsigned long size;
	phys_addr_t size;
	unsigned int highmem;
};

@@ -217,7 +217,7 @@ extern struct meminfo meminfo;
#define bank_phys_end(bank)	((bank)->start + (bank)->size)
#define bank_phys_size(bank)	(bank)->size

extern int arm_add_memory(phys_addr_t start, unsigned long size);
extern int arm_add_memory(phys_addr_t start, phys_addr_t size);
extern void early_print(const char *str, ...);
extern void dump_machine_table(void);

+72 −39
Original line number Diff line number Diff line
@@ -244,6 +244,19 @@ svc_preempt:
	b	1b
#endif

__und_fault:
	@ Correct the PC such that it is pointing at the instruction
	@ which caused the fault.  If the faulting instruction was ARM
	@ the PC will be pointing at the next instruction, and have to
	@ subtract 4.  Otherwise, it is Thumb, and the PC will be
	@ pointing at the second half of the Thumb instruction.  We
	@ have to subtract 2.
	ldr	r2, [r0, #S_PC]
	sub	r2, r2, r1
	str	r2, [r0, #S_PC]
	b	do_undefinstr
ENDPROC(__und_fault)

	.align	5
__und_svc:
#ifdef CONFIG_KPROBES
@@ -264,22 +277,29 @@ __und_svc:
#ifndef CONFIG_THUMB2_KERNEL
	ldr	r0, [r4, #-4]
#else
	mov	r1, #2
	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
	ldrhhs	r9, [r4]			@ bottom 16 bits
	orrhs	r0, r9, r0, lsl #16
	blo	__und_svc_fault
	ldrh	r9, [r4]			@ bottom 16 bits
	add	r4, r4, #2
	str	r4, [sp, #S_PC]
	orr	r0, r9, r0, lsl #16
#endif
	adr	r9, BSYM(1f)
	adr	r9, BSYM(__und_svc_finish)
	mov	r2, r4
	bl	call_fpe

	mov	r1, #4				@ PC correction to apply
__und_svc_fault:
	mov	r0, sp				@ struct pt_regs *regs
	bl	do_undefinstr
	bl	__und_fault

	@
	@ IRQs off again before pulling preserved data off the stack
	@
1:	disable_irq_notrace
__und_svc_finish:
	disable_irq_notrace

	@
	@ restore SPSR and restart the instruction
@@ -423,25 +443,33 @@ __und_usr:
	mov	r2, r4
	mov	r3, r5

	@ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the
	@      faulting instruction depending on Thumb mode.
	@ r3 = regs->ARM_cpsr
	@
	@ fall through to the emulation code, which returns using r9 if
	@ it has emulated the instruction, or the more conventional lr
	@ if we are to treat this as a real undefined instruction
	@
	@  r0 - instruction
	@ The emulation code returns using r9 if it has emulated the
	@ instruction, or the more conventional lr if we are to treat
	@ this as a real undefined instruction
	@
	adr	r9, BSYM(ret_from_exception)
	adr	lr, BSYM(__und_usr_unknown)

	tst	r3, #PSR_T_BIT			@ Thumb mode?
	itet	eq				@ explicit IT needed for the 1f label
	subeq	r4, r2, #4			@ ARM instr at LR - 4
	subne	r4, r2, #2			@ Thumb instr at LR - 2
1:	ldreqt	r0, [r4]
	bne	__und_usr_thumb
	sub	r4, r2, #4			@ ARM instr at LR - 4
1:	ldrt	r0, [r4]
#ifdef CONFIG_CPU_ENDIAN_BE8
	reveq	r0, r0				@ little endian instruction
	rev	r0, r0				@ little endian instruction
#endif
	beq	call_fpe
	@ r0 = 32-bit ARM instruction which caused the exception
	@ r2 = PC value for the following instruction (:= regs->ARM_pc)
	@ r4 = PC value for the faulting instruction
	@ lr = 32-bit undefined instruction function
	adr	lr, BSYM(__und_usr_fault_32)
	b	call_fpe

__und_usr_thumb:
	@ Thumb instruction
	sub	r4, r2, #2			@ First half of thumb instr at LR - 2
#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
/*
 * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
@@ -455,7 +483,7 @@ __und_usr:
	ldr	r5, .LCcpu_architecture
	ldr	r5, [r5]
	cmp	r5, #CPU_ARCH_ARMv7
	blo	__und_usr_unknown
	blo	__und_usr_fault_16		@ 16bit undefined instruction
/*
 * The following code won't get run unless the running CPU really is v7, so
 * coding round the lack of ldrht on older arches is pointless.  Temporarily
@@ -463,15 +491,18 @@ __und_usr:
 */
	.arch	armv6t2
#endif
2:
 ARM(	ldrht	r5, [r4], #2	)
 THUMB(	ldrht	r5, [r4]	)
 THUMB(	add	r4, r4, #2	)
2:	ldrht	r5, [r4]
	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
	blo	__und_usr_unknown
3:	ldrht	r0, [r4]
	blo	__und_usr_fault_16		@ 16bit undefined instruction
3:	ldrht	r0, [r2]
	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
	orr	r0, r0, r5, lsl #16
	adr	lr, BSYM(__und_usr_fault_32)
	@ r0 = the two 16-bit Thumb instructions which caused the exception
	@ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc)
	@ r4 = PC value for the first 16-bit Thumb instruction
	@ lr = 32bit undefined instruction function

#if __LINUX_ARM_ARCH__ < 7
/* If the target arch was overridden, change it back: */
@@ -482,17 +513,13 @@ __und_usr:
#endif
#endif /* __LINUX_ARM_ARCH__ < 7 */
#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
	b	__und_usr_unknown
	b	__und_usr_fault_16
#endif
 UNWIND(.fnend)
ENDPROC(__und_usr)

	@
	@ fallthrough to call_fpe
	@

/*
 * The out of line fixup for the ldrt above.
 * The out of line fixup for the ldrt instructions above.
 */
	.pushsection .fixup, "ax"
	.align	2
@@ -524,11 +551,12 @@ ENDPROC(__und_usr)
 * NEON handler code.
 *
 * Emulators may wish to make use of the following registers:
 *  r0  = instruction opcode.
 *  r2  = PC+4
 *  r0  = instruction opcode (32-bit ARM or two 16-bit Thumb)
 *  r2  = PC value to resume execution after successful emulation
 *  r9  = normal "successful" return address
 *  r10 = this threads thread_info structure.
 *  r10 = this threads thread_info structure
 *  lr  = unrecognised instruction return address
 * IRQs disabled, FIQs enabled.
 */
	@
	@ Fall-through from Thumb-2 __und_usr
@@ -659,12 +687,17 @@ ENTRY(no_fp)
	mov	pc, lr
ENDPROC(no_fp)

__und_usr_unknown:
	enable_irq
__und_usr_fault_32:
	mov	r1, #4
	b	1f
__und_usr_fault_16:
	mov	r1, #2
1:	enable_irq
	mov	r0, sp
	adr	lr, BSYM(ret_from_exception)
	b	do_undefinstr
ENDPROC(__und_usr_unknown)
	b	__und_fault
ENDPROC(__und_usr_fault_32)
ENDPROC(__und_usr_fault_16)

	.align	5
__pabt_usr:
+9 −8
Original line number Diff line number Diff line
@@ -179,19 +179,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
	old = *parent;
	*parent = return_hooker;

	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
				       frame_pointer);
	if (err == -EBUSY) {
		*parent = old;
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		current->curr_ret_stack--;
		*parent = old;
		return;
	}

	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
				       frame_pointer);
	if (err == -EBUSY) {
		*parent = old;
		return;
	}
}

Loading