Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 85250231 authored by Al Viro
Browse files

alpha: switch __copy_user() and __do_clear_user() to normal calling conventions



They used to need odd calling conventions due to old exception handling
mechanism, the last remnants of which had disappeared back in 2002.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent d597580d
Loading
Loading
Loading
Loading
+11 −56
Original line number Original line Diff line number Diff line
@@ -334,45 +334,17 @@ __asm__ __volatile__("1: stb %r2,%1\n" \
 * Complex access routines
 * Complex access routines
 */
 */


/* This little bit of silliness is to get the GP loaded for a function
extern long __copy_user(void *to, const void *from, long len);
   that ordinarily wouldn't.  Otherwise we could have it done by the macro
   directly, which can be optimized by the linker.  */
#ifdef MODULE
#define __module_address(sym)		"r"(sym),
#define __module_call(ra, arg, sym)	"jsr $" #ra ",(%" #arg ")," #sym
#else
#define __module_address(sym)
#define __module_call(ra, arg, sym)	"bsr $" #ra "," #sym " !samegp"
#endif

extern void __copy_user(void);

extern inline long
__copy_tofrom_user_nocheck(void *to, const void *from, long len)
{
	register void * __cu_to __asm__("$6") = to;
	register const void * __cu_from __asm__("$7") = from;
	register long __cu_len __asm__("$0") = len;

	__asm__ __volatile__(
		__module_call(28, 3, __copy_user)
		: "=r" (__cu_len), "=r" (__cu_from), "=r" (__cu_to)
		: __module_address(__copy_user)
		  "0" (__cu_len), "1" (__cu_from), "2" (__cu_to)
		: "$1", "$2", "$3", "$4", "$5", "$28", "memory");

	return __cu_len;
}


#define __copy_to_user(to, from, n)			\
#define __copy_to_user(to, from, n)			\
({							\
({							\
	__chk_user_ptr(to);				\
	__chk_user_ptr(to);				\
	__copy_tofrom_user_nocheck((__force void *)(to), (from), (n));	\
	__copy_user((__force void *)(to), (from), (n));	\
})
})
#define __copy_from_user(to, from, n)			\
#define __copy_from_user(to, from, n)			\
({							\
({							\
	__chk_user_ptr(from);				\
	__chk_user_ptr(from);				\
	__copy_tofrom_user_nocheck((to), (__force void *)(from), (n));	\
	__copy_user((to), (__force void *)(from), (n));	\
})
})


#define __copy_to_user_inatomic __copy_to_user
#define __copy_to_user_inatomic __copy_to_user
@@ -382,7 +354,7 @@ extern inline long
copy_to_user(void __user *to, const void *from, long n)
copy_to_user(void __user *to, const void *from, long n)
{
{
	if (likely(__access_ok((unsigned long)to, n, get_fs())))
	if (likely(__access_ok((unsigned long)to, n, get_fs())))
		n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
		n = __copy_user((__force void *)to, from, n);
	return n;
	return n;
}
}


@@ -397,21 +369,7 @@ copy_from_user(void *to, const void __user *from, long n)
	return res;
	return res;
}
}


extern void __do_clear_user(void);
extern long __clear_user(void __user *to, long len);

extern inline long
__clear_user(void __user *to, long len)
{
	register void __user * __cl_to __asm__("$6") = to;
	register long __cl_len __asm__("$0") = len;
	__asm__ __volatile__(
		__module_call(28, 2, __do_clear_user)
		: "=r"(__cl_len), "=r"(__cl_to)
		: __module_address(__do_clear_user)
		  "0"(__cl_len), "1"(__cl_to)
		: "$1", "$2", "$3", "$4", "$5", "$28", "memory");
	return __cl_len;
}


extern inline long
extern inline long
clear_user(void __user *to, long len)
clear_user(void __user *to, long len)
@@ -421,9 +379,6 @@ clear_user(void __user *to, long len)
	return len;
	return len;
}
}


#undef __module_address
#undef __module_call

#define user_addr_max() \
#define user_addr_max() \
        (uaccess_kernel() ? ~0UL : TASK_SIZE)
        (uaccess_kernel() ? ~0UL : TASK_SIZE)


+26 −40
Original line number Original line Diff line number Diff line
@@ -8,21 +8,6 @@
 * right "bytes left to zero" value (and that it is updated only _after_
 * right "bytes left to zero" value (and that it is updated only _after_
 * a successful copy).  There is also some rather minor exception setup
 * a successful copy).  There is also some rather minor exception setup
 * stuff.
 * stuff.
 *
 * NOTE! This is not directly C-callable, because the calling semantics
 * are different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	exception pointer in $7
 *	return address in $28 (exceptions expect it there)
 *
 * Outputs:
 *	bytes left to copy in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6
 */
 */
#include <asm/export.h>
#include <asm/export.h>


@@ -38,62 +23,63 @@
	.set noreorder
	.set noreorder
	.align 4
	.align 4


	.globl __do_clear_user
	.globl __clear_user
	.ent __do_clear_user
	.ent __clear_user
	.frame	$30, 0, $28
	.frame	$30, 0, $26
	.prologue 0
	.prologue 0


$loop:
$loop:
	and	$1, 3, $4	# e0    :
	and	$1, 3, $4	# e0    :
	beq	$4, 1f		# .. e1 :
	beq	$4, 1f		# .. e1 :


0:	EX( stq_u $31, 0($6) )	# e0    : zero one word
0:	EX( stq_u $31, 0($16) )	# e0    : zero one word
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
	subq	$4, 1, $4	# e0    :
	subq	$4, 1, $4	# e0    :
	addq	$6, 8, $6	# .. e1 :
	addq	$16, 8, $16	# .. e1 :
	bne	$4, 0b		# e1    :
	bne	$4, 0b		# e1    :
	unop			#       :
	unop			#       :


1:	bic	$1, 3, $1	# e0    :
1:	bic	$1, 3, $1	# e0    :
	beq	$1, $tail	# .. e1 :
	beq	$1, $tail	# .. e1 :


2:	EX( stq_u $31, 0($6) )	# e0    : zero four words
2:	EX( stq_u $31, 0($16) )	# e0    : zero four words
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
	EX( stq_u $31, 8($6) )	# e0    :
	EX( stq_u $31, 8($16) )	# e0    :
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
	EX( stq_u $31, 16($6) )	# e0    :
	EX( stq_u $31, 16($16) )	# e0    :
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
	EX( stq_u $31, 24($6) )	# e0    :
	EX( stq_u $31, 24($16) )	# e0    :
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
	subq	$1, 4, $1	# e0    :
	subq	$1, 4, $1	# e0    :
	addq	$6, 32, $6	# .. e1 :
	addq	$16, 32, $16	# .. e1 :
	bne	$1, 2b		# e1    :
	bne	$1, 2b		# e1    :


$tail:
$tail:
	bne	$2, 1f		# e1    : is there a tail to do?
	bne	$2, 1f		# e1    : is there a tail to do?
	ret	$31, ($28), 1	# .. e1 :
	ret	$31, ($26), 1	# .. e1 :


1:	EX( ldq_u $5, 0($6) )	# e0    :
1:	EX( ldq_u $5, 0($16) )	# e0    :
	clr	$0		# .. e1 :
	clr	$0		# .. e1 :
	nop			# e1    :
	nop			# e1    :
	mskqh	$5, $0, $5	# e0    :
	mskqh	$5, $0, $5	# e0    :
	EX( stq_u $5, 0($6) )	# e0    :
	EX( stq_u $5, 0($16) )	# e0    :
	ret	$31, ($28), 1	# .. e1 :
	ret	$31, ($26), 1	# .. e1 :


__do_clear_user:
__clear_user:
	and	$6, 7, $4	# e0    : find dest misalignment
	and	$17, $17, $0
	and	$16, 7, $4	# e0    : find dest misalignment
	beq	$0, $zerolength # .. e1 :
	beq	$0, $zerolength # .. e1 :
	addq	$0, $4, $1	# e0    : bias counter
	addq	$0, $4, $1	# e0    : bias counter
	and	$1, 7, $2	# e1    : number of bytes in tail
	and	$1, 7, $2	# e1    : number of bytes in tail
	srl	$1, 3, $1	# e0    :
	srl	$1, 3, $1	# e0    :
	beq	$4, $loop	# .. e1 :
	beq	$4, $loop	# .. e1 :


	EX( ldq_u $5, 0($6) )	# e0    : load dst word to mask back in
	EX( ldq_u $5, 0($16) )	# e0    : load dst word to mask back in
	beq	$1, $oneword	# .. e1 : sub-word store?
	beq	$1, $oneword	# .. e1 : sub-word store?


	mskql	$5, $6, $5	# e0    : take care of misaligned head
	mskql	$5, $16, $5	# e0    : take care of misaligned head
	addq	$6, 8, $6	# .. e1 :
	addq	$16, 8, $16	# .. e1 :
	EX( stq_u $5, -8($6) )	# e0    :
	EX( stq_u $5, -8($16) )	# e0    :
	addq	$0, $4, $0	# .. e1 : bytes left -= 8 - misalignment
	addq	$0, $4, $0	# .. e1 : bytes left -= 8 - misalignment
	subq	$1, 1, $1	# e0    :
	subq	$1, 1, $1	# e0    :
	subq	$0, 8, $0	# .. e1 :
	subq	$0, 8, $0	# .. e1 :
@@ -101,15 +87,15 @@ __do_clear_user:
	unop			#       :
	unop			#       :


$oneword:
$oneword:
	mskql	$5, $6, $4	# e0    :
	mskql	$5, $16, $4	# e0    :
	mskqh	$5, $2, $5	# e0    :
	mskqh	$5, $2, $5	# e0    :
	or	$5, $4, $5	# e1    :
	or	$5, $4, $5	# e1    :
	EX( stq_u $5, 0($6) )	# e0    :
	EX( stq_u $5, 0($16) )	# e0    :
	clr	$0		# .. e1 :
	clr	$0		# .. e1 :


$zerolength:
$zerolength:
$exception:
$exception:
	ret	$31, ($28), 1	# .. e1 :
	ret	$31, ($26), 1	# .. e1 :


	.end __do_clear_user
	.end __clear_user
	EXPORT_SYMBOL(__do_clear_user)
	EXPORT_SYMBOL(__clear_user)
+34 −48
Original line number Original line Diff line number Diff line
@@ -9,21 +9,6 @@
 * contains the right "bytes left to copy" value (and that it is updated
 * contains the right "bytes left to copy" value (and that it is updated
 * only _after_ a successful copy). There is also some rather minor
 * only _after_ a successful copy). There is also some rather minor
 * exception setup stuff..
 * exception setup stuff..
 *
 * NOTE! This is not directly C-callable, because the calling semantics are
 * different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	source address in $7
 *	return address in $28
 *
 * Outputs:
 *	bytes left to copy in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6,$7
 */
 */


#include <asm/export.h>
#include <asm/export.h>
@@ -49,58 +34,59 @@
	.ent __copy_user
	.ent __copy_user
__copy_user:
__copy_user:
	.prologue 0
	.prologue 0
	and $6,7,$3
	and $18,$18,$0
	and $16,7,$3
	beq $0,$35
	beq $0,$35
	beq $3,$36
	beq $3,$36
	subq $3,8,$3
	subq $3,8,$3
	.align 4
	.align 4
$37:
$37:
	EXI( ldq_u $1,0($7) )
	EXI( ldq_u $1,0($17) )
	EXO( ldq_u $2,0($6) )
	EXO( ldq_u $2,0($16) )
	extbl $1,$7,$1
	extbl $1,$17,$1
	mskbl $2,$6,$2
	mskbl $2,$16,$2
	insbl $1,$6,$1
	insbl $1,$16,$1
	addq $3,1,$3
	addq $3,1,$3
	bis $1,$2,$1
	bis $1,$2,$1
	EXO( stq_u $1,0($6) )
	EXO( stq_u $1,0($16) )
	subq $0,1,$0
	subq $0,1,$0
	addq $6,1,$6
	addq $16,1,$16
	addq $7,1,$7
	addq $17,1,$17
	beq $0,$41
	beq $0,$41
	bne $3,$37
	bne $3,$37
$36:
$36:
	and $7,7,$1
	and $17,7,$1
	bic $0,7,$4
	bic $0,7,$4
	beq $1,$43
	beq $1,$43
	beq $4,$48
	beq $4,$48
	EXI( ldq_u $3,0($7) )
	EXI( ldq_u $3,0($17) )
	.align 4
	.align 4
$50:
$50:
	EXI( ldq_u $2,8($7) )
	EXI( ldq_u $2,8($17) )
	subq $4,8,$4
	subq $4,8,$4
	extql $3,$7,$3
	extql $3,$17,$3
	extqh $2,$7,$1
	extqh $2,$17,$1
	bis $3,$1,$1
	bis $3,$1,$1
	EXO( stq $1,0($6) )
	EXO( stq $1,0($16) )
	addq $7,8,$7
	addq $17,8,$17
	subq $0,8,$0
	subq $0,8,$0
	addq $6,8,$6
	addq $16,8,$16
	bis $2,$2,$3
	bis $2,$2,$3
	bne $4,$50
	bne $4,$50
$48:
$48:
	beq $0,$41
	beq $0,$41
	.align 4
	.align 4
$57:
$57:
	EXI( ldq_u $1,0($7) )
	EXI( ldq_u $1,0($17) )
	EXO( ldq_u $2,0($6) )
	EXO( ldq_u $2,0($16) )
	extbl $1,$7,$1
	extbl $1,$17,$1
	mskbl $2,$6,$2
	mskbl $2,$16,$2
	insbl $1,$6,$1
	insbl $1,$16,$1
	bis $1,$2,$1
	bis $1,$2,$1
	EXO( stq_u $1,0($6) )
	EXO( stq_u $1,0($16) )
	subq $0,1,$0
	subq $0,1,$0
	addq $6,1,$6
	addq $16,1,$16
	addq $7,1,$7
	addq $17,1,$17
	bne $0,$57
	bne $0,$57
	br $31,$41
	br $31,$41
	.align 4
	.align 4
@@ -108,27 +94,27 @@ $43:
	beq $4,$65
	beq $4,$65
	.align 4
	.align 4
$66:
$66:
	EXI( ldq $1,0($7) )
	EXI( ldq $1,0($17) )
	subq $4,8,$4
	subq $4,8,$4
	EXO( stq $1,0($6) )
	EXO( stq $1,0($16) )
	addq $7,8,$7
	addq $17,8,$17
	subq $0,8,$0
	subq $0,8,$0
	addq $6,8,$6
	addq $16,8,$16
	bne $4,$66
	bne $4,$66
$65:
$65:
	beq $0,$41
	beq $0,$41
	EXI( ldq $2,0($7) )
	EXI( ldq $2,0($17) )
	EXO( ldq $1,0($6) )
	EXO( ldq $1,0($16) )
	mskql $2,$0,$2
	mskql $2,$0,$2
	mskqh $1,$0,$1
	mskqh $1,$0,$1
	bis $2,$1,$2
	bis $2,$1,$2
	EXO( stq $2,0($6) )
	EXO( stq $2,0($16) )
	bis $31,$31,$0
	bis $31,$31,$0
$41:
$41:
$35:
$35:
$exitin:
$exitin:
$exitout:
$exitout:
	ret $31,($28),1
	ret $31,($26),1


	.end __copy_user
	.end __copy_user
EXPORT_SYMBOL(__copy_user)
EXPORT_SYMBOL(__copy_user)
+35 −49
Original line number Original line Diff line number Diff line
@@ -9,21 +9,6 @@
 * a successful copy).  There is also some rather minor exception setup
 * a successful copy).  There is also some rather minor exception setup
 * stuff.
 * stuff.
 *
 *
 * NOTE! This is not directly C-callable, because the calling semantics
 * are different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	exception pointer in $7
 *	return address in $28 (exceptions expect it there)
 *
 * Outputs:
 *	bytes left to copy in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6
 *
 * Much of the information about 21264 scheduling/coding comes from:
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	abbreviated as 'CWG' in other comments here
@@ -56,14 +41,15 @@
	.set noreorder
	.set noreorder
	.align 4
	.align 4


	.globl __do_clear_user
	.globl __clear_user
	.ent __do_clear_user
	.ent __clear_user
	.frame	$30, 0, $28
	.frame	$30, 0, $26
	.prologue 0
	.prologue 0


				# Pipeline info : Slotting & Comments
				# Pipeline info : Slotting & Comments
__do_clear_user:
__clear_user:
	and	$6, 7, $4	# .. E  .. ..	: find dest head misalignment
	and	$17, $17, $0
	and	$16, 7, $4	# .. E  .. ..	: find dest head misalignment
	beq	$0, $zerolength # U  .. .. ..	:  U L U L
	beq	$0, $zerolength # U  .. .. ..	:  U L U L


	addq	$0, $4, $1	# .. .. .. E	: bias counter
	addq	$0, $4, $1	# .. .. .. E	: bias counter
@@ -75,14 +61,14 @@ __do_clear_user:


/*
/*
 * Head is not aligned.  Write (8 - $4) bytes to head of destination
 * Head is not aligned.  Write (8 - $4) bytes to head of destination
 * This means $6 is known to be misaligned
 * This means $16 is known to be misaligned
 */
 */
	EX( ldq_u $5, 0($6) )	# .. .. .. L	: load dst word to mask back in
	EX( ldq_u $5, 0($16) )	# .. .. .. L	: load dst word to mask back in
	beq	$1, $onebyte	# .. .. U  ..	: sub-word store?
	beq	$1, $onebyte	# .. .. U  ..	: sub-word store?
	mskql	$5, $6, $5	# .. U  .. ..	: take care of misaligned head
	mskql	$5, $16, $5	# .. U  .. ..	: take care of misaligned head
	addq	$6, 8, $6	# E  .. .. .. 	: L U U L
	addq	$16, 8, $16	# E  .. .. .. 	: L U U L


	EX( stq_u $5, -8($6) )	# .. .. .. L	:
	EX( stq_u $5, -8($16) )	# .. .. .. L	:
	subq	$1, 1, $1	# .. .. E  ..	:
	subq	$1, 1, $1	# .. .. E  ..	:
	addq	$0, $4, $0	# .. E  .. ..	: bytes left -= 8 - misalignment
	addq	$0, $4, $0	# .. E  .. ..	: bytes left -= 8 - misalignment
	subq	$0, 8, $0	# E  .. .. ..	: U L U L
	subq	$0, 8, $0	# E  .. .. ..	: U L U L
@@ -93,11 +79,11 @@ __do_clear_user:
 * values upon initial entry to the loop
 * values upon initial entry to the loop
 * $1 is number of quadwords to clear (zero is a valid value)
 * $1 is number of quadwords to clear (zero is a valid value)
 * $2 is number of trailing bytes (0..7) ($2 never used...)
 * $2 is number of trailing bytes (0..7) ($2 never used...)
 * $6 is known to be aligned 0mod8
 * $16 is known to be aligned 0mod8
 */
 */
$headalign:
$headalign:
	subq	$1, 16, $4	# .. .. .. E	: If < 16, we can not use the huge loop
	subq	$1, 16, $4	# .. .. .. E	: If < 16, we can not use the huge loop
	and	$6, 0x3f, $2	# .. .. E  ..	: Forward work for huge loop
	and	$16, 0x3f, $2	# .. .. E  ..	: Forward work for huge loop
	subq	$2, 0x40, $3	# .. E  .. ..	: bias counter (huge loop)
	subq	$2, 0x40, $3	# .. E  .. ..	: bias counter (huge loop)
	blt	$4, $trailquad	# U  .. .. ..	: U L U L
	blt	$4, $trailquad	# U  .. .. ..	: U L U L


@@ -114,21 +100,21 @@ $headalign:
	beq	$3, $bigalign	# U  .. .. ..	: U L U L : Aligned 0mod64
	beq	$3, $bigalign	# U  .. .. ..	: U L U L : Aligned 0mod64


$alignmod64:
$alignmod64:
	EX( stq_u $31, 0($6) )	# .. .. .. L
	EX( stq_u $31, 0($16) )	# .. .. .. L
	addq	$3, 8, $3	# .. .. E  ..
	addq	$3, 8, $3	# .. .. E  ..
	subq	$0, 8, $0	# .. E  .. ..
	subq	$0, 8, $0	# .. E  .. ..
	nop			# E  .. .. ..	: U L U L
	nop			# E  .. .. ..	: U L U L


	nop			# .. .. .. E
	nop			# .. .. .. E
	subq	$1, 1, $1	# .. .. E  ..
	subq	$1, 1, $1	# .. .. E  ..
	addq	$6, 8, $6	# .. E  .. ..
	addq	$16, 8, $16	# .. E  .. ..
	blt	$3, $alignmod64	# U  .. .. ..	: U L U L
	blt	$3, $alignmod64	# U  .. .. ..	: U L U L


$bigalign:
$bigalign:
/*
/*
 * $0 is the number of bytes left
 * $0 is the number of bytes left
 * $1 is the number of quads left
 * $1 is the number of quads left
 * $6 is aligned 0mod64
 * $16 is aligned 0mod64
 * we know that we'll be taking a minimum of one trip through
 * we know that we'll be taking a minimum of one trip through
 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
 * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
 * We are _not_ going to update $0 after every single store.  That
 * We are _not_ going to update $0 after every single store.  That
@@ -145,39 +131,39 @@ $bigalign:
	nop			# E :
	nop			# E :
	nop			# E :
	nop			# E :
	nop			# E :
	nop			# E :
	bis	$6,$6,$3	# E : U L U L : Initial wh64 address is dest
	bis	$16,$16,$3	# E : U L U L : Initial wh64 address is dest
	/* This might actually help for the current trip... */
	/* This might actually help for the current trip... */


$do_wh64:
$do_wh64:
	wh64	($3)		# .. .. .. L1	: memory subsystem hint
	wh64	($3)		# .. .. .. L1	: memory subsystem hint
	subq	$1, 16, $4	# .. .. E  ..	: Forward calculation - repeat the loop?
	subq	$1, 16, $4	# .. .. E  ..	: Forward calculation - repeat the loop?
	EX( stq_u $31, 0($6) )	# .. L  .. ..
	EX( stq_u $31, 0($16) )	# .. L  .. ..
	subq	$0, 8, $0	# E  .. .. ..	: U L U L
	subq	$0, 8, $0	# E  .. .. ..	: U L U L


	addq	$6, 128, $3	# E : Target address of wh64
	addq	$16, 128, $3	# E : Target address of wh64
	EX( stq_u $31, 8($6) )	# L :
	EX( stq_u $31, 8($16) )	# L :
	EX( stq_u $31, 16($6) )	# L :
	EX( stq_u $31, 16($16) )	# L :
	subq	$0, 16, $0	# E : U L L U
	subq	$0, 16, $0	# E : U L L U


	nop			# E :
	nop			# E :
	EX( stq_u $31, 24($6) )	# L :
	EX( stq_u $31, 24($16) )	# L :
	EX( stq_u $31, 32($6) )	# L :
	EX( stq_u $31, 32($16) )	# L :
	subq	$0, 168, $5	# E : U L L U : two trips through the loop left?
	subq	$0, 168, $5	# E : U L L U : two trips through the loop left?
	/* 168 = 192 - 24, since we've already completed some stores */
	/* 168 = 192 - 24, since we've already completed some stores */


	subq	$0, 16, $0	# E :
	subq	$0, 16, $0	# E :
	EX( stq_u $31, 40($6) )	# L :
	EX( stq_u $31, 40($16) )	# L :
	EX( stq_u $31, 48($6) )	# L :
	EX( stq_u $31, 48($16) )	# L :
	cmovlt	$5, $6, $3	# E : U L L U : Latency 2, extra mapping cycle
	cmovlt	$5, $16, $3	# E : U L L U : Latency 2, extra mapping cycle


	subq	$1, 8, $1	# E :
	subq	$1, 8, $1	# E :
	subq	$0, 16, $0	# E :
	subq	$0, 16, $0	# E :
	EX( stq_u $31, 56($6) )	# L :
	EX( stq_u $31, 56($16) )	# L :
	nop			# E : U L U L
	nop			# E : U L U L


	nop			# E :
	nop			# E :
	subq	$0, 8, $0	# E :
	subq	$0, 8, $0	# E :
	addq	$6, 64, $6	# E :
	addq	$16, 64, $16	# E :
	bge	$4, $do_wh64	# U : U L U L
	bge	$4, $do_wh64	# U : U L U L


$trailquad:
$trailquad:
@@ -190,14 +176,14 @@ $trailquad:
	beq	$1, $trailbytes	# U  .. .. ..	: U L U L : Only 0..7 bytes to go
	beq	$1, $trailbytes	# U  .. .. ..	: U L U L : Only 0..7 bytes to go


$onequad:
$onequad:
	EX( stq_u $31, 0($6) )	# .. .. .. L
	EX( stq_u $31, 0($16) )	# .. .. .. L
	subq	$1, 1, $1	# .. .. E  ..
	subq	$1, 1, $1	# .. .. E  ..
	subq	$0, 8, $0	# .. E  .. ..
	subq	$0, 8, $0	# .. E  .. ..
	nop			# E  .. .. ..	: U L U L
	nop			# E  .. .. ..	: U L U L


	nop			# .. .. .. E
	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. .. E  ..
	addq	$6, 8, $6	# .. E  .. ..
	addq	$16, 8, $16	# .. E  .. ..
	bgt	$1, $onequad	# U  .. .. ..	: U L U L
	bgt	$1, $onequad	# U  .. .. ..	: U L U L


	# We have an unknown number of bytes left to go.
	# We have an unknown number of bytes left to go.
@@ -211,9 +197,9 @@ $trailbytes:
	# so we will use $0 as the loop counter
	# so we will use $0 as the loop counter
	# We know for a fact that $0 > 0 due to previous context
	# We know for a fact that $0 > 0 due to previous context
$onebyte:
$onebyte:
	EX( stb $31, 0($6) )	# .. .. .. L
	EX( stb $31, 0($16) )	# .. .. .. L
	subq	$0, 1, $0	# .. .. E  ..	:
	subq	$0, 1, $0	# .. .. E  ..	:
	addq	$6, 1, $6	# .. E  .. ..	:
	addq	$16, 1, $16	# .. E  .. ..	:
	bgt	$0, $onebyte	# U  .. .. ..	: U L U L
	bgt	$0, $onebyte	# U  .. .. ..	: U L U L


$zerolength:
$zerolength:
@@ -221,6 +207,6 @@ $exception: # Destination for exception recovery(?)
	nop			# .. .. .. E	:
	nop			# .. .. .. E	:
	nop			# .. .. E  ..	:
	nop			# .. .. E  ..	:
	nop			# .. E  .. ..	:
	nop			# .. E  .. ..	:
	ret	$31, ($28), 1	# L0 .. .. ..	: L U L U
	ret	$31, ($26), 1	# L0 .. .. ..	: L U L U
	.end __do_clear_user
	.end __clear_user
	EXPORT_SYMBOL(__do_clear_user)
	EXPORT_SYMBOL(__clear_user)
+45 −59
Original line number Original line Diff line number Diff line
@@ -12,21 +12,6 @@
 * only _after_ a successful copy). There is also some rather minor
 * only _after_ a successful copy). There is also some rather minor
 * exception setup stuff..
 * exception setup stuff..
 *
 *
 * NOTE! This is not directly C-callable, because the calling semantics are
 * different:
 *
 * Inputs:
 *	length in $0
 *	destination address in $6
 *	source address in $7
 *	return address in $28
 *
 * Outputs:
 *	bytes left to copy in $0
 *
 * Clobbers:
 *	$1,$2,$3,$4,$5,$6,$7
 *
 * Much of the information about 21264 scheduling/coding comes from:
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	abbreviated as 'CWG' in other comments here
@@ -60,10 +45,11 @@
				# Pipeline info: Slotting & Comments
				# Pipeline info: Slotting & Comments
__copy_user:
__copy_user:
	.prologue 0
	.prologue 0
	subq $0, 32, $1		# .. E  .. ..	: Is this going to be a small copy?
	and $18, $18, $0	# $0 = len ($18): $0 is the "bytes left to copy" counter and return value
	subq $18, 32, $1	# .. E  .. ..	: Is this going to be a small copy?
	beq $0, $zerolength	# U  .. .. ..	: U L U L
	beq $0, $zerolength	# U  .. .. ..	: U L U L


	and $6,7,$3		# .. .. .. E	: is leading dest misalignment
	and $16,7,$3		# .. .. .. E	: is leading dest misalignment
	ble $1, $onebyteloop	# .. .. U  ..	: 1st branch : small amount of data
	ble $1, $onebyteloop	# .. .. U  ..	: 1st branch : small amount of data
	beq $3, $destaligned	# .. U  .. ..	: 2nd (one cycle fetcher stall)
	beq $3, $destaligned	# .. U  .. ..	: 2nd (one cycle fetcher stall)
	subq $3, 8, $3		# E  .. .. ..	: L U U L : trip counter
	subq $3, 8, $3		# E  .. .. ..	: L U U L : trip counter
@@ -73,17 +59,17 @@ __copy_user:
 * We know we have at least one trip through this loop
 * We know we have at least one trip through this loop
 */
 */
$aligndest:
$aligndest:
	EXI( ldbu $1,0($7) )	# .. .. .. L	: Keep loads separate from stores
	EXI( ldbu $1,0($17) )	# .. .. .. L	: Keep loads separate from stores
	addq $6,1,$6		# .. .. E  ..	: Section 3.8 in the CWG
	addq $16,1,$16		# .. .. E  ..	: Section 3.8 in the CWG
	addq $3,1,$3		# .. E  .. ..	:
	addq $3,1,$3		# .. E  .. ..	:
	nop			# E  .. .. ..	: U L U L
	nop			# E  .. .. ..	: U L U L


/*
/*
 * the -1 is to compensate for the inc($6) done in a previous quadpack
 * the -1 is to compensate for the inc($16) done in a previous quadpack
 * which allows us zero dependencies within either quadpack in the loop
 * which allows us zero dependencies within either quadpack in the loop
 */
 */
	EXO( stb $1,-1($6) )	# .. .. .. L	:
	EXO( stb $1,-1($16) )	# .. .. .. L	:
	addq $7,1,$7		# .. .. E  ..	: Section 3.8 in the CWG
	addq $17,1,$17		# .. .. E  ..	: Section 3.8 in the CWG
	subq $0,1,$0		# .. E  .. ..	:
	subq $0,1,$0		# .. E  .. ..	:
	bne $3, $aligndest	# U  .. .. ..	: U L U L
	bne $3, $aligndest	# U  .. .. ..	: U L U L


@@ -92,29 +78,29 @@ $aligndest:
 * If we arrived via branch, we have a minimum of 32 bytes
 * If we arrived via branch, we have a minimum of 32 bytes
 */
 */
$destaligned:
$destaligned:
	and $7,7,$1		# .. .. .. E	: Check _current_ source alignment
	and $17,7,$1		# .. .. .. E	: Check _current_ source alignment
	bic $0,7,$4		# .. .. E  ..	: number bytes as a quadword loop
	bic $0,7,$4		# .. .. E  ..	: number bytes as a quadword loop
	EXI( ldq_u $3,0($7) )	# .. L  .. ..	: Forward fetch for fallthrough code
	EXI( ldq_u $3,0($17) )	# .. L  .. ..	: Forward fetch for fallthrough code
	beq $1,$quadaligned	# U  .. .. ..	: U L U L
	beq $1,$quadaligned	# U  .. .. ..	: U L U L


/*
/*
 * In the worst case, we've just executed an ldq_u here from 0($7)
 * In the worst case, we've just executed an ldq_u here from 0($17)
 * and we'll repeat it once if we take the branch
 * and we'll repeat it once if we take the branch
 */
 */


/* Misaligned quadword loop - not unrolled.  Leave it that way. */
/* Misaligned quadword loop - not unrolled.  Leave it that way. */
$misquad:
$misquad:
	EXI( ldq_u $2,8($7) )	# .. .. .. L	:
	EXI( ldq_u $2,8($17) )	# .. .. .. L	:
	subq $4,8,$4		# .. .. E  ..	:
	subq $4,8,$4		# .. .. E  ..	:
	extql $3,$7,$3		# .. U  .. ..	:
	extql $3,$17,$3		# .. U  .. ..	:
	extqh $2,$7,$1		# U  .. .. ..	: U U L L
	extqh $2,$17,$1		# U  .. .. ..	: U U L L


	bis $3,$1,$1		# .. .. .. E	:
	bis $3,$1,$1		# .. .. .. E	:
	EXO( stq $1,0($6) )	# .. .. L  ..	:
	EXO( stq $1,0($16) )	# .. .. L  ..	:
	addq $7,8,$7		# .. E  .. ..	:
	addq $17,8,$17		# .. E  .. ..	:
	subq $0,8,$0		# E  .. .. ..	: U L L U
	subq $0,8,$0		# E  .. .. ..	: U L L U


	addq $6,8,$6		# .. .. .. E	:
	addq $16,8,$16		# .. .. .. E	:
	bis $2,$2,$3		# .. .. E  ..	:
	bis $2,$2,$3		# .. .. E  ..	:
	nop			# .. E  .. ..	:
	nop			# .. E  .. ..	:
	bne $4,$misquad		# U  .. .. ..	: U L U L
	bne $4,$misquad		# U  .. .. ..	: U L U L
@@ -125,8 +111,8 @@ $misquad:
	beq $0,$zerolength	# U  .. .. ..	: U L U L
	beq $0,$zerolength	# U  .. .. ..	: U L U L


/* We know we have at least one trip through the byte loop */
/* We know we have at least one trip through the byte loop */
	EXI ( ldbu $2,0($7) )	# .. .. .. L	: No loads in the same quad
	EXI ( ldbu $2,0($17) )	# .. .. .. L	: No loads in the same quad
	addq $6,1,$6		# .. .. E  ..	: as the store (Section 3.8 in CWG)
	addq $16,1,$16		# .. .. E  ..	: as the store (Section 3.8 in CWG)
	nop			# .. E  .. ..	:
	nop			# .. E  .. ..	:
	br $31, $dirtyentry	# L0 .. .. ..	: L U U L
	br $31, $dirtyentry	# L0 .. .. ..	: L U U L
/* Do the trailing byte loop load, then hop into the store part of the loop */
/* Do the trailing byte loop load, then hop into the store part of the loop */
@@ -136,8 +122,8 @@ $misquad:
 * Based upon the usage context, it's worth the effort to unroll this loop
 * Based upon the usage context, it's worth the effort to unroll this loop
 * $0 - number of bytes to be moved
 * $0 - number of bytes to be moved
 * $4 - number of bytes to move as quadwords
 * $4 - number of bytes to move as quadwords
 * $6 is current destination address
 * $16 is current destination address
 * $7 is current source address
 * $17 is current source address
 */
 */
$quadaligned:
$quadaligned:
	subq	$4, 32, $2	# .. .. .. E	: do not unroll for small stuff
	subq	$4, 32, $2	# .. .. .. E	: do not unroll for small stuff
@@ -155,29 +141,29 @@ $quadaligned:
 * instruction memory hint instruction).
 * instruction memory hint instruction).
 */
 */
$unroll4:
$unroll4:
	EXI( ldq $1,0($7) )	# .. .. .. L
	EXI( ldq $1,0($17) )	# .. .. .. L
	EXI( ldq $2,8($7) )	# .. .. L  ..
	EXI( ldq $2,8($17) )	# .. .. L  ..
	subq	$4,32,$4	# .. E  .. ..
	subq	$4,32,$4	# .. E  .. ..
	nop			# E  .. .. ..	: U U L L
	nop			# E  .. .. ..	: U U L L


	addq	$7,16,$7	# .. .. .. E
	addq	$17,16,$17	# .. .. .. E
	EXO( stq $1,0($6) )	# .. .. L  ..
	EXO( stq $1,0($16) )	# .. .. L  ..
	EXO( stq $2,8($6) )	# .. L  .. ..
	EXO( stq $2,8($16) )	# .. L  .. ..
	subq	$0,16,$0	# E  .. .. ..	: U L L U
	subq	$0,16,$0	# E  .. .. ..	: U L L U


	addq	$6,16,$6	# .. .. .. E
	addq	$16,16,$16	# .. .. .. E
	EXI( ldq $1,0($7) )	# .. .. L  ..
	EXI( ldq $1,0($17) )	# .. .. L  ..
	EXI( ldq $2,8($7) )	# .. L  .. ..
	EXI( ldq $2,8($17) )	# .. L  .. ..
	subq	$4, 32, $3	# E  .. .. ..	: U U L L : is there enough for another trip?
	subq	$4, 32, $3	# E  .. .. ..	: U U L L : is there enough for another trip?


	EXO( stq $1,0($6) )	# .. .. .. L
	EXO( stq $1,0($16) )	# .. .. .. L
	EXO( stq $2,8($6) )	# .. .. L  ..
	EXO( stq $2,8($16) )	# .. .. L  ..
	subq	$0,16,$0	# .. E  .. ..
	subq	$0,16,$0	# .. E  .. ..
	addq	$7,16,$7	# E  .. .. ..	: U L L U
	addq	$17,16,$17	# E  .. .. ..	: U L L U


	nop			# .. .. .. E
	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. .. E  ..
	addq	$6,16,$6	# .. E  .. ..
	addq	$16,16,$16	# .. E  .. ..
	bgt	$3,$unroll4	# U  .. .. ..	: U L U L
	bgt	$3,$unroll4	# U  .. .. ..	: U L U L


	nop
	nop
@@ -186,14 +172,14 @@ $unroll4:
	beq	$4, $noquads
	beq	$4, $noquads


$onequad:
$onequad:
	EXI( ldq $1,0($7) )
	EXI( ldq $1,0($17) )
	subq	$4,8,$4
	subq	$4,8,$4
	addq	$7,8,$7
	addq	$17,8,$17
	nop
	nop


	EXO( stq $1,0($6) )
	EXO( stq $1,0($16) )
	subq	$0,8,$0
	subq	$0,8,$0
	addq	$6,8,$6
	addq	$16,8,$16
	bne	$4,$onequad
	bne	$4,$onequad


$noquads:
$noquads:
@@ -207,23 +193,23 @@ $noquads:
 * There's no point in doing a lot of complex alignment calculations to try to
 * There's no point in doing a lot of complex alignment calculations to try to
 * do quadword stuff for a small amount of data.
 * do quadword stuff for a small amount of data.
 *	$0 - remaining number of bytes left to copy
 *	$0 - remaining number of bytes left to copy
 *	$6 - current dest addr
 *	$16 - current dest addr
 *	$7 - current source addr
 *	$17 - current source addr
 */
 */


$onebyteloop:
$onebyteloop:
	EXI ( ldbu $2,0($7) )	# .. .. .. L	: No loads in the same quad
	EXI ( ldbu $2,0($17) )	# .. .. .. L	: No loads in the same quad
	addq $6,1,$6		# .. .. E  ..	: as the store (Section 3.8 in CWG)
	addq $16,1,$16		# .. .. E  ..	: as the store (Section 3.8 in CWG)
	nop			# .. E  .. ..	:
	nop			# .. E  .. ..	:
	nop			# E  .. .. ..	: U L U L
	nop			# E  .. .. ..	: U L U L


$dirtyentry:
$dirtyentry:
/*
/*
 * the -1 is to compensate for the inc($6) done in a previous quadpack
 * the -1 is to compensate for the inc($16) done in a previous quadpack
 * which allows us zero dependencies within either quadpack in the loop
 * which allows us zero dependencies within either quadpack in the loop
 */
 */
	EXO ( stb $2,-1($6) )	# .. .. .. L	:
	EXO ( stb $2,-1($16) )	# .. .. .. L	:
	addq $7,1,$7		# .. .. E  ..	: quadpack as the load
	addq $17,1,$17		# .. .. E  ..	: quadpack as the load
	subq $0,1,$0		# .. E  .. ..	: change count _after_ copy
	subq $0,1,$0		# .. E  .. ..	: change count _after_ copy
	bgt $0,$onebyteloop	# U  .. .. ..	: U L U L
	bgt $0,$onebyteloop	# U  .. .. ..	: U L U L


@@ -233,7 +219,7 @@ $exitout: # Destination for exception recovery(?)
	nop			# .. .. .. E
	nop			# .. .. .. E
	nop			# .. .. E  ..
	nop			# .. .. E  ..
	nop			# .. E  .. ..
	nop			# .. E  .. ..
	ret $31,($28),1		# L0 .. .. ..	: L U L U
	ret $31,($26),1		# L0 .. .. ..	: L U L U


	.end __copy_user
	.end __copy_user
	EXPORT_SYMBOL(__copy_user)
	EXPORT_SYMBOL(__copy_user)