Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bf66def8 authored by Kevin Brodsky's avatar Kevin Brodsky Committed by Mark Salyzyn
Browse files

UPSTREAM: arm64: Refactor vDSO time functions



(cherry pick from commit b33f491f5a9aaf171b7de0f905362eb0314af478)

Time functions are directly implemented in assembly in arm64, and it
is desirable to keep it this way for performance reasons (everything
fits in registers, so that the stack is not used at all). However, the
current implementation is quite difficult to read and understand (even
considering it's assembly).  Additionally, due to the structure of
__kernel_clock_gettime, which heavily uses conditional branches to
share code between the different clocks, it is difficult to support a
new clock without making the branches even harder to follow.

This commit completely refactors the structure of clock_gettime (and
gettimeofday along the way) while keeping exactly the same algorithms.
We no longer try to share code; instead, macros provide common
operations. This new approach comes with a number of advantages:
- In clock_gettime, clock implementations are no longer interspersed,
  making them much more readable. Additionally, macros only use
  registers passed as arguments or reserved with .req, this way it is
  easy to make sure that registers are properly allocated. To avoid a
  large number of branches in a given execution path, a jump table is
  used; a normal execution uses 3 unconditional branches.
- __do_get_tspec has been replaced with 2 macros (get_ts_clock_mono,
  get_clock_shifted_nsec) and explicit loading of data from the vDSO
  page. Consequently, clock_gettime and gettimeofday are now leaf
  functions, and saving x30 (lr) is no longer necessary.
- Variables protected by tb_seq_count are now loaded all at once,
  allowing to merge the seqcnt_read macro into seqcnt_check.
- For CLOCK_REALTIME_COARSE, removed an unused load of the wall to
  monotonic timespec.
- For CLOCK_MONOTONIC_COARSE, removed a few shift instructions.

Obviously, the downside of sharing less code is an increase in code
size. However since the vDSO has its own code page, this does not
really matter, as long as the size of the DSO remains below 4 kB. For
now this should be all right:
                    Before  After
  vdso.so size (B)  2776    3000

Signed-off-by: default avatarKevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: default avatarDave Martin <dave.martin@arm.com>
Acked-by: default avatarWill Deacon <will.deacon@arm.com>
Signed-off-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
Bug: 20045882
Bug: 19198045
Change-Id: I0a0036c54419ee561efe3aad97489e7748ddc59e
parent dd803333
Loading
Loading
Loading
Loading
+172 −126
Original line number Diff line number Diff line
@@ -26,24 +26,100 @@
#define NSEC_PER_SEC_HI16	0x3b9a

vdso_data	.req	x6
use_syscall	.req	w7
seqcnt		.req	w8
seqcnt		.req	w7
w_tmp		.req	w8
x_tmp		.req	x8

/*
 * Conventions for macro arguments:
 * - An argument is write-only if its name starts with "res".
 * - All other arguments are read-only, unless otherwise specified.
 */

	.macro	seqcnt_acquire
9999:	ldr	seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
	tbnz	seqcnt, #0, 9999b
	dmb	ishld
	ldr	use_syscall, [vdso_data, #VDSO_USE_SYSCALL]
	.endm

	.macro	seqcnt_read, cnt
	.macro	seqcnt_check fail
	dmb	ishld
	ldr	\cnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
	ldr	w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
	cmp	w_tmp, seqcnt
	b.ne	\fail
	.endm

	.macro	seqcnt_check, cnt, fail
	cmp	\cnt, seqcnt
	b.ne	\fail
	.macro	syscall_check fail
	ldr	w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
	cbnz	w_tmp, \fail
	.endm

	.macro get_nsec_per_sec res
	mov	\res, #NSEC_PER_SEC_LO16
	movk	\res, #NSEC_PER_SEC_HI16, lsl #16
	.endm

	/*
	 * Returns the clock delta, in nanoseconds left-shifted by the clock
	 * shift.
	 */
	.macro	get_clock_shifted_nsec res, cycle_last, mult
	/* Read the virtual counter. */
	isb
	mrs	x_tmp, cntvct_el0
	/* Calculate cycle delta and convert to ns. */
	sub	\res, x_tmp, \cycle_last
	/* We can only guarantee 56 bits of precision. */
	movn	x_tmp, #0xff00, lsl #48
	and	\res, x_tmp, \res
	mul	\res, \res, \mult
	.endm

	/*
	 * Returns in res_{sec,nsec} the REALTIME timespec, based on the
	 * "wall time" (xtime) and the clock_mono delta.
	 */
	.macro	get_ts_realtime res_sec, res_nsec, \
			clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
	add	\res_nsec, \clock_nsec, \xtime_nsec
	udiv	x_tmp, \res_nsec, \nsec_to_sec
	add	\res_sec, \xtime_sec, x_tmp
	msub	\res_nsec, x_tmp, \nsec_to_sec, \res_nsec
	.endm

	/* sec and nsec are modified in place. */
	.macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
	/* Add timespec. */
	add	\sec, \sec, \ts_sec
	add	\nsec, \nsec, \ts_nsec

	/* Normalise the new timespec. */
	cmp	\nsec, \nsec_to_sec
	b.lt	9999f
	sub	\nsec, \nsec, \nsec_to_sec
	add	\sec, \sec, #1
9999:
	cmp	\nsec, #0
	b.ge	9998f
	add	\nsec, \nsec, \nsec_to_sec
	sub	\sec, \sec, #1
9998:
	.endm

	.macro clock_gettime_return, shift=0
	.if \shift == 1
	lsr	x11, x11, x12
	.endif
	stp	x10, x11, [x1, #TSPEC_TV_SEC]
	mov	x0, xzr
	ret
	.endm

	.macro jump_slot jumptable, index, label
	.if (. - \jumptable) != 4 * (\index)
	.error "Jump slot index mismatch"
	.endif
	b	\label
	.endm

	.text
@@ -51,18 +127,24 @@ seqcnt .req w8
/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
ENTRY(__kernel_gettimeofday)
	.cfi_startproc
	mov	x2, x30
	.cfi_register x30, x2

	/* Acquire the sequence counter and get the timespec. */
	adr	vdso_data, _vdso_data
1:	seqcnt_acquire
	cbnz	use_syscall, 4f

	/* If tv is NULL, skip to the timezone code. */
	cbz	x0, 2f
	bl	__do_get_tspec
	seqcnt_check w9, 1b

	/* Compute the time of day. */
1:	seqcnt_acquire
	syscall_check fail=4f
	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
	seqcnt_check fail=1b

	get_nsec_per_sec res=x9
	lsl	x9, x9, x12

	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
	get_ts_realtime res_sec=x10, res_nsec=x11, \
		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9

	/* Convert ns to us. */
	mov	x13, #1000
@@ -76,95 +158,102 @@ ENTRY(__kernel_gettimeofday)
	stp	w4, w5, [x1, #TZ_MINWEST]
3:
	mov	x0, xzr
	ret	x2
	ret
4:
	/* Syscall fallback. */
	mov	x8, #__NR_gettimeofday
	svc	#0
	ret	x2
	ret
	.cfi_endproc
ENDPROC(__kernel_gettimeofday)

#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE

/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
ENTRY(__kernel_clock_gettime)
	.cfi_startproc
	cmp	w0, #CLOCK_REALTIME
	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne
	b.ne	2f

	mov	x2, x30
	.cfi_register x30, x2

	/* Get kernel timespec. */
	cmp 	w0, #JUMPSLOT_MAX
	b.hi 	syscall
	adr	vdso_data, _vdso_data
1:	seqcnt_acquire
	cbnz	use_syscall, 7f
	adr 	x_tmp, jumptable
	add 	x_tmp, x_tmp, w0, uxtw #2
	br 	x_tmp

	ALIGN
jumptable:
	jump_slot jumptable, CLOCK_REALTIME, realtime
	jump_slot jumptable, CLOCK_MONOTONIC, monotonic
	b 	syscall
	b 	syscall
	b 	syscall
	jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
	jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse

	.if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
	.error	"Wrong jumptable size"
	.endif

	ALIGN
realtime:
	seqcnt_acquire
	syscall_check fail=syscall
	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
	seqcnt_check fail=realtime

	bl	__do_get_tspec
	seqcnt_check w9, 1b
	/* All computations are done with left-shifted nsecs. */
	get_nsec_per_sec res=x9
	lsl	x9, x9, x12

	mov	x30, x2
	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
	get_ts_realtime res_sec=x10, res_nsec=x11, \
		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
	clock_gettime_return, shift=1

	cmp	w0, #CLOCK_MONOTONIC
	b.ne	6f
	ALIGN
monotonic:
	seqcnt_acquire
	syscall_check fail=syscall
	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
	ldp	x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
	seqcnt_check fail=monotonic

	/* Get wtm timespec. */
	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
	/* All computations are done with left-shifted nsecs. */
	lsl	x4, x4, x12
	get_nsec_per_sec res=x9
	lsl	x9, x9, x12

	/* Check the sequence counter. */
	seqcnt_read w9
	seqcnt_check w9, 1b
	b	4f
2:
	cmp	w0, #CLOCK_REALTIME_COARSE
	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
	b.ne	8f
	get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
	get_ts_realtime res_sec=x10, res_nsec=x11, \
		clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9

	/* xtime_coarse_nsec is already right-shifted */
	mov	x12, #0
	add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
	clock_gettime_return, shift=1

	/* Get coarse timespec. */
	adr	vdso_data, _vdso_data
3:	seqcnt_acquire
	ALIGN
realtime_coarse:
	seqcnt_acquire
	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
	seqcnt_check fail=realtime_coarse
	clock_gettime_return

	/* Get wtm timespec. */
	ALIGN
monotonic_coarse:
	seqcnt_acquire
	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
	seqcnt_check fail=monotonic_coarse

	/* Check the sequence counter. */
	seqcnt_read w9
	seqcnt_check w9, 3b

	cmp	w0, #CLOCK_MONOTONIC_COARSE
	b.ne	6f
4:
	/* Add on wtm timespec. */
	add	x10, x10, x13
	lsl	x14, x14, x12
	add	x11, x11, x14

	/* Normalise the new timespec. */
	mov	x15, #NSEC_PER_SEC_LO16
	movk	x15, #NSEC_PER_SEC_HI16, lsl #16
	lsl	x15, x15, x12
	cmp	x11, x15
	b.lt	5f
	sub	x11, x11, x15
	add	x10, x10, #1
5:
	cmp	x11, #0
	b.ge	6f
	add	x11, x11, x15
	sub	x10, x10, #1
	/* Computations are done in (non-shifted) nsecs. */
	get_nsec_per_sec res=x9
	add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
	clock_gettime_return

6:	/* Store to the user timespec. */
	lsr	x11, x11, x12
	stp	x10, x11, [x1, #TSPEC_TV_SEC]
	mov	x0, xzr
	ret
7:
	mov	x30, x2
8:	/* Syscall fallback. */
	ALIGN
syscall: /* Syscall fallback. */
	mov	x8, #__NR_clock_gettime
	svc	#0
	ret
@@ -204,46 +293,3 @@ ENTRY(__kernel_clock_getres)
	.quad	CLOCK_COARSE_RES
	.cfi_endproc
ENDPROC(__kernel_clock_getres)

/*
 * Read the current time from the architected counter.
 * Expects vdso_data to be initialised.
 * Clobbers the temporary registers (x9 - x15).
 * Returns:
 *  - w9		= vDSO sequence counter
 *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
 *  - w12		= cs_shift
 */
ENTRY(__do_get_tspec)
	.cfi_startproc

	/* Read from the vDSO data page. */
	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
	seqcnt_read w9

	/* Read the virtual counter. */
	isb
	mrs	x15, cntvct_el0

	/* Calculate cycle delta and convert to ns. */
	sub	x10, x15, x10
	/* We can only guarantee 56 bits of precision. */
	movn	x15, #0xff00, lsl #48
	and	x10, x15, x10
	mul	x10, x10, x11

	/* Use the kernel time to calculate the new timespec. */
	mov	x11, #NSEC_PER_SEC_LO16
	movk	x11, #NSEC_PER_SEC_HI16, lsl #16
	lsl	x11, x11, x12
	add	x15, x10, x14
	udiv	x14, x15, x11
	add	x10, x13, x14
	mul	x13, x14, x11
	sub	x11, x15, x13

	ret
	.cfi_endproc
ENDPROC(__do_get_tspec)