Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 597bc5c0 authored by Paul Mackerras
Browse files

powerpc: Improve resolution of VDSO clock_gettime



Currently the clock_gettime implementation in the VDSO produces a
result with microsecond resolution for the cases that are handled
without a system call, i.e. CLOCK_REALTIME and CLOCK_MONOTONIC.  The
nanoseconds field of the result is obtained by computing a
microseconds value and multiplying by 1000.

This changes the code in the VDSO to do the computation for
clock_gettime with nanosecond resolution.  That means that the
resolution of the result will ultimately depend on the timebase
frequency.

Because the timestamp in the VDSO datapage (stamp_xsec, the real time
corresponding to the timebase count in tb_orig_stamp) is in units of
2^-20 seconds, it doesn't have sufficient resolution for computing a
result with nanosecond resolution.  Therefore this adds a copy of
xtime to the VDSO datapage and updates it in update_gtod() along with
the other time-related fields.

Signed-off-by: Paul Mackerras <paulus@samba.org>
parent c73049f6
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#ifndef __ASSEMBLY__

#include <linux/unistd.h>
#include <linux/time.h>

#define SYSCALL_MAP_SIZE      ((__NR_syscalls + 31) / 32)

@@ -83,6 +84,7 @@ struct vdso_data {
	__u32 icache_log_block_size;		/* L1 i-cache log block size */
	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
	__s32 wtom_clock_nsec;
	struct timespec stamp_xtime;	/* xtime as at tb_orig_stamp */
   	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
   	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
@@ -102,6 +104,7 @@ struct vdso_data {
	__u32 tz_dsttime;		/* Type of dst correction	0x5C */
	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
	__s32 wtom_clock_nsec;
	struct timespec stamp_xtime;	/* xtime as at tb_orig_stamp */
   	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
	__u32 dcache_block_size;	/* L1 d-cache block size     */
	__u32 icache_block_size;	/* L1 i-cache block size     */
+1 −0
Original line number Diff line number Diff line
@@ -306,6 +306,7 @@ int main(void)
	DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
	DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
	DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
	DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
	DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
	DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
	DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
+1 −0
Original line number Diff line number Diff line
@@ -456,6 +456,7 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
	vdso_data->tb_to_xs = new_tb_to_xs;
	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
	vdso_data->stamp_xtime = xtime;
	smp_wmb();
	++(vdso_data->tb_update_count);
}
+125 −83
Original line number Diff line number Diff line
@@ -16,6 +16,13 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>

/*
 * Offset for the low 32-bit part of a field of long type.
 * With CONFIG_PPC64 the low word is read from 4 bytes into the
 * field; otherwise it is read from the start of the field.  Adding
 * LOPART to a field offset lets a single lwz fetch the low word in
 * both configurations without an #ifdef at every use.
 */
#ifdef CONFIG_PPC64
#define LOPART	4
#else
#define LOPART	0
#endif

	.text
/*
 * Exact prototype of gettimeofday
@@ -90,101 +97,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)

	mflr	r12			/* r12 saves lr */
  .cfi_register lr,r12
	mr	r10,r3			/* r10 saves id */
	mr	r11,r4			/* r11 saves tp */
	bl	__get_datapage@local	/* get data page */
	mr	r9,r3			/* datapage ptr in r9 */
	beq	cr1,50f			/* if monotonic -> jump there */

	/*
	 * CLOCK_REALTIME
	 */

	bl	__do_get_xsec@local	/* get xsec from tb & kernel */
	bne-	98f			/* out of line -> do syscall */

	/* seconds are xsec >> 20 */
	rlwinm	r5,r4,12,20,31
	rlwimi	r5,r3,12,0,19
	stw	r5,TSPC32_TV_SEC(r11)

	/* get remaining xsec and convert to nsec. we scale
	 * up remaining xsec by 12 bits and get the top 32 bits
	 * of the multiplication, then we multiply by 1000
	 */
	rlwinm	r5,r4,12,0,19
	lis	r6,1000000@h
	ori	r6,r6,1000000@l
	mulhwu	r5,r5,r6
	mulli	r5,r5,1000
	stw	r5,TSPC32_TV_NSEC(r11)
	mtlr	r12
	crclr	cr0*4+so
	li	r3,0
	blr
50:	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */
	bne	cr1,80f			/* not monotonic -> all done */

	/*
	 * CLOCK_MONOTONIC
	 */

50:	bl	__do_get_xsec@local	/* get xsec from tb & kernel */
	bne-	98f			/* out of line -> do syscall */

	/* seconds are xsec >> 20 */
	rlwinm	r6,r4,12,20,31
	rlwimi	r6,r3,12,0,19

	/* get remaining xsec and convert to nsec. we scale
	 * up remaining xsec by 12 bits and get the top 32 bits
	 * of the multiplication, then we multiply by 1000
	 */
	rlwinm	r7,r4,12,0,19
	lis	r5,1000000@h
	ori	r5,r5,1000000@l
	mulhwu	r7,r7,r5
	mulli	r7,r7,1000

	/* now we must fixup using wall to monotonic. We need to snapshot
	 * that value and do the counter trick again. Fortunately, we still
	 * have the counter value in r8 that was returned by __do_get_xsec.
	 * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
	 * can be used
	 * At this point, r3,r4 contain our sec/nsec values, r5 and r6
	 * can be used, r7 contains NSEC_PER_SEC.
	 */

	lwz	r3,WTOM_CLOCK_SEC(r9)
	lwz	r4,WTOM_CLOCK_NSEC(r9)
	lwz	r5,WTOM_CLOCK_SEC(r9)
	lwz	r6,WTOM_CLOCK_NSEC(r9)

	/* We now have our result in r3,r4. We create a fake dependency
	 * on that result and re-check the counter
	/* We now have our offset in r5,r6. We create a fake dependency
	 * on that value and re-check the counter
	 */
	or	r5,r4,r3
	xor	r0,r5,r5
	or	r0,r6,r5
	xor	r0,r0,r0
	add	r9,r9,r0
#ifdef CONFIG_PPC64
	lwz	r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
	lwz	r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
        cmpl    cr0,r8,r0		/* check if updated */
	bne-	50b

	/* Calculate and store result. Note that this mimmics the C code,
	/* Calculate and store result. Note that this mimics the C code,
	 * which may cause funny results if nsec goes negative... is that
	 * possible at all ?
	 */
	add	r3,r3,r6
	add	r4,r4,r7
	lis	r5,NSEC_PER_SEC@h
	ori	r5,r5,NSEC_PER_SEC@l
	cmpl	cr0,r4,r5
	cmpli	cr1,r4,0
	add	r3,r3,r5
	add	r4,r4,r6
	cmpw	cr0,r4,r7
	cmpwi	cr1,r4,0
	blt	1f
	subf	r4,r5,r4
	subf	r4,r7,r4
	addi	r3,r3,1
1:	bge	cr1,1f
1:	bge	cr1,80f
	addi	r3,r3,-1
	add	r4,r4,r5
1:	stw	r3,TSPC32_TV_SEC(r11)
	add	r4,r4,r7

80:	stw	r3,TSPC32_TV_SEC(r11)
	stw	r4,TSPC32_TV_NSEC(r11)

	mtlr	r12
@@ -195,10 +154,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
	/*
	 * syscall fallback
	 */
98:
	mtlr	r12
	mr	r3,r10
	mr	r4,r11
99:
	li	r0,__NR_clock_gettime
	sc
@@ -254,11 +209,7 @@ __do_get_xsec:
	/* Check for update count & load values. We use the low
	 * order 32 bits of the update count
	 */
#ifdef CONFIG_PPC64
1:	lwz	r8,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
1:	lwz	r8,(CFG_TB_UPDATE_COUNT)(r9)
#endif
1:	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
	andi.	r0,r8,1			/* pending update ? loop */
	bne-	1b
	xor	r0,r8,r8		/* create dependency */
@@ -305,11 +256,7 @@ __do_get_xsec:
	or	r6,r4,r3
	xor	r0,r6,r6
	add	r9,r9,r0
#ifdef CONFIG_PPC64
	lwz	r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
	lwz	r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
        cmpl    cr0,r8,r0		/* check if updated */
	bne-	1b

@@ -322,3 +269,98 @@ __do_get_xsec:
	 */
3:	blr
  .cfi_endproc

/*
 * This is the core of clock_gettime(), it returns the current
 * time in seconds and nanoseconds in r3 and r4.
 * It expects the datapage ptr in r9 and doesn't clobber it
 * (only always-zero "fake dependency" values are added to it).
 * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
 * The carry bit (CA) is also modified by the carrying arithmetic.
 * On return, r8 contains the counter value that can be reused,
 * i.e. the low 32 bits of the update count sampled on entry.
 * This clobbers cr0 but not any other cr field.
 *
 * Outline:
 *   1. wait until the update count is even, remember it in r8
 *   2. read the timebase and subtract the stored tb_orig_stamp
 *   3. shift the 64-bit difference left 12 bits and multiply it by
 *      the 64-bit tb_to_xs value, keeping the upper 64 bits of the
 *      product (whole seconds in r3, fraction of a second in r4)
 *   4. scale the fraction to nanoseconds and add stamp_xtime
 *   5. re-read the update count; start over at 1 if it changed
 *   6. fold a nanosecond overflow into the seconds
 */
__do_get_tspec:
  .cfi_startproc
	/* Check for update count & load values. We use the low
	 * order 32 bits of the update count
	 */
1:	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
	andi.	r0,r8,1			/* pending update ? loop */
	bne-	1b
	/* r0 = r8 ^ r8 is always 0; adding it to r9 leaves the pointer
	 * unchanged but makes the address of the following loads depend
	 * on the value just loaded into r8.
	 */
	xor	r0,r8,r8		/* create dependency */
	add	r9,r9,r0

	/* Load orig stamp (offset to TB): r5 is paired with the upper
	 * timebase word below, r6 with the lower one.
	 */
	lwz	r5,CFG_TB_ORIG_STAMP(r9)
	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9)

	/* Get a stable TB value: the upper half is read before and after
	 * the lower half, and the sequence is retried if the two upper
	 * reads differ.
	 */
2:	mftbu	r3
	mftbl	r4
	mftbu	r0
	cmpl	cr0,r3,r0
	bne-	2b

	/* Subtract tb orig stamp and shift left 12 bits.
	 * The 64-bit difference ends up in r0 (high word) and r7 (low
	 * word).  The record form "rlwimi." sets cr0 from the final
	 * high word; that result is consumed by the "beq+ 4f" below,
	 * so nothing in between may modify cr0.
	 */
	subfc	r7,r6,r4
	subfe	r0,r5,r3
	slwi	r0,r0,12
	rlwimi.	r0,r7,12,20,31
	slwi	r7,r7,12

	/* Load scale factor & do multiplication.
	 * First the partial products involving the low word (r7) of the
	 * difference.  r10 only accumulates the part of the product below
	 * the result, so that its carry can be propagated into r4.
	 */
	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */
	lwz	r6,(CFG_TB_TO_XS+4)(r9)
	mulhwu	r3,r7,r6
	mullw	r10,r7,r5
	mulhwu	r4,r7,r5
	addc	r10,r3,r10
	li	r3,0

	/* cr0 still holds the result of "rlwimi." above: equal means the
	 * high word of the shifted difference is zero.
	 */
	beq+	4f			/* skip high part computation if 0 */
	/* Partial products involving the high word (r0) of the
	 * difference, accumulated into r3 (seconds) and r4 (fraction)
	 * with carries propagated through CA.
	 */
	mulhwu	r3,r0,r5
	mullw	r7,r0,r5
	mulhwu	r5,r0,r6
	mullw	r6,r0,r6
	adde	r4,r4,r7
	addze	r3,r3
	addc	r4,r4,r5
	addze	r3,r3
	addc	r10,r10,r6

4:	addze	r4,r4			/* add in carry */
	lis	r7,NSEC_PER_SEC@h
	ori	r7,r7,NSEC_PER_SEC@l
	/* High 32 bits of r4 * NSEC_PER_SEC: r4 is treated as a 32-bit
	 * binary fraction of a second.
	 */
	mulhwu	r4,r4,r7		/* convert to nanoseconds */

	/* At this point, we have seconds & nanoseconds since the xtime
	 * stamp in r3+CA and r4.  Load & add the xtime stamp.
	 * CA is the carry left by the "addze r4,r4" at label 4; none of
	 * the instructions since then change it, and the "adde" below
	 * folds it into the seconds.
	 */
#ifdef CONFIG_PPC64
	lwz	r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
	lwz	r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
#else
	lwz	r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
	lwz	r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
#endif
	add	r4,r4,r6
	adde	r3,r3,r5

	/* We now have our result in r3,r4. We create a fake dependency
	 * on that result and re-check the counter.  If the count differs
	 * from the one saved in r8, the whole computation is redone.
	 */
	or	r6,r4,r3
	xor	r0,r6,r6
	add	r9,r9,r0
	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
        cmpl    cr0,r8,r0		/* check if updated */
	bne-	1b

	/* check for nanosecond overflow and adjust if necessary;
	 * r7 still holds NSEC_PER_SEC
	 */
	cmpw	r4,r7
	bltlr				/* all done if no overflow */
	subf	r4,r7,r4		/* adjust if overflow */
	addi	r3,r3,1

	blr
  .cfi_endproc
+76 −65
Original line number Diff line number Diff line
@@ -75,90 +75,49 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)

	mflr	r12			/* r12 saves lr */
  .cfi_register lr,r12
	mr	r10,r3			/* r10 saves id */
	mr	r11,r4			/* r11 saves tp */
	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
	beq	cr1,50f			/* if monotonic -> jump there */

	/*
	 * CLOCK_REALTIME
	 */

	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */

	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */
	ori     r7,r7,16960
	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */
	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */
	std	r5,TSPC64_TV_SEC(r11)	/* store sec in tv */
	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */
	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) /
					 * XSEC_PER_SEC
					 */
	rldicl  r0,r0,44,20
	mulli	r0,r0,1000		/* nsec = usec * 1000 */
	std	r0,TSPC64_TV_NSEC(r11)	/* store nsec in tp */

	mtlr	r12
	crclr	cr0*4+so
	li	r3,0
	blr
50:	bl	V_LOCAL_FUNC(__do_get_tspec)	/* get time from tb & kernel */
	bne	cr1,80f			/* if not monotonic, all done */

	/*
	 * CLOCK_MONOTONIC
	 */

50:	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */

	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */
	ori     r7,r7,16960
	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */
	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */
	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */
	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) /
					 * XSEC_PER_SEC
					 */
	rldicl  r6,r0,44,20
	mulli	r6,r6,1000		/* nsec = usec * 1000 */

	/* now we must fixup using wall to monotonic. We need to snapshot
	 * that value and do the counter trick again. Fortunately, we still
	 * have the counter value in r8 that was returned by __do_get_xsec.
	 * At this point, r5,r6 contain our sec/nsec values.
	 * can be used
	 * have the counter value in r8 that was returned by __do_get_tspec.
	 * At this point, r4,r5 contain our sec/nsec values.
	 */

	lwa	r4,WTOM_CLOCK_SEC(r3)
	lwa	r7,WTOM_CLOCK_NSEC(r3)
	lwa	r6,WTOM_CLOCK_SEC(r3)
	lwa	r9,WTOM_CLOCK_NSEC(r3)

	/* We now have our result in r4,r7. We create a fake dependency
	/* We now have our result in r6,r9. We create a fake dependency
	 * on that result and re-check the counter
	 */
	or	r9,r4,r7
	xor	r0,r9,r9
	or	r0,r6,r9
	xor	r0,r0,r0
	add	r3,r3,r0
	ld	r0,CFG_TB_UPDATE_COUNT(r3)
        cmpld   cr0,r0,r8		/* check if updated */
	bne-	50b

	/* Calculate and store result. Note that this mimmics the C code,
	 * which may cause funny results if nsec goes negative... is that
	 * possible at all ?
	/* Add wall->monotonic offset and check for overflow or underflow.
	 */
	add	r4,r4,r5
	add	r7,r7,r6
	lis	r9,NSEC_PER_SEC@h
	ori	r9,r9,NSEC_PER_SEC@l
	cmpl	cr0,r7,r9
	cmpli	cr1,r7,0
	add	r4,r4,r6
	add	r5,r5,r9
	cmpd	cr0,r5,r7
	cmpdi	cr1,r5,0
	blt	1f
	subf	r7,r9,r7
	subf	r5,r7,r5
	addi	r4,r4,1
1:	bge	cr1,1f
1:	bge	cr1,80f
	addi	r4,r4,-1
	add	r7,r7,r9
1:	std	r4,TSPC64_TV_SEC(r11)
	std	r7,TSPC64_TV_NSEC(r11)
	add	r5,r5,r7

80:	std	r4,TSPC64_TV_SEC(r11)
	std	r5,TSPC64_TV_NSEC(r11)

	mtlr	r12
	crclr	cr0*4+so
@@ -168,10 +127,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
	/*
	 * syscall fallback
	 */
98:
	mtlr	r12
	mr	r3,r10
	mr	r4,r11
99:
	li	r0,__NR_clock_gettime
	sc
@@ -253,3 +208,59 @@ V_FUNCTION_BEGIN(__do_get_xsec)
	blr
  .cfi_endproc
V_FUNCTION_END(__do_get_xsec)

/*
 * This is the core of clock_gettime(), it returns the current
 * time in seconds and nanoseconds in r4 and r5.
 * It expects the datapage ptr in r3 and doesn't clobber it
 * (only always-zero "fake dependency" values are added to it).
 * It clobbers r0, r6 and r9 and returns NSEC_PER_SEC in r7.
 * On return, r8 contains the counter value that can be reused.
 * This clobbers cr0 but not any other cr field.
 */
V_FUNCTION_BEGIN(__do_get_tspec)
  .cfi_startproc
	/* check for update count & load values */
1:	ld	r8,CFG_TB_UPDATE_COUNT(r3)
	andi.	r0,r8,1			/* pending update ? loop */
	bne-	1b
	/* r0 = r8 ^ r8 is always 0; adding it to r3 leaves the pointer
	 * unchanged but makes the address of the following loads depend
	 * on the value just loaded into r8.
	 */
	xor	r0,r8,r8		/* create dependency */
	add	r3,r3,r0

	/* Get TB & offset it. We use the MFTB macro which will generate
	 * workaround code for Cell.
	 * After the subtraction r7 holds the timebase ticks elapsed
	 * since tb_orig_stamp; r9 is used as scratch here.
	 */
	MFTB(r7)
	ld	r9,CFG_TB_ORIG_STAMP(r3)
	subf	r7,r9,r7

	/* Scale result: r6 = upper 64 bits of (ticks << 12) * tb_to_xs */
	ld	r5,CFG_TB_TO_XS(r3)
	sldi	r7,r7,12		/* compute time since stamp_xtime */
	mulhdu	r6,r7,r5		/* in units of 2^-32 seconds */

	/* Add stamp since epoch.
	 * r4/r5 receive the stamp's seconds and nanoseconds.  The values
	 * r4, r5 and r6 are OR-ed together and XOR-ed with themselves to
	 * give an always-zero r0 that depends on all three; adding it to
	 * r3 ties the update-count reload to them.  If the count no
	 * longer matches r8 the whole sequence is redone.
	 */
	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
	ld	r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
	or	r0,r4,r5
	or	r0,r0,r6
	xor	r0,r0,r0
	add	r3,r3,r0
	ld	r0,CFG_TB_UPDATE_COUNT(r3)
	cmpld   r0,r8			/* check if updated */
	bne-	1b			/* reload if so */

	/* convert to seconds & nanoseconds and add to stamp.
	 * The low 32 bits of r6 are the fraction of a second and are
	 * scaled by NSEC_PER_SEC (mulhwu, result masked to 32 bits by
	 * clrldi); the high 32 bits of r6 are the whole seconds.
	 */
	lis	r7,NSEC_PER_SEC@h
	ori	r7,r7,NSEC_PER_SEC@l
	mulhwu	r0,r6,r7		/* compute nanoseconds and */
	srdi	r6,r6,32		/* seconds since stamp_xtime */
	clrldi	r0,r0,32
	add	r5,r5,r0		/* add nanoseconds together */
	cmpd	r5,r7			/* overflow? */
	add	r4,r4,r6
	bltlr				/* all done if no overflow */
	subf	r5,r7,r5		/* if overflow, adjust */
	addi	r4,r4,1
	blr
  .cfi_endproc
V_FUNCTION_END(__do_get_tspec)