Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c818a181 authored by Andi Kleen, committed by Linus Torvalds
Browse files

[PATCH] x86_64: On Intel CPUs don't do an additional CPU sync before RDTSC



RDTSC serialization using cpuid is not needed for Intel platforms.
This increases gettimeofday performance.

Cc: vojtech@suse.cz
Cc: rohit.seth@intel.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 7f6c5b04
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1042,6 +1042,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
	set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
 	c->x86_max_cores = intel_num_cpu_cores(c);

	srat_detect_node();
+7 −19
Original line number Diff line number Diff line
@@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;

/*
 * Read the time stamp counter into *tsc.
 *
 * On CONFIG_SMP builds sync_core() is called immediately before the read;
 * on uniprocessor builds the counter is read directly.
 */
static inline void rdtscll_sync(unsigned long *tsc)
{
#ifdef CONFIG_SMP
	/*
	 * NOTE(review): presumably this serializes the core (via cpuid) so
	 * that RDTSC is not executed ahead of earlier instructions --
	 * confirm against the definition of sync_core().
	 */
	sync_core();
#endif
	rdtscll(*tsc);
}

/*
 * do_gettimeoffset() returns microseconds since last timer interrupt was
 * triggered by hardware. A memory read of HPET is slower than a register read
@@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
{
	unsigned long t;
	unsigned long x;
	rdtscll_sync(&t);
	t = get_cycles_sync();
	if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
	return x;
@@ -309,8 +301,7 @@ unsigned long long monotonic_clock(void)
			last_offset = vxtime.last_tsc;
			base = monotonic_base;
		} while (read_seqretry(&xtime_lock, seq));
		sync_core();
		rdtscll(this_offset);
		this_offset = get_cycles_sync();
		offset = (this_offset - last_offset)*1000/cpu_khz; 
		return base + offset;
	}
@@ -391,7 +382,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
		delay = LATCH - 1 - delay;
	}

	rdtscll_sync(&tsc);
	tsc = get_cycles_sync();

	if (vxtime.mode == VXTIME_HPET) {
		if (offset - vxtime.last > hpet_tick) {
@@ -700,8 +691,7 @@ static unsigned int __init hpet_calibrate_tsc(void)
	do {
		local_irq_disable();
		hpet_now = hpet_readl(HPET_COUNTER);
		sync_core();
		rdtscl(tsc_now);
		tsc_now = get_cycles_sync();
		local_irq_restore(flags);
	} while ((tsc_now - tsc_start) < TICK_COUNT &&
		 (hpet_now - hpet_start) < TICK_COUNT);
@@ -731,11 +721,9 @@ static unsigned int __init pit_calibrate_tsc(void)
	outb(0xb0, 0x43);
	outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
	outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
	rdtscll(start);
	sync_core();
	start = get_cycles_sync();
	while ((inb(0x61) & 0x20) == 0);
	sync_core();
	rdtscll(end);
	end = get_cycles_sync();

	spin_unlock_irqrestore(&i8253_lock, flags);
	
@@ -939,7 +927,7 @@ void __init time_init(void)
	vxtime.mode = VXTIME_TSC;
	vxtime.quot = (1000000L << 32) / vxtime_hz;
	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
	rdtscll_sync(&vxtime.last_tsc);
	vxtime.last_tsc = get_cycles_sync();
	setup_irq(0, &irq0);

	set_cyc2ns_scale(cpu_khz);
+1 −2
Original line number Diff line number Diff line
@@ -66,8 +66,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
			(__jiffies - __wall_jiffies) * (1000000 / HZ);

		if (__vxtime.mode != VXTIME_HPET) {
			sync_core();
			rdtscll(t);
			t = get_cycles_sync();
			if (t < __vxtime.last_tsc)
				t = __vxtime.last_tsc;
			usec += ((t - __vxtime.last_tsc) *
+1 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@
#define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8_C	(3*32+ 4) /* C stepping K8 */
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
#define X86_FEATURE_SYNC_RDTSC  (3*32+6)  /* RDTSC syncs CPU core */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
+16 −0
Original line number Diff line number Diff line
@@ -10,6 +10,9 @@
#include <asm/msr.h>
#include <asm/vsyscall.h>
#include <asm/hpet.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <linux/compiler.h>

#define CLOCK_TICK_RATE	PIT_TICK_RATE	/* Underlying HZ */

@@ -23,6 +26,19 @@ static inline cycles_t get_cycles (void)
	return ret;
}

/*
 * Like get_cycles(), but make sure the CPU is synchronized before the
 * time stamp counter is read.
 *
 * Returns the current TSC value.
 */
static __always_inline cycles_t get_cycles_sync(void)
{
	unsigned long long ret;
	unsigned eax;

	/*
	 * Don't do an additional sync on CPUs where we know RDTSC is
	 * already synchronous.
	 *
	 * alternative_io() takes (oldinstr, newinstr, feature, ...): the
	 * old instruction is what gets assembled in, and the new one is
	 * patched over it on CPUs that have the feature.  So the default
	 * must be the serializing "cpuid", and the two-byte nop is the
	 * replacement used when X86_FEATURE_SYNC_RDTSC is set.  Having the
	 * two the other way round would serialize only on the CPUs that
	 * do not need it and leave every other CPU unserialized.
	 */
	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
	rdtscll(ret);
	return ret;
}

extern unsigned int cpu_khz;

extern int read_current_timer(unsigned long *timer_value);