Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c08c8205 authored by Vojtech Pavlik, committed by Andi Kleen
Browse files

[PATCH] Add the vgetcpu vsyscall



This patch adds a vgetcpu vsyscall which, depending on whether the CPU
supports RDTSCP, uses either RDTSCP or CPUID to obtain the CPU and node
numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast time stamp based cache using a user supplied
argument to speed things up further.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk

Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
parent a670fad0
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -371,7 +371,7 @@ ENTRY(cpu_gdt_table)
	.quad	0,0			/* TSS */
	.quad	0,0			/* TSS */
	.quad	0,0			/* LDT */
	.quad	0,0			/* LDT */
	.quad   0,0,0			/* three TLS descriptors */ 
	.quad   0,0,0			/* three TLS descriptors */ 
	.quad	0			/* unused */
	.quad	0x0000f40000000000	/* node/CPU stored in limit */
gdt_end:	
gdt_end:	
	/* asm/segment.h:GDT_ENTRIES must match this */	
	/* asm/segment.h:GDT_ENTRIES must match this */	
	/* This should be a multiple of the cache line size */
	/* This should be a multiple of the cache line size */
+7 −6
Original line number Original line Diff line number Diff line
@@ -899,12 +899,8 @@ static int __cpuinit
time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
{
{
	unsigned cpu = (unsigned long) hcpu;
	unsigned cpu = (unsigned long) hcpu;
	if (action == CPU_ONLINE &&
	if (action == CPU_ONLINE)
		cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
		vsyscall_set_cpu(cpu);
		unsigned p;
		p = smp_processor_id() | (cpu_to_node(smp_processor_id())<<12);
		write_rdtscp_aux(p);
	}
	return NOTIFY_DONE;
	return NOTIFY_DONE;
}
}


@@ -993,6 +989,11 @@ void time_init_gtod(void)
	if (unsynchronized_tsc())
	if (unsynchronized_tsc())
		notsc = 1;
		notsc = 1;


 	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
		vgetcpu_mode = VGETCPU_RDTSCP;
	else
		vgetcpu_mode = VGETCPU_LSL;

	if (vxtime.hpet_address && notsc) {
	if (vxtime.hpet_address && notsc) {
		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
		timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
		if (hpet_use_timer)
		if (hpet_use_timer)
+3 −0
Original line number Original line Diff line number Diff line
@@ -99,6 +99,9 @@ SECTIONS
  .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
  .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
  vxtime = VVIRT(.vxtime);
  vxtime = VVIRT(.vxtime);


  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
  vgetcpu_mode = VVIRT(.vgetcpu_mode);

  .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
  .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
  wall_jiffies = VVIRT(.wall_jiffies);
  wall_jiffies = VVIRT(.wall_jiffies);


+82 −2
Original line number Original line Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/seqlock.h>
#include <linux/seqlock.h>
#include <linux/jiffies.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/sysctl.h>
#include <linux/getcpu.h>


#include <asm/vsyscall.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
#include <asm/pgtable.h>
@@ -33,11 +34,15 @@
#include <asm/fixmap.h>
#include <asm/fixmap.h>
#include <asm/errno.h>
#include <asm/errno.h>
#include <asm/io.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>


#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))


int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
int __vgetcpu_mode __section_vgetcpu_mode;


#include <asm/unistd.h>
#include <asm/unistd.h>


@@ -127,9 +132,46 @@ time_t __vsyscall(1) vtime(time_t *t)
	return __xtime.tv_sec;
	return __xtime.tv_sec;
}
}


long __vsyscall(2) venosys_0(void)
/* Fast way to get current CPU and node.
   This helps to do per node and per CPU caches in user space.
   The result is not guaranteed without CPU affinity, but usually
   works out because the scheduler tries to keep a thread on the same
   CPU.

   tcache must point to a two element sized long array.
   All arguments can be NULL. */
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
{
	return -ENOSYS;
	unsigned int dummy, p;
	unsigned long j = 0;

	/* Fast cache - only recompute the value once per jiffy and avoid
	   the relatively costly rdtscp/lsl otherwise.
	   This works because the scheduler usually keeps the process
	   on the same CPU and this syscall doesn't guarantee its
	   results anyway.
	   We do this here because otherwise user space would do it on
	   its own in a likely inferior way (no access to jiffies).
	   If you don't like it pass NULL. */
	if (tcache && tcache->t0 == (j = __jiffies)) {
		p = tcache->t1;
	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
		/* Load per CPU data from RDTSCP */
		rdtscp(dummy, dummy, p);
	} else {
		/* Load per CPU data from GDT */
		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
	}
	if (tcache) {
		tcache->t0 = j;
		tcache->t1 = p;
	}
	if (cpu)
		*cpu = p & 0xfff;
	if (node)
		*node = p >> 12;
	return 0;
}
}


long __vsyscall(3) venosys_1(void)
long __vsyscall(3) venosys_1(void)
@@ -200,6 +242,43 @@ static ctl_table kernel_root_table2[] = {


#endif
#endif


/* Callback that hands the value packed into @info to write_rdtscp_aux().
   It takes a void * so that vsyscall_set_cpu() can either call it
   directly or pass it to smp_call_function_single(). */
static void __cpuinit write_rdtscp_cb(void *info)
{
	unsigned long aux = (unsigned long)info;

	write_rdtscp_aux(aux);
}

/* Publish the CPU and node number of @cpu so that vgetcpu can fetch them
   quickly from user space.

   Two copies are set up:
     - if @cpu has RDTSCP, (node << 12) | cpu is written through
       write_rdtscp_cb(), either directly or via a cross call to @cpu;
     - the same information is always stored in the GDT_ENTRY_PER_CPU
       descriptor of @cpu's GDT.

   Without CONFIG_NUMA the node is reported as 0. */
void __cpuinit vsyscall_set_cpu(int cpu)
{
	unsigned long *gdt_slot;
	unsigned long desc;
	unsigned long node = 0;

#ifdef CONFIG_NUMA
	node = cpu_to_node[cpu];
#endif

	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
		void *aux = (void *)((node << 12) | cpu);
		int self = get_cpu();

		/* We may already be running on the target CPU; this can
		   happen on a preemptive kernel. */
		if (self == cpu) {
			write_rdtscp_cb(aux);
		}
#ifdef CONFIG_SMP
		else {
			/* The notifier is unfortunately not executed on the
			   target CPU, so ask that CPU to do the write. */
			smp_call_function_single(cpu, write_rdtscp_cb, aux, 0, 1);
		}
#endif
		put_cpu();
	}

	/* Build the per CPU descriptor: the CPU number and the node are
	   stored in the limit so that they can be loaded quickly in user
	   space in vgetcpu.
	   12 bits for the CPU and 8 bits for the node; the low 4 node bits
	   go at bit 12 and the remaining node bits at bit 48. */
	desc = 0x0f40000000000ULL;
	desc |= cpu;
	desc |= (node & 0xf) << 12;
	desc |= (node >> 4) << 48;

	gdt_slot = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
	*gdt_slot = desc;
}

static void __init map_vsyscall(void)
static void __init map_vsyscall(void)
{
{
	extern char __vsyscall_0;
	extern char __vsyscall_0;
@@ -214,6 +293,7 @@ static int __init vsyscall_init(void)
			VSYSCALL_ADDR(__NR_vgettimeofday)));
			VSYSCALL_ADDR(__NR_vgettimeofday)));
	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
	map_vsyscall();
	map_vsyscall();
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_SYSCTL
	register_sysctl_table(kernel_root_table2, 0);
	register_sysctl_table(kernel_root_table2, 0);
+3 −2
Original line number Original line Diff line number Diff line
@@ -20,15 +20,16 @@
#define __USER_CS     0x33   /* 6*8+3 */ 
#define __USER_CS     0x33   /* 6*8+3 */ 
#define __USER32_DS	__USER_DS 
#define __USER32_DS	__USER_DS 


#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8	/* needs two entries */
#define GDT_ENTRY_TSS 8	/* needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */
#define GDT_ENTRY_LDT 10 /* needs two entries */
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
#define GDT_ENTRY_TLS_MAX 14
/* 15 free */


#define GDT_ENTRY_TLS_ENTRIES 3
#define GDT_ENTRY_TLS_ENTRIES 3


#define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
#define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)

/* TLS indexes for 64bit - hardcoded in arch_prctl */
/* TLS indexes for 64bit - hardcoded in arch_prctl */
#define FS_TLS 0	
#define FS_TLS 0	
#define GS_TLS 1	
#define GS_TLS 1	
Loading