Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a7f4df4e authored by Alex Smith, committed by Ralf Baechle
Browse files

MIPS: VDSO: Add implementations of gettimeofday() and clock_gettime()



Add user-mode implementations of gettimeofday() and clock_gettime() to
the VDSO. This is currently usable with 2 clocksources: the CP0 count
register, which is accessible to user-mode via RDHWR on R2 and later
cores, or the MIPS Global Interrupt Controller (GIC) timer, which
provides a "user-mode visible" section containing a mirror of its
counter registers. This section must be mapped into user memory, which
is done below the VDSO data page.

When a supported clocksource is not in use, the VDSO functions will
return -ENOSYS, which causes libc to fall back on the standard syscall
path.

When support for neither of these clocksources is compiled into the
kernel at all, the VDSO still provides clock_gettime(), as the coarse
realtime/monotonic clocks can still be implemented. However,
gettimeofday() is not provided in this case as nothing can be done
without a suitable clocksource. This causes the symbol lookup to fail
in libc and it will then always use the standard syscall path.

This patch includes a workaround for a bug in QEMU which results in
RDHWR on the CP0 count register always returning a constant (incorrect)
value. A fix for this has been submitted, and the workaround can be
removed after the fix has been in stable releases for a reasonable
amount of time.

A simple performance test which calls gettimeofday() 1000 times in a
loop and calculates the average execution time gives the following
results on a Malta + I6400 (running at 20MHz):

 - Syscall:    ~31000 ns
 - VDSO (GIC): ~15000 ns
 - VDSO (CP0): ~9500 ns

[markos.chandras@imgtec.com:
- Minor code re-arrangements so that the mappings are made in the
order they appear in the process's address space.
- Move do_{monotonic, realtime} outside of the MIPS_CLOCK_VSYSCALL ifdef
- Use gic_get_usm_range so we can do the GIC mapping in
arch/mips/kernel/vdso instead of in the GIC irqchip driver]

Signed-off-by: Alex Smith <alex.smith@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11338/


Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent c0a9f72c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -61,6 +61,8 @@ config MIPS
	select SYSCTL_EXCEPTION_TRACE
	select HAVE_VIRT_CPU_ACCOUNTING_GEN
	select HAVE_IRQ_TIME_ACCOUNTING
	select GENERIC_TIME_VSYSCALL
	select ARCH_CLOCKSOURCE_DATA

menu "Machine selection"

@@ -1040,6 +1042,9 @@ config CSRC_R4K
config CSRC_SB1250
	bool

config MIPS_CLOCK_VSYSCALL
	def_bool CSRC_R4K || CLKSRC_MIPS_GIC

config GPIO_TXX9
	select ARCH_REQUIRE_GPIOLIB
	bool
+29 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2015 Imagination Technologies
 * Author: Alex Smith <alex.smith@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation;  either version 2 of the  License, or (at your
 * option) any later version.
 */

#ifndef __ASM_CLOCKSOURCE_H
#define __ASM_CLOCKSOURCE_H

#include <linux/types.h>

/*
 * VDSO clocksources. One of these values is stored in
 * arch_clocksource_data.vdso_clock_mode to select how the VDSO reads the
 * clocksource.
 */
#define VDSO_CLOCK_NONE		0	/* No suitable clocksource. */
#define VDSO_CLOCK_R4K		1	/* Use the coprocessor 0 count. */
#define VDSO_CLOCK_GIC		2	/* Use the GIC. */

/**
 * struct arch_clocksource_data - Architecture-specific clocksource information.
 * @vdso_clock_mode: Method the VDSO should use to access the clocksource;
 *                   holds one of the VDSO_CLOCK_* values.
 */
struct arch_clocksource_data {
	u8 vdso_clock_mode;
};

#endif /* __ASM_CLOCKSOURCE_H */
+67 −1
Original line number Diff line number Diff line
@@ -13,6 +13,8 @@

#include <linux/mm_types.h>

#include <asm/barrier.h>

/**
 * struct mips_vdso_image - Details of a VDSO image.
 * @data: Pointer to VDSO image data (page-aligned).
@@ -53,18 +55,82 @@ extern struct mips_vdso_image vdso_image_n32;

/**
 * union mips_vdso_data - Data provided by the kernel for the VDSO.
 * @xtime_sec:		Current real time (seconds part).
 * @xtime_nsec:		Current real time (nanoseconds part, shifted).
 * @wall_to_mono_sec:	Wall-to-monotonic offset (seconds part).
 * @wall_to_mono_nsec:	Wall-to-monotonic offset (nanoseconds part).
 * @seq_count:		Counter to synchronise updates (odd = updating).
 * @cs_shift:		Clocksource shift value.
 * @clock_mode:		Clocksource to use for time functions.
 * @cs_mult:		Clocksource multiplier value.
 * @cs_cycle_last:	Clock cycle value at last update.
 * @cs_mask:		Clocksource mask value.
 * @tz_minuteswest:	Minutes west of Greenwich (from timezone).
 * @tz_dsttime:		Type of DST correction (from timezone).
 * @page:		Byte array of PAGE_SIZE bytes overlaying the fields
 *			above, so that the union occupies a whole page.
 *
 * This structure contains data needed by functions within the VDSO. It is
 * populated by the kernel and mapped read-only into user memory. The time
 * fields are mirrors of internal data from the timekeeping infrastructure.
 *
 * Note: Care should be taken when modifying as the layout must remain the same
 * for both 64- and 32-bit (for 32-bit userland on 64-bit kernel).
 *
 * NOTE(review): clock_mode is a u8 placed directly before a u32, so the
 * compiler presumably inserts padding before cs_mult - confirm that this
 * padding is identical for every ABI that shares the page.
 */
union mips_vdso_data {
	struct {
		u64 xtime_sec;
		u64 xtime_nsec;
		u32 wall_to_mono_sec;
		u32 wall_to_mono_nsec;
		u32 seq_count;
		u32 cs_shift;
		u8 clock_mode;
		u32 cs_mult;
		u64 cs_cycle_last;
		u64 cs_mask;
		s32 tz_minuteswest;
		s32 tz_dsttime;
	};

	/* Sizes the union to exactly one page. */
	u8 page[PAGE_SIZE];
};

/*
 * Start a read of the VDSO data page.
 *
 * Spins until the sequence counter is even (an odd value marks an update in
 * progress) and returns the counter value that was observed. The caller
 * hands that value to vdso_data_read_retry() once it has finished reading.
 */
static inline u32 vdso_data_read_begin(const union mips_vdso_data *data)
{
	u32 snapshot;

	for (;;) {
		snapshot = ACCESS_ONCE(data->seq_count);
		if (unlikely(snapshot & 1)) {
			/* Writer is mid-update; back off and sample again. */
			cpu_relax();
			continue;
		}

		/* Paired with smp_wmb() in vdso_data_write_*(). */
		smp_rmb();
		return snapshot;
	}
}

/*
 * Finish a read of the VDSO data page.
 *
 * @data:      The VDSO data page that was read.
 * @start_seq: Sequence value returned by vdso_data_read_begin().
 *
 * Returns true when the sequence counter no longer equals @start_seq, i.e.
 * the page was modified after the read began and the read must be repeated.
 */
static inline bool vdso_data_read_retry(const union mips_vdso_data *data,
					u32 start_seq)
{
	/*
	 * Paired with smp_wmb() in vdso_data_write_*(). The barrier is issued
	 * before the sequence counter is re-read below.
	 */
	smp_rmb();
	return unlikely(data->seq_count != start_seq);
}

/*
 * Begin an update of the VDSO data page.
 *
 * Increments the sequence counter and then issues a write barrier. When
 * calls are correctly paired with vdso_data_write_end() the counter is odd
 * for the duration of the update, which makes vdso_data_read_begin() spin.
 */
static inline void vdso_data_write_begin(union mips_vdso_data *data)
{
	++data->seq_count;

	/* Ensure sequence update is written before other data page values. */
	smp_wmb();
}

/*
 * Complete an update of the VDSO data page.
 *
 * Issues a write barrier and then increments the sequence counter a second
 * time. After a matching vdso_data_write_begin() the counter is even again
 * and differs from any value a reader sampled before the update started.
 */
static inline void vdso_data_write_end(union mips_vdso_data *data)
{
	/* Ensure data values are written before updating sequence again. */
	smp_wmb();
	++data->seq_count;
}

#endif /* __ASM_VDSO_H */
+44 −0
Original line number Diff line number Diff line
@@ -28,6 +28,43 @@ static u64 notrace r4k_read_sched_clock(void)
	return read_c0_count();
}

/*
 * Read hardware register $2 with the RDHWR instruction and return its value.
 * This is the same access path the VDSO uses to obtain the CP0 count from
 * user mode.
 */
static inline unsigned int rdhwr_count(void)
{
	unsigned int count;

	/*
	 * .set push/.set pop save and restore the assembler settings around
	 * the temporary switch to the mips32r2 ISA level, which is selected so
	 * that the assembler accepts RDHWR.
	 */
	__asm__ __volatile__(
	"	.set push\n"
	"	.set mips32r2\n"
	"	rdhwr	%0, $2\n"
	"	.set pop\n"
	: "=r" (count));

	return count;
}

/*
 * Check whether RDHWR returns a changing CP0 count.
 *
 * Returns true as soon as two reads differ, or false (after logging a
 * warning) when every read returned the same value.
 */
static bool rdhwr_count_usable(void)
{
	unsigned int baseline, sample, remaining;

	/*
	 * Workaround for older QEMU releases, whose RDHWR implementation for
	 * the CP0 count always yields the same constant. Detect that case so
	 * the VDSO does not rely on it. Drop this once fixed QEMU versions
	 * have been in stable releases for a reasonable amount of time.
	 */
	baseline = rdhwr_count();

	/* Take up to 100 further samples looking for any movement. */
	remaining = 100;
	while (remaining--) {
		sample = rdhwr_count();
		if (sample != baseline)
			return true;
	}

	pr_warn("Not using R4K clocksource in VDSO due to broken RDHWR\n");
	return false;
}

int __init init_r4k_clocksource(void)
{
	if (!cpu_has_counter || !mips_hpt_frequency)
@@ -36,6 +73,13 @@ int __init init_r4k_clocksource(void)
	/* Calculate a somewhat reasonable rating value */
	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;

	/*
	 * R2 onwards makes the count accessible to user mode so it can be used
	 * by the VDSO (HWREna is configured by configure_hwrena()).
	 */
	if (cpu_has_mips_r2_r6 && rdhwr_count_usable())
		clocksource_mips.archdata.vdso_clock_mode = VDSO_CLOCK_R4K;

	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);

	sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
+65 −6
Original line number Diff line number Diff line
@@ -12,9 +12,12 @@
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irqchip/mips-gic.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>

#include <asm/abi.h>
#include <asm/vdso.h>
@@ -23,7 +26,7 @@
static union mips_vdso_data vdso_data __page_aligned_data;

/*
 * Mapping for the VDSO data pages. The real pages are mapped manually, as
 * Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as
 * what we map and where within the area they are mapped is determined at
 * runtime.
 */
@@ -64,25 +67,67 @@ static int __init init_vdso(void)
}
subsys_initcall(init_vdso);

void update_vsyscall(struct timekeeper *tk)
{
	vdso_data_write_begin(&vdso_data);

	vdso_data.xtime_sec = tk->xtime_sec;
	vdso_data.xtime_nsec = tk->tkr_mono.xtime_nsec;
	vdso_data.wall_to_mono_sec = tk->wall_to_monotonic.tv_sec;
	vdso_data.wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec;
	vdso_data.cs_shift = tk->tkr_mono.shift;

	vdso_data.clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode;
	if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
		vdso_data.cs_mult = tk->tkr_mono.mult;
		vdso_data.cs_cycle_last = tk->tkr_mono.cycle_last;
		vdso_data.cs_mask = tk->tkr_mono.mask;
	}

	vdso_data_write_end(&vdso_data);
}

/*
 * Mirror the system timezone (sys_tz) into the VDSO data page.
 *
 * The copy is unconditional. The timezone used to be mirrored only while
 * clock_mode was not VDSO_CLOCK_NONE, so a timezone set while no
 * VDSO-capable clocksource was in use was never copied. update_vsyscall()
 * does not refresh the timezone fields either, so after a later switch to
 * a VDSO-capable clocksource the data page would keep stale timezone
 * values. Writing the fields while clock_mode is VDSO_CLOCK_NONE is
 * harmless.
 */
void update_vsyscall_tz(void)
{
	vdso_data.tz_minuteswest = sys_tz.tz_minuteswest;
	vdso_data.tz_dsttime = sys_tz.tz_dsttime;
}

int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mips_vdso_image *image = current->thread.abi->vdso;
	struct mm_struct *mm = current->mm;
	unsigned long base, vdso_addr;
	unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr;
	struct vm_area_struct *vma;
	struct resource gic_res;
	int ret;

	down_write(&mm->mmap_sem);

	base = get_unmapped_area(NULL, 0, PAGE_SIZE + image->size, 0, 0);
	/*
	 * Determine total area size. This includes the VDSO data itself, the
	 * data page, and the GIC user page if present. Always create a mapping
	 * for the GIC user area if the GIC is present regardless of whether it
	 * is the current clocksource, in case it comes into use later on. We
	 * only map a page even though the total area is 64K, as we only need
	 * the counter registers at the start.
	 */
	gic_size = gic_present ? PAGE_SIZE : 0;
	vvar_size = gic_size + PAGE_SIZE;
	size = vvar_size + image->size;

	base = get_unmapped_area(NULL, 0, size, 0, 0);
	if (IS_ERR_VALUE(base)) {
		ret = base;
		goto out;
	}

	vdso_addr = base + PAGE_SIZE;
	data_addr = base + gic_size;
	vdso_addr = data_addr + PAGE_SIZE;

	vma = _install_special_mapping(mm, base, PAGE_SIZE,
	vma = _install_special_mapping(mm, base, vvar_size,
				       VM_READ | VM_MAYREAD,
				       &vdso_vvar_mapping);
	if (IS_ERR(vma)) {
@@ -90,8 +135,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
		goto out;
	}

	/* Map GIC user page. */
	if (gic_size) {
		ret = gic_get_usm_range(&gic_res);
		if (ret)
			goto out;

		ret = io_remap_pfn_range(vma, base,
					 gic_res.start >> PAGE_SHIFT,
					 gic_size,
					 pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto out;
	}

	/* Map data page. */
	ret = remap_pfn_range(vma, base,
	ret = remap_pfn_range(vma, data_addr,
			      virt_to_phys(&vdso_data) >> PAGE_SHIFT,
			      PAGE_SIZE, PAGE_READONLY);
	if (ret)
Loading