xen: Account for stolen time (f91a8b44) · Commits · e / devices / android_kernel_fairphone_FP4

arch/i386/xen/time.c

+150 −9

Original line number	Diff line number	Diff line
		@@ -11,6 +11,7 @@
		#include <linux/interrupt.h>
		#include <linux/clocksource.h>
		#include <linux/clockchips.h>
		#include <linux/kernel_stat.h>

		#include <asm/xen/hypervisor.h>
		#include <asm/xen/hypercall.h>
		@@ -25,6 +26,7 @@

		/* Xen may fire a timer up to this many ns early */
		#define TIMER_SLOP 100000
		#define NS_PER_TICK (1000000000LL / HZ)

		/* These are periodically updated in shared_info, and then copied here. */
		struct shadow_time_info {
		@@ -37,6 +39,139 @@ struct shadow_time_info {

		static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);

		/* runstate info updated by Xen */
		static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

		/* snapshots of runstate info */
		static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);

		/* unused ns of stolen and blocked time */
		static DEFINE_PER_CPU(u64, residual_stolen);
		static DEFINE_PER_CPU(u64, residual_blocked);

		/*
		 * Return a consistent snapshot of a 64-bit time/counter value.
		 *
		 * @p: address of the 64-bit value to read.
		 *
		 * When BITS_PER_LONG < 64 the value is fetched as two 32-bit words, so
		 * the high word is re-read after the low word and the whole read is
		 * retried if the high word changed in between.  Otherwise the value is
		 * read with a single dereference.
		 */
		static u64 get64(const u64 *p)
		{
			u64 ret;

			if (BITS_PER_LONG < 64) {
				/* View the value as two 32-bit words: [0] low, [1] high. */
				const u32 *p32 = (const u32 *)p;
				u32 h, l;

				/*
				 * Read high then low, and then make sure high is
				 * still the same; this will only loop if low wraps
				 * and carries into high.
				 * XXX some clean way to make this endian-proof?
				 */
				do {
					h = p32[1];
					barrier();
					l = p32[0];
					barrier();
				} while (p32[1] != h);

				ret = (((u64)h) << 32) | l;
			} else {
				ret = *p;
			}

			return ret;
		}

		/*
		* Runstate accounting
		*/
		/*
		 * Copy the current CPU's runstate area into *res as a consistent snapshot.
		 *
		 * @res: caller-provided structure that receives the copy.
		 *
		 * The copy is repeated until state_entry_time reads the same value before
		 * and after it, so a copy that straddles an update is discarded.
		 * Preemption is disabled for the duration of the read.
		 */
		static void get_runstate_snapshot(struct vcpu_runstate_info *res)
		{
			u64 state_time;
			struct vcpu_runstate_info *state;

			preempt_disable();

			state = &__get_cpu_var(runstate);

			/*
			 * The runstate info is always updated by the hypervisor on
			 * the current CPU, so there's no need to use anything
			 * stronger than a compiler barrier when fetching it.
			 */
			do {
				state_time = get64(&state->state_entry_time);
				barrier();
				/* Copy the structure contents out to the caller's buffer. */
				*res = *state;
				barrier();
			} while (get64(&state->state_entry_time) != state_time);

			preempt_enable();
		}

		static void setup_runstate_info(int cpu)
		{
		struct vcpu_register_runstate_memory_area area;

		area.addr.v = &per_cpu(runstate, cpu);

		if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
		cpu, &area))
		BUG();
		}

		static void do_stolen_accounting(void)
		{
		struct vcpu_runstate_info state;
		struct vcpu_runstate_info *snap;
		s64 blocked, runnable, offline, stolen;
		cputime_t ticks;

		get_runstate_snapshot(&state);

		WARN_ON(state.state != RUNSTATE_running);

		snap = &__get_cpu_var(runstate_snapshot);

		/* work out how much time the VCPU has not been running */
		blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
		runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
		offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

		*snap = state;

		/* Add the appropriate number of ticks of stolen time,
		including any left-overs from last time. Passing NULL to
		account_steal_time accounts the time as stolen. */
		stolen = runnable + offline + __get_cpu_var(residual_stolen);

		if (stolen < 0)
		stolen = 0;

		ticks = 0;
		while (stolen >= NS_PER_TICK) {
		ticks++;
		stolen -= NS_PER_TICK;
		}
		__get_cpu_var(residual_stolen) = stolen;
		account_steal_time(NULL, ticks);

		/* Add the appropriate number of ticks of blocked time,
		including any left-overs from last time. Passing idle to
		account_steal_time accounts the time as idle/wait. */
		blocked += __get_cpu_var(residual_blocked);

		if (blocked < 0)
		blocked = 0;

		ticks = 0;
		while (blocked >= NS_PER_TICK) {
		ticks++;
		blocked -= NS_PER_TICK;
		}
		__get_cpu_var(residual_blocked) = blocked;
		account_steal_time(idle_task(smp_processor_id()), ticks);
		}



		/* Get the CPU speed from Xen */
		unsigned long xen_cpu_khz(void)
		{
		u64 cpu_khz = 1000000ULL << 32;
		@@ -56,13 +191,11 @@ unsigned long xen_cpu_khz(void)
		* Reads a consistent set of time-base values from Xen, into a shadow data
		* area.
		*/
		static void get_time_values_from_xen(void)
		static unsigned get_time_values_from_xen(void)
		{
		struct vcpu_time_info *src;
		struct shadow_time_info *dst;

		preempt_disable();

		/* src is shared memory with the hypervisor, so we need to
		make sure we get a consistent snapshot, even in the face of
		being preempted. */
		@@ -79,7 +212,7 @@ static void get_time_values_from_xen(void)
		rmb(); /* test version after fetching data */
		} while ((src->version & 1) \| (dst->version ^ src->version));

		preempt_enable();
		return dst->version;
		}

		/*
		@@ -123,7 +256,7 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
		/*
		 * Scale the TSC delta since the shadow's tsc_timestamp.
		 *
		 * @shadow: shadow time info supplying tsc_timestamp, tsc_to_nsec_mul and
		 *          tsc_shift.
		 *
		 * Returns scale_delta() applied to (current TSC - shadow->tsc_timestamp);
		 * judging by the field names this is an offset in nanoseconds.
		 */
		static u64 get_nsec_offset(struct shadow_time_info *shadow)
		{
			u64 now, delta;

			/* Single TSC read; an earlier rdtscll(now) here was overwritten. */
			now = native_read_tsc();
			delta = now - shadow->tsc_timestamp;
			return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
		}
		@@ -132,10 +265,14 @@ cycle_t xen_clocksource_read(void)
		{
		struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
		cycle_t ret;
		unsigned version;

		get_time_values_from_xen();

		do {
		version = get_time_values_from_xen();
		barrier();
		ret = shadow->system_timestamp + get_nsec_offset(shadow);
		barrier();
		} while (version != __get_cpu_var(xen_vcpu)->time.version);

		put_cpu_var(shadow_time);

		@@ -352,6 +489,8 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
		ret = IRQ_HANDLED;
		}

		do_stolen_accounting();

		return ret;
		}

		@@ -378,6 +517,8 @@ static void xen_setup_timer(int cpu)
		evt->irq = irq;
		clockevents_register_device(evt);

		setup_runstate_info(cpu);

		put_cpu_var(xen_clock_events);
		}

		@@ -390,7 +531,7 @@ __init void xen_time_init(void)
		clocksource_register(&xen_clocksource);

		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100hz tick, so we have the
		/* Successfully turned off 100Hz tick, so we have the
		vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;