Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d145c725 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (27 commits)
  lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL
  lguest: Use explicit includes rather than indirect
  lguest: get rid of lg variable assignments
  lguest: change gpte_addr header
  lguest: move changed bitmap to lg_cpu
  lguest: move last_pages to lg_cpu
  lguest: change last_guest to last_cpu
  lguest: change spte_addr header
  lguest: per-vcpu lguest pgdir management
  lguest: make pending notifications per-vcpu
  lguest: makes special fields be per-vcpu
  lguest: per-vcpu lguest task management
  lguest: replace lguest_arch with lg_cpu_arch.
  lguest: make registers per-vcpu
  lguest: make emulate_insn receive a vcpu struct.
  lguest: map_switcher_in_guest() per-vcpu
  lguest: per-vcpu interrupt processing.
  lguest: per-vcpu lguest timers
  lguest: make hypercalls use the vcpu struct
  lguest: make write() operation smp aware
  ...

Manual conflict resolved (maybe even correctly, who knows) in
drivers/lguest/x86/core.c
parents 44c3b591 84f12e39
Loading
Loading
Loading
Loading
+43 −6
Original line number Diff line number Diff line
@@ -79,6 +79,9 @@ static void *guest_base;
/* The maximum guest physical address allowed, and maximum possible. */
static unsigned long guest_limit, guest_max;

/* a per-cpu variable indicating which vcpu is currently running */
static unsigned int __thread cpu_id;

/* This is our list of devices. */
struct device_list
{
@@ -153,6 +156,9 @@ struct virtqueue
	void (*handle_output)(int fd, struct virtqueue *me);
};

/* Remember the arguments to the program so we can "reboot" */
static char **main_args;

/* Since guest is UP and we don't run at the same time, we don't need barriers.
 * But I include them in the code in case others copy it. */
#define wmb()
@@ -554,7 +560,7 @@ static void wake_parent(int pipefd, int lguest_fd)
			else
				FD_CLR(-fd - 1, &devices.infds);
		} else /* Send LHREQ_BREAK command. */
			write(lguest_fd, args, sizeof(args));
			pwrite(lguest_fd, args, sizeof(args), cpu_id);
	}
}

@@ -1489,7 +1495,9 @@ static void setup_block_file(const char *filename)

	/* Create stack for thread and run it */
	stack = malloc(32768);
	if (clone(io_thread, stack + 32768, CLONE_VM, dev) == -1)
	/* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from
	 * becoming a zombie. */
	if (clone(io_thread, stack + 32768,  CLONE_VM | SIGCHLD, dev) == -1)
		err(1, "Creating clone");

	/* We don't need to keep the I/O thread's end of the pipes open. */
@@ -1499,7 +1507,21 @@ static void setup_block_file(const char *filename)
	verbose("device %u: virtblock %llu sectors\n",
		devices.device_num, cap);
}
/* That's the end of device setup. */
/* That's the end of device setup. :*/

/* Reboot: drop every descriptor we hold and exec the Launcher afresh. */
static void __attribute__((noreturn)) restart_guest(void)
{
	unsigned int fd = 3;

	/* We keep no list of the descriptors we opened, so sweep through every
	 * number past stderr.  Shutting the pipes makes the waker thread and
	 * the io_threads die; shutting /dev/lguest cleans up the Guest. */
	while (fd < FD_SETSIZE)
		close(fd++);

	/* Run ourselves again with the arguments remembered in main_args. */
	execv(main_args[0], main_args);

	/* Only reached if the exec itself failed. */
	err(1, "Could not exec %s", main_args[0]);
}

/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves
 * its input and output, and finally, lays it to rest. */
@@ -1511,7 +1533,8 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
		int readval;

		/* We read from the /dev/lguest device to run the Guest. */
		readval = read(lguest_fd, &notify_addr, sizeof(notify_addr));
		readval = pread(lguest_fd, &notify_addr,
				sizeof(notify_addr), cpu_id);

		/* One unsigned long means the Guest did HCALL_NOTIFY */
		if (readval == sizeof(notify_addr)) {
@@ -1521,16 +1544,23 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
		/* ENOENT means the Guest died.  Reading tells us why. */
		} else if (errno == ENOENT) {
			char reason[1024] = { 0 };
			read(lguest_fd, reason, sizeof(reason)-1);
			pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
			errx(1, "%s", reason);
		/* ERESTART means that we need to reboot the guest */
		} else if (errno == ERESTART) {
			restart_guest();
		/* EAGAIN means the Waker wanted us to look at some input.
		 * Anything else means a bug or incompatible change. */
		} else if (errno != EAGAIN)
			err(1, "Running guest failed");

		/* Only service input on thread for CPU 0. */
		if (cpu_id != 0)
			continue;

		/* Service input, then unset the BREAK to release the Waker. */
		handle_input(lguest_fd);
		if (write(lguest_fd, args, sizeof(args)) < 0)
		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
			err(1, "Resetting break");
	}
}
@@ -1571,6 +1601,12 @@ int main(int argc, char *argv[])
	/* If they specify an initrd file to load. */
	const char *initrd_name = NULL;

	/* Save the args: we "reboot" by execing ourselves again. */
	main_args = argv;
	/* We don't "wait" for the children, so prevent them from becoming
	 * zombies. */
	signal(SIGCHLD, SIG_IGN);

	/* First we initialize the device list.  Since console and network
	 * device receive input from a file descriptor, we keep an fdset
	 * (infds) and the maximum fd number (max_infd) with the head of the
@@ -1582,6 +1618,7 @@ int main(int argc, char *argv[])
	devices.lastdev = &devices.dev;
	devices.next_irq = 1;

	cpu_id = 0;
	/* We need to know how much memory so we can set up the device
	 * descriptor and memory pages for the devices as we parse the command
	 * line.  So we quickly look through the arguments to find the amount
+9 −2
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@
#include <asm/mce.h>
#include <asm/io.h>
#include <asm/i387.h>
#include <asm/reboot.h>		/* for struct machine_ops */

/*G:010 Welcome to the Guest!
 *
@@ -813,7 +814,7 @@ static void lguest_safe_halt(void)
 * rather than virtual addresses, so we use __pa() here. */
static void lguest_power_off(void)
{
	hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
	hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
}

/*
@@ -823,7 +824,7 @@ static void lguest_power_off(void)
 */
static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
{
	hcall(LHCALL_CRASH, __pa(p), 0, 0);
	hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
	/* The hcall won't return, but to keep gcc happy, we're "done". */
	return NOTIFY_DONE;
}
@@ -927,6 +928,11 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
	return insn_len;
}

/* Restart hook: lguest_init() installs this as machine_ops.restart.  It issues
 * the same LHCALL_SHUTDOWN hypercall used for power-off and panic, but passes
 * LGUEST_SHUTDOWN_RESTART as the second argument; the Host marks the Guest
 * dead with -ERESTART in that case, and the Launcher reacts to ERESTART by
 * re-executing itself.  As with the other shutdown calls, the message is
 * handed over as a physical address, hence __pa(). */
static void lguest_restart(char *reason)
{
	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
}

/*G:030 Once we get to lguest_init(), we know we're a Guest.  The pv_ops
 * structures in the kernel provide points for (almost) every routine we have
 * to override to avoid privileged instructions. */
@@ -1060,6 +1066,7 @@ __init void lguest_init(void)
	 * the Guest routine to power off. */
	pm_power_off = lguest_power_off;

	machine_ops.restart = lguest_restart;
	/* Now we're set up, call start_kernel() in init/main.c and we proceed
	 * to boot as normal.  It never returns. */
	start_kernel();
+1 −1
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN) += isdn/
obj-$(CONFIG_EDAC)		+= edac/
obj-$(CONFIG_MCA)		+= mca/
obj-$(CONFIG_EISA)		+= eisa/
obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
obj-y				+= lguest/
obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
obj-$(CONFIG_MMC)		+= mmc/
+24 −22
Original line number Diff line number Diff line
@@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg,
/* This routine copies memory from the Guest.  Here we can see how useful the
 * kill_lguest() routine we met in the Launcher can be: we return a random
 * value (all zeroes) instead of needing to return an error. */
void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
{
	if (!lguest_address_ok(lg, addr, bytes)
	    || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
	if (!lguest_address_ok(cpu->lg, addr, bytes)
	    || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
		/* copy_from_user should do this, but as we rely on it... */
		memset(b, 0, bytes);
		kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
		kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
	}
}

/* This is the write (copy into guest) version. */
void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
	       unsigned bytes)
{
	if (!lguest_address_ok(lg, addr, bytes)
	    || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
		kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
	if (!lguest_address_ok(cpu->lg, addr, bytes)
	    || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
		kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
}
/*:*/

/*H:030 Let's jump straight to the main loop which runs the Guest.
 * Remember, this is called by the Launcher reading /dev/lguest, and we keep
 * going around and around until something interesting happens. */
int run_guest(struct lguest *lg, unsigned long __user *user)
int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
{
	/* We stop running once the Guest is dead. */
	while (!lg->dead) {
	while (!cpu->lg->dead) {
		/* First we run any hypercalls the Guest wants done. */
		if (lg->hcall)
			do_hypercalls(lg);
		if (cpu->hcall)
			do_hypercalls(cpu);

		/* It's possible the Guest did a NOTIFY hypercall to the
		 * Launcher, in which case we return from the read() now. */
		if (lg->pending_notify) {
			if (put_user(lg->pending_notify, user))
		if (cpu->pending_notify) {
			if (put_user(cpu->pending_notify, user))
				return -EFAULT;
			return sizeof(lg->pending_notify);
			return sizeof(cpu->pending_notify);
		}

		/* Check for signals */
@@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
			return -ERESTARTSYS;

		/* If Waker set break_out, return to Launcher. */
		if (lg->break_out)
		if (cpu->break_out)
			return -EAGAIN;

		/* Check if there are any interrupts which can be delivered
		 * now: if so, this sets up the handler to be executed when we
		 * next run the Guest. */
		maybe_do_interrupt(lg);
		maybe_do_interrupt(cpu);

		/* All long-lived kernel loops need to check with this horrible
		 * thing called the freezer.  If the Host is trying to suspend,
@@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)

		/* Just make absolutely sure the Guest is still alive.  One of
		 * those hypercalls could have been fatal, for example. */
		if (lg->dead)
		if (cpu->lg->dead)
			break;

		/* If the Guest asked to be stopped, we sleep.  The Guest's
		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
		if (lg->halted) {
		if (cpu->halted) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
			continue;
@@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
		local_irq_disable();

		/* Actually run the Guest until something happens. */
		lguest_arch_run_guest(lg);
		lguest_arch_run_guest(cpu);

		/* Now we're ready to be interrupted or moved to other CPUs */
		local_irq_enable();

		/* Now we deal with whatever happened to the Guest. */
		lguest_arch_handle_trap(lg);
		lguest_arch_handle_trap(cpu);
	}

	if (cpu->lg->dead == ERR_PTR(-ERESTART))
		return -ERESTART;
	/* The Guest is dead => "No such file or directory" */
	return -ENOENT;
}
@@ -253,7 +255,7 @@ static int __init init(void)

	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
	if (paravirt_enabled()) {
		printk("lguest is afraid of %s\n", pv_info.name);
		printk("lguest is afraid of being a guest\n");
		return -EPERM;
	}

+55 −51
Original line number Diff line number Diff line
@@ -23,13 +23,14 @@
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <linux/ktime.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "lg.h"

/*H:120 This is the core hypercall routine: where the Guest gets what it wants.
 * Or gets killed.  Or, in the case of LHCALL_CRASH, both. */
static void do_hcall(struct lguest *lg, struct hcall_args *args)
static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
{
	switch (args->arg0) {
	case LHCALL_FLUSH_ASYNC:
@@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
	case LHCALL_LGUEST_INIT:
		/* You can't get here unless you're already initialized.  Don't
		 * do that. */
		kill_guest(lg, "already have lguest_data");
		kill_guest(cpu, "already have lguest_data");
		break;
	case LHCALL_CRASH: {
		/* Crash is such a trivial hypercall that we do it in four
	case LHCALL_SHUTDOWN: {
		/* Shutdown is such a trivial hypercall that we do it in four
		 * lines right here. */
		char msg[128];
		/* If the lgread fails, it will call kill_guest() itself; the
		 * kill_guest() with the message will be ignored. */
		__lgread(lg, msg, args->arg1, sizeof(msg));
		__lgread(cpu, msg, args->arg1, sizeof(msg));
		msg[sizeof(msg)-1] = '\0';
		kill_guest(lg, "CRASH: %s", msg);
		kill_guest(cpu, "CRASH: %s", msg);
		if (args->arg2 == LGUEST_SHUTDOWN_RESTART)
			cpu->lg->dead = ERR_PTR(-ERESTART);
		break;
	}
	case LHCALL_FLUSH_TLB:
		/* FLUSH_TLB comes in two flavors, depending on the
		 * argument: */
		if (args->arg1)
			guest_pagetable_clear_all(lg);
			guest_pagetable_clear_all(cpu);
		else
			guest_pagetable_flush_user(lg);
			guest_pagetable_flush_user(cpu);
		break;

	/* All these calls simply pass the arguments through to the right
	 * routines. */
	case LHCALL_NEW_PGTABLE:
		guest_new_pagetable(lg, args->arg1);
		guest_new_pagetable(cpu, args->arg1);
		break;
	case LHCALL_SET_STACK:
		guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
		break;
	case LHCALL_SET_PTE:
		guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
		break;
	case LHCALL_SET_PMD:
		guest_set_pmd(lg, args->arg1, args->arg2);
		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
		break;
	case LHCALL_SET_CLOCKEVENT:
		guest_set_clockevent(lg, args->arg1);
		guest_set_clockevent(cpu, args->arg1);
		break;
	case LHCALL_TS:
		/* This sets the TS flag, as we saw used in run_guest(). */
		lg->ts = args->arg1;
		cpu->ts = args->arg1;
		break;
	case LHCALL_HALT:
		/* Similarly, this sets the halted flag for run_guest(). */
		lg->halted = 1;
		cpu->halted = 1;
		break;
	case LHCALL_NOTIFY:
		lg->pending_notify = args->arg1;
		cpu->pending_notify = args->arg1;
		break;
	default:
		/* It should be an architecture-specific hypercall. */
		if (lguest_arch_do_hcall(lg, args))
			kill_guest(lg, "Bad hypercall %li\n", args->arg0);
		if (lguest_arch_do_hcall(cpu, args))
			kill_guest(cpu, "Bad hypercall %li\n", args->arg0);
	}
}
/*:*/
@@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
 * Guest put them in the ring, but we also promise the Guest that they will
 * happen before any normal hypercall (which is why we check this before
 * checking for a normal hcall). */
static void do_async_hcalls(struct lguest *lg)
static void do_async_hcalls(struct lg_cpu *cpu)
{
	unsigned int i;
	u8 st[LHCALL_RING_SIZE];

	/* For simplicity, we copy the entire call status array in at once. */
	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st)))
		return;

	/* We process "struct lguest_data"s hcalls[] ring once. */
@@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg)
		/* We remember where we were up to from last time.  This makes
		 * sure that the hypercalls are done in the order the Guest
		 * places them in the ring. */
		unsigned int n = lg->next_hcall;
		unsigned int n = cpu->next_hcall;

		/* 0xFF means there's no call here (yet). */
		if (st[n] == 0xFF)
@@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg)

		/* OK, we have hypercall.  Increment the "next_hcall" cursor,
		 * and wrap back to 0 if we reach the end. */
		if (++lg->next_hcall == LHCALL_RING_SIZE)
			lg->next_hcall = 0;
		if (++cpu->next_hcall == LHCALL_RING_SIZE)
			cpu->next_hcall = 0;

		/* Copy the hypercall arguments into a local copy of
		 * the hcall_args struct. */
		if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
				   sizeof(struct hcall_args))) {
			kill_guest(lg, "Fetching async hypercalls");
			kill_guest(cpu, "Fetching async hypercalls");
			break;
		}

		/* Do the hypercall, same as a normal one. */
		do_hcall(lg, &args);
		do_hcall(cpu, &args);

		/* Mark the hypercall done. */
		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
			kill_guest(lg, "Writing result for async hypercall");
		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
			kill_guest(cpu, "Writing result for async hypercall");
			break;
		}

		/* Stop doing hypercalls if they want to notify the Launcher:
		 * it needs to service this first. */
		if (lg->pending_notify)
		if (cpu->pending_notify)
			break;
	}
}

/* Last of all, we look at what happens first of all.  The very first time the
 * Guest makes a hypercall, we end up here to set things up: */
static void initialize(struct lguest *lg)
static void initialize(struct lg_cpu *cpu)
{
	/* You can't do anything until you're initialized.  The Guest knows the
	 * rules, so we're unforgiving here. */
	if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
		kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
		return;
	}

	if (lguest_arch_init_hypercalls(lg))
		kill_guest(lg, "bad guest page %p", lg->lguest_data);
	if (lguest_arch_init_hypercalls(cpu))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/* The Guest tells us where we're not to deliver interrupts by putting
	 * the range of addresses into "struct lguest_data". */
	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
		kill_guest(lg, "bad guest page %p", lg->lguest_data);
	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);

	/* We write the current time into the Guest's data page once so it can
	 * set its clock. */
	write_timestamp(lg);
	write_timestamp(cpu);

	/* page_tables.c will also do some setup. */
	page_table_guest_data_init(lg);
	page_table_guest_data_init(cpu);

	/* This is the one case where the above accesses might have been the
	 * first write to a Guest page.  This may have caused a copy-on-write
	 * fault, but the old page might be (read-only) in the Guest
	 * pagetable. */
	guest_pagetable_clear_all(lg);
	guest_pagetable_clear_all(cpu);
}

/*H:100
@@ -194,27 +197,27 @@ static void initialize(struct lguest *lg)
 * Remember from the Guest, hypercalls come in two flavors: normal and
 * asynchronous.  This file handles both of types.
 */
void do_hypercalls(struct lguest *lg)
void do_hypercalls(struct lg_cpu *cpu)
{
	/* Not initialized yet?  This hypercall must do it. */
	if (unlikely(!lg->lguest_data)) {
	if (unlikely(!cpu->lg->lguest_data)) {
		/* Set up the "struct lguest_data" */
		initialize(lg);
		initialize(cpu);
		/* Hcall is done. */
		lg->hcall = NULL;
		cpu->hcall = NULL;
		return;
	}

	/* The Guest has initialized.
	 *
	 * Look in the hypercall ring for the async hypercalls: */
	do_async_hcalls(lg);
	do_async_hcalls(cpu);

	/* If we stopped reading the hypercall ring because the Guest did a
	 * NOTIFY to the Launcher, we want to return now.  Otherwise we do
	 * the hypercall. */
	if (!lg->pending_notify) {
		do_hcall(lg, lg->hcall);
	if (!cpu->pending_notify) {
		do_hcall(cpu, cpu->hcall);
		/* Tricky point: we reset the hcall pointer to mark the
		 * hypercall as "done".  We use the hcall pointer rather than
		 * the trap number to indicate a hypercall is pending.
@@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg)
		 * Launcher, the run_guest() loop will exit without running the
		 * Guest.  When it comes back it would try to re-run the
		 * hypercall. */
		lg->hcall = NULL;
		cpu->hcall = NULL;
	}
}

/* This routine supplies the Guest with time: it's used for wallclock time at
 * initial boot and as a rough time source if the TSC isn't available. */
void write_timestamp(struct lguest *lg)
void write_timestamp(struct lg_cpu *cpu)
{
	struct timespec now;
	ktime_get_real_ts(&now);
	if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
		kill_guest(lg, "Writing timestamp");
	if (copy_to_user(&cpu->lg->lguest_data->time,
			 &now, sizeof(struct timespec)))
		kill_guest(cpu, "Writing timestamp");
}
Loading