Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b2b47c21 authored by Rusty Russell's avatar Rusty Russell Committed by Linus Torvalds
Browse files

lguest: documentation II: Guest



Documentation: The Guest

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f938d2c8
Loading
Loading
Loading
Loading
+429 −21

File changed.

Preview size limit exceeded, changes collapsed.

+41 −16
Original line number Diff line number Diff line
@@ -4,15 +4,15 @@
#include <asm/thread_info.h>
#include <asm/processor-flags.h>

/*
 * This is where we begin: we have a magic signature which the launcher looks
 * for.  The plan is that the Linux boot protocol will be extended with a
/*G:020 This is where we begin: we have a magic signature which the launcher
 * looks for.  The plan is that the Linux boot protocol will be extended with a
 * "platform type" field which will guide us here from the normal entry point,
 * but for the moment this suffices.  We pass the virtual address of the boot
 * info to lguest_init().
 * but for the moment this suffices.  The normal boot code uses %esi for the
 * boot header, so we do too.  We convert it to a virtual address by adding
 * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
 *
 * We put it in .init.text will be discarded after boot.
 */
 * The .section line puts this code in .init.text so it will be discarded after
 * boot. */
.section .init.text, "ax", @progbits
.ascii "GenuineLguest"
	/* Set up initial stack. */
@@ -21,7 +21,9 @@
	addl $__PAGE_OFFSET, %eax
	jmp lguest_init

/*G:055 The templates for inline patching.  We create a macro which puts the
 * assembler code between lgstart_ and lgend_ markers, and exports both markers
 * with .globl so the start and end of each template can be referenced by name.
 * These templates end up in the .init.text section, so they are discarded
 * after boot. */
#define LGUEST_PATCH(name, insns...)			\
	lgstart_##name:	insns; lgend_##name:;		\
	.globl lgstart_##name; .globl lgend_##name
@@ -30,24 +32,47 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
/*:*/

.text
/* These mark the EIP range where the Host should never deliver interrupts. */
.global lguest_noirq_start
.global lguest_noirq_end

/*
 * We move eflags word to lguest_data.irq_enabled to restore interrupt state.
 * For page faults, gpfs and virtual interrupts, the hypervisor has saved
 * eflags manually, otherwise it was delivered directly and so eflags reflects
 * the real machine IF state, ie. interrupts on.  Since the kernel always dies
 * if it takes such a trap with interrupts disabled anyway, turning interrupts
 * back on unconditionally here is OK.
 */
/*G:045 There is one final paravirt_op that the Guest implements, and glancing
 * at it you can see why I left it to last.  It's *cool*!  It's in *assembler*!
 *
 * The "iret" instruction is used to return from an interrupt or trap.  The
 * stack looks like this:
 *   old address
 *   old code segment & privilege level
 *   old processor flags ("eflags")
 *
 * The "iret" instruction pops those values off the stack and restores them all
 * at once.  The only problem is that eflags includes the Interrupt Flag which
 * the Guest can't change: the CPU will simply ignore it when we do an "iret".
 * So we have to copy eflags from the stack to lguest_data.irq_enabled before
 * we do the "iret".
 *
 * There are two problems with this: firstly, we need to use a register to do
 * the copy and secondly, the whole thing needs to be atomic.  The first
 * problem is easy to solve: push %eax on the stack so we can use it, and then
 * restore it at the end just before the real "iret".
 *
 * The second is harder: copying eflags to lguest_data.irq_enabled will turn
 * interrupts on before we're finished, so we could be interrupted before we
 * return to userspace or wherever.  Our solution to this is to surround the
 * code with lguest_noirq_start: and lguest_noirq_end: labels.  We tell the
 * Host that it is *never* to interrupt us there, even if interrupts seem to be
 * enabled. */
ENTRY(lguest_iret)
	/* We need a scratch register for the copy: save %eax on the stack so
	 * it can be restored just before the real "iret". */
	pushl	%eax
	/* With %eax pushed, the iret frame sits one word further up the
	 * stack: 4(%esp) is the old address, 8(%esp) the old code segment and
	 * 12(%esp) the old eflags, which is the word we want. */
	movl	12(%esp), %eax
lguest_noirq_start:
	/* Note the %ss: segment prefix here.  Normal data accesses use the
	 * "ds" segment, but that will have already been restored for whatever
	 * we're returning to (such as userspace): we can't trust it.  The %ss:
	 * prefix makes sure we use the stack segment, which is still valid. */
	movl	%eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
	/* Give %eax back its original value, then return for real. */
	popl	%eax
	iret
+38 −9
Original line number Diff line number Diff line
@@ -27,18 +27,38 @@
#define LG_CLOCK_MIN_DELTA	100UL
#define LG_CLOCK_MAX_DELTA	ULONG_MAX

/*G:031 First, how does our Guest contact the Host to ask for privileged
 * operations?  There are two ways: the direct way is to make a "hypercall",
 * to make requests of the Host Itself.
 *
 * Our hypercall mechanism uses the highest unused trap code (traps 32 and
 * above are used by real hardware interrupts).  Seventeen hypercalls are
 * available: the hypercall number is put in the %eax register, and the
 * arguments (when required) are placed in %edx, %ebx and %ecx.  If a return
 * value makes sense, it's returned in %eax.
 *
 * Grossly invalid calls result in Sudden Death at the hands of the vengeful
 * Host, rather than returning failure.  This reflects Winston Churchill's
 * definition of a gentleman: "someone who is only rude intentionally". */
#define LGUEST_TRAP_ENTRY 0x1F

/* Make a direct hypercall to the Host.
 *
 * @call: the hypercall number, handed over in %eax.
 * @arg1, @arg2, @arg3: the arguments, handed over in %edx, %ebx and %ecx.
 *
 * Returns whatever is in %eax once the trap comes back. */
static inline unsigned long
hcall(unsigned long call,
      unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
	/* "int" is the Intel instruction to trigger a trap. */
	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
		       /* The call is in %eax (aka "a"), and can be replaced */
		     : "=a"(call)
		       /* The inputs: the call number in %eax, and the
			* arguments in %edx, %ebx & %ecx */
		     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
		       /* "memory" means this might write somewhere in memory.
			* This isn't true for all calls, but it's safe to tell
			* gcc that it might happen so it doesn't get clever. */
		     : "memory");
	return call;
}
/*:*/

void async_hcall(unsigned long call,
		 unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -52,31 +72,40 @@ struct hcall_ring
	u32 eax, edx, ebx, ecx;
};

/* All the good stuff happens here: guest registers it with LGUEST_INIT */
/*G:032 The second method of communicating with the Host is via "struct
 * lguest_data".  The Guest's very first hypercall is to tell the Host where
 * this is, and then the Guest and Host both publish information in it. :*/
struct lguest_data
{
/* Fields which change during running: */
	/* 512 (ie. X86_EFLAGS_IF) == enabled, same as eflags in normal
	 * hardware.  The Guest changes interrupts so often that a hypercall
	 * is too slow. */
	unsigned int irq_enabled;
	/* Fine-grained interrupt disabling by the Guest */
	DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);

	/* The Host writes the virtual address of the last page fault here,
	 * which saves the Guest a hypercall.  CR2 is the native register where
	 * this address would normally be found. */
	unsigned long cr2;

	/* Async hypercall ring.  Instead of directly making hypercalls, we can
	 * place them in here for processing the next time the Host wants.
	 * This batching can be quite efficient. */

	/* 0xFF == done (set by Host), 0 == pending (set by Guest). */
	u8 hcall_status[LHCALL_RING_SIZE];
	/* The actual registers for the hypercalls. */
	struct hcall_ring hcalls[LHCALL_RING_SIZE];

/* Fields initialized by the Host at boot: */
	/* Memory not to try to access */
	unsigned long reserve_mem;
	/* ID of this Guest (used by network driver to set ethernet address) */
	u16 guestid;
	/* KHz for the TSC clock. */
	u32 tsc_khz;

/* Fields initialized by the Guest at boot: */
	/* Instruction range to suppress interrupts even if enabled */
	unsigned long noirq_start, noirq_end;
};