Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a161883a authored by Rusty Russell
Browse files

lguest: Tell Guest net not to notify us on every packet xmit



virtio_ring has the ability to suppress notifications.  This prevents
a guest exit for every packet, but we need to set a timer on packet
receipt to re-check if there were any remaining packets.

Here are the times for 1G TCP Guest->Host with different timeout
settings (it matters because the TCP window doesn't grow big enough to
fill the entire buffer):

Timeout value	Seconds		Xmit/Recv/Timeout
None (before)	25.3784		xmit 7750233 recv 1
2500 usec	62.5119		xmit 207020 recv 2 timeout 207020
1000 usec	34.5379		xmit 207003 recv 2 timeout 207003
750 usec	29.2305		xmit 207002 recv 1 timeout 207002
500 usec	19.1887		xmit 561141 recv 1 timeout 559657
250 usec	20.0465		xmit 214128 recv 2 timeout 214110
100 usec	19.2583		xmit 561621 recv 1 timeout 560153

(Note that these values are sensitive to the GSO patches which come
 later, and probably other traffic-related variables, so take with a
 large grain of salt).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent 5dae785a
Loading
Loading
Loading
Loading
+93 −13
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <sched.h>
#include <limits.h>
#include <stddef.h>
#include <signal.h>
#include "linux/lguest_launcher.h"
#include "linux/virtio_config.h"
#include "linux/virtio_net.h"
@@ -81,6 +82,8 @@ static int waker_fd;
static void *guest_base;
/* The maximum guest physical address allowed, and maximum possible. */
static unsigned long guest_limit, guest_max;
/* The pipe for the signal handler to write to. */
static int timeoutpipe[2];

/* a per-cpu variable indicating whose vcpu is currently running */
static unsigned int __thread cpu_id;
@@ -156,11 +159,14 @@ struct virtqueue
	/* Last available index we saw. */
	u16 last_avail_idx;

	/* The routine to call when the Guest pings us. */
	void (*handle_output)(int fd, struct virtqueue *me);
	/* The routine to call when the Guest pings us, or timeout. */
	void (*handle_output)(int fd, struct virtqueue *me, bool timeout);

	/* Outstanding buffers */
	unsigned int inflight;

	/* Is this blocked awaiting a timer? */
	bool blocked;
};

/* Remember the arguments to the program so we can "reboot" */
@@ -874,7 +880,7 @@ static bool handle_console_input(int fd, struct device *dev)

/* Handling output for console is simple: we just get all the output buffers
 * and write them to stdout. */
static void handle_console_output(int fd, struct virtqueue *vq)
static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
{
	unsigned int head, out, in;
	int len;
@@ -889,6 +895,21 @@ static void handle_console_output(int fd, struct virtqueue *vq)
	}
}

/* Ask the Guest to stop notifying us about this virtqueue and arm a timer,
 * so the queue gets re-checked shortly rather than on every kick. */
static void block_vq(struct virtqueue *vq)
{
	/* Zero interval means the timer does not reload; it expires once,
	 * 500 usec from now. */
	struct itimerval timer = {
		.it_interval = { .tv_sec = 0, .tv_usec = 0 },
		.it_value = { .tv_sec = 0, .tv_usec = 500 },
	};

	/* Suppress Guest notifications while we wait for the timer... */
	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
	/* ...and mark the queue so the timeout handler knows to flush it. */
	vq->blocked = true;

	setitimer(ITIMER_REAL, &timer, NULL);
}

/*
 * The Network
 *
@@ -896,9 +917,9 @@ static void handle_console_output(int fd, struct virtqueue *vq)
 * and write them (ignoring the first element) to this device's file descriptor
 * (/dev/net/tun).
 */
static void handle_net_output(int fd, struct virtqueue *vq)
static void handle_net_output(int fd, struct virtqueue *vq, bool timeout)
{
	unsigned int head, out, in;
	unsigned int head, out, in, num = 0;
	int len;
	struct iovec iov[vq->vring.num];

@@ -912,7 +933,12 @@ static void handle_net_output(int fd, struct virtqueue *vq)
		(void)convert(&iov[0], struct virtio_net_hdr);
		len = writev(vq->dev->fd, iov+1, out-1);
		add_used_and_trigger(fd, vq, head, len);
		num++;
	}

	/* Block further kicks and set up a timer if we saw anything. */
	if (!timeout && num)
		block_vq(vq);
}

/* This is where we handle a packet coming in from the tun device to our
@@ -967,18 +993,18 @@ static bool handle_tun_input(int fd, struct device *dev)
/*L:215 This is the callback attached to the network and console input
 * virtqueues: it ensures we try again, in case we stopped console or net
 * delivery because Guest didn't have any buffers. */
static void enable_fd(int fd, struct virtqueue *vq)
static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
{
	add_device_fd(vq->dev->fd);
	/* Tell waker to listen to it again */
	write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
}

static void net_enable_fd(int fd, struct virtqueue *vq)
static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout)
{
	/* We don't need to know again when Guest refills receive buffer. */
	vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
	enable_fd(fd, vq);
	enable_fd(fd, vq, timeout);
}

/* When the Guest tells us they updated the status field, we handle it. */
@@ -1047,7 +1073,7 @@ static void handle_output(int fd, unsigned long addr)
			if (strcmp(vq->dev->name, "console") != 0)
				verbose("Output to %s\n", vq->dev->name);
			if (vq->handle_output)
				vq->handle_output(fd, vq);
				vq->handle_output(fd, vq, false);
			return;
		}
	}
@@ -1061,6 +1087,29 @@ static void handle_output(int fd, unsigned long addr)
	      strnlen(from_guest_phys(addr), guest_limit - addr));
}

/* Called when the timeout pipe is readable: drain the pipe, then for every
 * virtqueue marked blocked, re-enable Guest notifications and run its output
 * handler with the timeout flag set. */
static void handle_timeout(int fd)
{
	struct device *dev;
	char drain[32];

	/* Empty the pipe (up to sizeof(drain) bytes) written by the alarm
	 * handler. */
	read(timeoutpipe[0], drain, sizeof(drain));

	/* Walk every virtqueue of every device looking for blocked ones. */
	for (dev = devices.dev; dev; dev = dev->next) {
		struct virtqueue *vq;

		for (vq = dev->vq; vq; vq = vq->next) {
			if (vq->blocked) {
				/* Let the Guest notify us again, then flush
				 * whatever is pending on the queue. */
				vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
				vq->blocked = false;
				if (vq->handle_output)
					vq->handle_output(fd, vq, true);
			}
		}
	}
}

/* This is called when the Waker wakes us up: check for incoming file
 * descriptors. */
static void handle_input(int fd)
@@ -1071,9 +1120,14 @@ static void handle_input(int fd)
	for (;;) {
		struct device *i;
		fd_set fds = devices.infds;
		int num;

		num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
		/* Could get interrupted */
		if (num < 0)
			continue;
		/* If nothing is ready, we're done. */
		if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
		if (num == 0)
			break;

		/* Otherwise, call the device(s) which have readable file
@@ -1097,6 +1151,10 @@ static void handle_input(int fd)
				write(waker_fd, &dev_fd, sizeof(dev_fd));
			}
		}

		/* Is this the timeout fd? */
		if (FD_ISSET(timeoutpipe[0], &fds))
			handle_timeout(fd);
	}
}

@@ -1145,7 +1203,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
/* Each device descriptor is followed by the description of its virtqueues.  We
 * specify how many descriptors the virtqueue is to have. */
static void add_virtqueue(struct device *dev, unsigned int num_descs,
			  void (*handle_output)(int fd, struct virtqueue *me))
			  void (*handle_output)(int, struct virtqueue *, bool))
{
	unsigned int pages;
	struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1161,6 +1219,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
	vq->last_avail_idx = 0;
	vq->dev = dev;
	vq->inflight = 0;
	vq->blocked = false;

	/* Initialize the configuration. */
	vq->config.num = num_descs;
@@ -1293,6 +1352,24 @@ static void setup_console(void)
}
/*:*/

/* SIGALRM handler: write one byte to the timeout pipe so that the read end
 * becomes readable and the input loop runs the timeout handling.
 *
 * @sig: signal number (unused). */
static void timeout_alarm(int sig)
{
	/* write() may modify errno.  This handler can run between a failing
	 * call in the interrupted code and that code's inspection of errno,
	 * so save it here and restore it before returning. */
	int saved_errno = errno;

	(void)sig;

	/* The result is deliberately ignored: there is nothing useful to do
	 * about a failed write from inside a signal handler. */
	write(timeoutpipe[1], "", 1);

	errno = saved_errno;
}

/* Set up the timeout machinery: create the timeout pipe, make its write end
 * non-blocking, register its read end via add_device_fd(), and install
 * timeout_alarm() as the SIGALRM handler.
 *
 * Any failure is fatal and reported through err(). */
static void setup_timeout(void)
{
	int flags;

	if (pipe(timeoutpipe) != 0)
		err(1, "Creating timeout pipe");

	/* Fetch the current flags separately: if F_GETFL fails it returns -1,
	 * and OR-ing that into the F_SETFL argument would hand fcntl() a
	 * garbage flag word instead of reporting the error. */
	flags = fcntl(timeoutpipe[1], F_GETFL);
	if (flags == -1
	    || fcntl(timeoutpipe[1], F_SETFL, flags | O_NONBLOCK) != 0)
		err(1, "Making timeout pipe nonblocking");

	add_device_fd(timeoutpipe[0]);

	/* Without the handler the timer expiry would never reach the pipe,
	 * so treat a failed installation as fatal too. */
	if (signal(SIGALRM, timeout_alarm) == SIG_ERR)
		err(1, "Installing timeout signal handler");
}

/*M:010 Inter-guest networking is an interesting area.  Simplest is to have a
 * --sharenet=<name> option which opens or creates a named pipe.  This can be
 * used to send packets to another guest in a 1:1 manner.
@@ -1653,7 +1730,7 @@ static bool handle_io_finish(int fd, struct device *dev)
}

/* When the Guest submits some I/O, we just need to wake the I/O thread. */
static void handle_virtblk_output(int fd, struct virtqueue *vq)
static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
{
	struct vblk_info *vblk = vq->dev->priv;
	char c = 0;
@@ -1824,7 +1901,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
		/* ERESTART means that we need to reboot the guest */
		} else if (errno == ERESTART) {
			restart_guest();
		/* EAGAIN means the Waker wanted us to look at some input.
		/* EAGAIN means a signal (timeout).
		 * Anything else means a bug or incompatible change. */
		} else if (errno != EAGAIN)
			err(1, "Running guest failed");
@@ -1948,6 +2025,9 @@ int main(int argc, char *argv[])
	/* We always have a console device */
	setup_console();

	/* We can timeout waiting for Guest network transmit. */
	setup_timeout();

	/* Now we load the kernel */
	start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));