Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 481eaec3 authored by Michael S. Tsirkin
Browse files

tools/virtio: add ringtest utilities



This adds micro-benchmarks useful for tuning virtio ring layouts.
Three layouts are currently implemented:

- virtio 0.9 compatible one
- an experimental extension bypassing the ring index, polling ring
  itself instead
- an experimental extension bypassing avail and used ring completely

Typical use:

sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring

It doesn't depend on the kernel directly, but it's handy
to have as much virtio stuff as possible in one tree.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent fb9b050c
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
# Build the virtio ring-layout micro-benchmarks.
# "all" is declared first (with no prerequisites) so it becomes the
# default goal; the prerequisite list is supplied by the second rule --
# GNU make merges prerequisites of duplicate targets.
all:

all: ring virtio_ring_0_9 virtio_ring_poll

# The benchmarks are multithreaded: build optimized, with debug info.
CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
LDFLAGS += -pthread -O2 -ggdb

# Every object depends on the shared harness interface in main.h.
main.o: main.c main.h
ring.o: ring.c main.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
# virtio_ring_poll also depends on virtio_ring_0_9.c -- presumably it
# builds on that source directly; confirm in virtio_ring_poll.c.
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
# Each benchmark binary = one ring implementation + the common harness.
ring: ring.o main.o
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o
clean:
	-rm main.o
	-rm ring.o ring
	-rm virtio_ring_0_9.o virtio_ring_0_9
	-rm virtio_ring_poll.o virtio_ring_poll

.PHONY: all clean
+2 −0
Original line number Diff line number Diff line
Partial implementation of various ring layouts, useful to tune virtio design.
Uses shared memory heavily.
+366 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Command line processing and common functions for ring benchmarking.
 */
#define _GNU_SOURCE
#include <getopt.h>
#include <pthread.h>
#include <assert.h>
#include <sched.h>
#include "main.h"
#include <sys/eventfd.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>

/* Benchmark parameters; all overridable from the command line (see main). */
int runcycles = 10000000;	/* total number of buffers to push through */
int max_outstanding = INT_MAX;	/* cap on buffers in flight at once */
int batch = 1;			/* kick once per this many submissions */

bool do_sleep = false;	/* sleep on eventfds instead of busy-polling */
bool do_relax = false;	/* issue cpu_relax() inside busy-wait loops */
bool do_exit = true;	/* simulate vmexit/vmentry cost around notifications */

unsigned ring_size = 256;	/* ring entries; must be a power of two */

/* eventfds acting as the guest->host (kick) and host->guest (call) doorbells */
static int kickfd = -1;
static int callfd = -1;

/* Signal the peer through eventfd @fd, charging the simulated
 * VM exit/entry cost around the write.
 */
void notify(int fd)
{
	const unsigned long long one = 1;
	ssize_t written;

	vmexit();
	written = write(fd, &one, sizeof one);
	assert(written == sizeof one);
	vmentry();
}

/* Block until the peer signals eventfd @fd, charging the simulated
 * VM exit/entry cost around the read.
 */
void wait_for_notify(int fd)
{
	unsigned long long counter = 1;
	ssize_t got;

	vmexit();
	got = read(fd, &counter, sizeof counter);
	assert(got == sizeof counter);
	vmentry();
}

/* Guest -> host doorbell. */
void kick(void)
{
	notify(kickfd);
}

/* Host side: block until the guest kicks. */
void wait_for_kick(void)
{
	wait_for_notify(kickfd);
}

/* Host -> guest completion notification. */
void call(void)
{
	notify(callfd);
}

/* Guest side: block until the host calls. */
void wait_for_call(void)
{
	wait_for_notify(callfd);
}

/* Pin the calling thread to the CPU named by the numeric string @arg
 * (base auto-detected by strtol); no-op when @arg is NULL.
 * Aborts on malformed input or an out-of-range CPU number.
 */
void set_affinity(const char *arg)
{
	cpu_set_t cpuset;
	int ret;
	pthread_t self;
	long int cpu;
	char *endptr;

	if (!arg)
		return;

	cpu = strtol(arg, &endptr, 0);
	/* reject trailing garbage after the number */
	assert(!*endptr);

	/* BUGFIX: was "cpu >= 0 || cpu < CPU_SETSIZE", which is always
	 * true; the range check needs a conjunction.
	 */
	assert(cpu >= 0 && cpu < CPU_SETSIZE);

	self = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
	assert(!ret);
}

/* Guest-side benchmark loop: keep the ring as full as permitted by
 * max_outstanding and reclaim completions until runcycles buffers have
 * completed.  With do_sleep, waits for host "call" notifications
 * instead of polling for completions.
 */
static void run_guest(void)
{
	int completed_before;
	int completed = 0;
	int started = 0;
	int bufs = runcycles;
	int spurious = 0;	/* wakeups that found no new completion */
	int r;
	unsigned len;
	void *buf;
	int tokick = batch;	/* countdown until the next kick */

	for (;;) {
		if (do_sleep)
			disable_call();
		completed_before = completed;
		/* inner loop: submit and reclaim until both directions stall */
		do {
			if (started < bufs &&
			    started - completed < max_outstanding) {
				r = add_inbuf(0, NULL, "Hello, world!");
				if (__builtin_expect(r == 0, true)) {
					++started;
					if (!--tokick) {
						tokick = batch;
						if (do_sleep)
							kick_available();
					}

				}
			} else
				r = -1;

			/* Flush out completed bufs if any */
			if (get_buf(&len, &buf)) {
				++completed;
				if (__builtin_expect(completed == bufs, false))
					return;
				r = 0;
			}
		} while (r == 0);
		if (completed == completed_before)
			++spurious;
		assert(completed <= bufs);
		assert(started <= bufs);
		if (do_sleep) {
			/* enable_call returns true when it is safe to sleep */
			if (enable_call())
				wait_for_call();
		} else {
			poll_used();
		}
	}
}

/* Host-side benchmark loop: consume available descriptors and complete
 * them until runcycles buffers are done.  With do_sleep, waits for
 * guest "kick" notifications and sends a call per completed buffer.
 */
static void run_host(void)
{
	int completed_before;
	int completed = 0;
	int spurious = 0;	/* wakeups that found no new work */
	int bufs = runcycles;
	unsigned len;
	void *buf;

	for (;;) {
		if (do_sleep) {
			/* enable_kick returns true when it is safe to sleep */
			if (enable_kick())
				wait_for_kick();
		} else {
			poll_avail();
		}
		if (do_sleep)
			disable_kick();
		completed_before = completed;
		/* drain everything the guest has made available */
		while (__builtin_expect(use_buf(&len, &buf), true)) {
			if (do_sleep)
				call_used();
			++completed;
			if (__builtin_expect(completed == bufs, false))
				return;
		}
		if (completed == completed_before)
			++spurious;
		assert(completed <= bufs);
		if (completed == bufs)
			break;
	}
}

/* pthread entry point for the guest side; @arg is an optional CPU
 * affinity string (may be NULL).
 */
void *start_guest(void *arg)
{
	const char *affinity = arg;

	set_affinity(affinity);
	run_guest();
	pthread_exit(NULL);
}

/* pthread entry point for the host side; @arg is an optional CPU
 * affinity string (may be NULL).
 */
void *start_host(void *arg)
{
	const char *affinity = arg;

	set_affinity(affinity);
	run_host();
	pthread_exit(NULL);
}

/* All options are long-only; the short codes below are internal tags
 * returned by getopt_long, not user-visible flags.
 */
static const char optstring[] = "";
static const struct option longopts[] = {
	{
		.name = "help",
		.has_arg = no_argument,
		.val = 'h',
	},
	{
		/* CPU to pin the host thread to */
		.name = "host-affinity",
		.has_arg = required_argument,
		.val = 'H',
	},
	{
		/* CPU to pin the guest thread to */
		.name = "guest-affinity",
		.has_arg = required_argument,
		.val = 'G',
	},
	{
		/* number of ring entries (must be a power of two) */
		.name = "ring-size",
		.has_arg = required_argument,
		.val = 'R',
	},
	{
		/* total buffers to run through the ring */
		.name = "run-cycles",
		.has_arg = required_argument,
		.val = 'C',
	},
	{
		/* max buffers in flight */
		.name = "outstanding",
		.has_arg = required_argument,
		.val = 'o',
	},
	{
		/* kick once per this many submissions */
		.name = "batch",
		.has_arg = required_argument,
		.val = 'b',
	},
	{
		/* sleep on eventfds instead of polling */
		.name = "sleep",
		.has_arg = no_argument,
		.val = 's',
	},
	{
		/* cpu_relax() in busy-wait loops */
		.name = "relax",
		.has_arg = no_argument,
		.val = 'x',
	},
	{
		/* simulate vmexit/vmentry cost */
		.name = "exit",
		.has_arg = no_argument,
		.val = 'e',
	},
	{
		/* sentinel terminating the array */
	}
};

/* Print usage, with current defaults, to stderr. */
static void help(void)
{
	/* BUGFIX: ring_size is unsigned, so it must be printed with %u;
	 * %d with an unsigned argument is a format/argument mismatch.
	 */
	fprintf(stderr, "Usage: <test> [--help]"
		" [--host-affinity H]"
		" [--guest-affinity G]"
		" [--ring-size R (default: %u)]"
		" [--run-cycles C (default: %d)]"
		" [--batch b]"
		" [--outstanding o]"
		" [--sleep]"
		" [--relax]"
		" [--exit]"
		"\n",
		ring_size,
		runcycles);
}

/* Parse options, create the eventfd doorbells and the ring, then run
 * the host and guest threads to completion.  Invalid numeric options
 * abort via assert; unexpected positional arguments print usage.
 */
int main(int argc, char **argv)
{
	int ret;
	pthread_t host, guest;
	void *tret;
	char *host_arg = NULL;	/* --host-affinity CPU string */
	char *guest_arg = NULL;	/* --guest-affinity CPU string */
	char *endptr;
	long int c;

	kickfd = eventfd(0, 0);
	assert(kickfd >= 0);
	callfd = eventfd(0, 0);
	assert(callfd >= 0);

	for (;;) {
		int o = getopt_long(argc, argv, optstring, longopts, NULL);
		switch (o) {
		case -1:
			goto done;
		case '?':
			help();
			exit(2);
		case 'H':
			host_arg = optarg;
			break;
		case 'G':
			guest_arg = optarg;
			break;
		case 'R':
			/* ring size must be a power of two: the ring code
			 * masks indices with (ring_size - 1)
			 */
			ring_size = strtol(optarg, &endptr, 0);
			assert(ring_size && !(ring_size & (ring_size - 1)));
			assert(!*endptr);
			break;
		case 'C':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			runcycles = c;
			break;
		case 'o':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			max_outstanding = c;
			break;
		case 'b':
			c = strtol(optarg, &endptr, 0);
			assert(!*endptr);
			assert(c > 0 && c < INT_MAX);
			batch = c;
			break;
		case 's':
			do_sleep = true;
			break;
		case 'x':
			do_relax = true;
			break;
		case 'e':
			/* NOTE(review): do_exit already defaults to true, so
			 * --exit is currently a no-op - confirm whether the
			 * default was meant to be false.
			 */
			do_exit = true;
			break;
		default:
			/* includes 'h' (--help): prints usage, exits nonzero */
			help();
			exit(4);
			break;
		}
	}

	/* does nothing here, used to make sure all smp APIs compile */
	smp_acquire();
	smp_release();
	smp_mb();
done:

	/* no point in batching more kicks than can be in flight */
	if (batch > max_outstanding)
		batch = max_outstanding;

	if (optind < argc) {
		help();
		exit(4);
	}
	alloc_ring();

	ret = pthread_create(&host, NULL, start_host, host_arg);
	assert(!ret);
	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
	assert(!ret);

	ret = pthread_join(guest, &tret);
	assert(!ret);
	ret = pthread_join(host, &tret);
	assert(!ret);
	return 0;
}
+119 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Common macros and functions for ring benchmarking.
 */
#ifndef MAIN_H
#define MAIN_H

#include <stdbool.h>

extern bool do_exit;

#if defined(__x86_64__) || defined(__i386__)
#include "x86intrin.h"

/* Busy-wait for approximately @cycles TSC ticks. */
static inline void wait_cycles(unsigned long long cycles)
{
	unsigned long long t;

	t = __rdtsc();
	while (__rdtsc() - t < cycles) {}
}

/* Rough cost, in TSC ticks, charged to simulate a VM exit / entry. */
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500

#else
/* No cycle counter available on this architecture: exit/entry
 * simulation is unsupported, so bail out hard if it is ever requested
 * (only reachable when do_exit is true - see vmexit/vmentry below).
 */
static inline void wait_cycles(unsigned long long cycles)
{
	_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif

/* Simulate the overhead of a VM exit before touching a notification;
 * skipped entirely when do_exit is false.
 */
static inline void vmexit(void)
{
	if (!do_exit)
		return;
	
	wait_cycles(VMEXIT_CYCLES);
}
/* Simulate the overhead of re-entering the VM after a notification. */
static inline void vmentry(void)
{
	if (!do_exit)
		return;
	
	wait_cycles(VMENTRY_CYCLES);
}

/* Interface implemented by each ring layout (ring.c and friends).
 * All prototypes use explicit (void)/named parameter lists; empty
 * parentheses declare an unspecified parameter list in C and defeat
 * compiler argument checking.
 */
void alloc_ring(void);
/* guest side */
int add_inbuf(unsigned len, void *buf, void *data);
void *get_buf(unsigned *lenp, void **bufp);
void disable_call(void);
bool enable_call(void);
void kick_available(void);
void poll_used(void);
/* host side */
void disable_kick(void);
bool enable_kick(void);
bool use_buf(unsigned *lenp, void **bufp);
void call_used(void);
void poll_avail(void);

/* implemented by main: eventfd doorbells shared by all layouts */
extern bool do_sleep;
void kick(void);
void wait_for_kick(void);
void call(void);
void wait_for_call(void);

extern unsigned ring_size;

/* Compiler barrier - similar to what Linux uses */
#define barrier() asm volatile("" ::: "memory")

/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
/* PAUSE instruction ("rep; nop"): be polite to the sibling hyperthread
 * while spinning.
 */
#define cpu_relax() asm ("rep; nop" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif

extern bool do_relax;

/* One iteration of a busy-wait loop: cpu_relax() with --relax,
 * otherwise just a compiler barrier so the loop is not optimized away.
 */
static inline void busy_wait(void)
{
	if (do_relax)
		cpu_relax();
	else
		/* prevent compiler from removing busy loops */
		barrier();
}

/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()

/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
    barrier(); \
    __atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)

#define smp_acquire() do { \
    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
    barrier(); \
} while (0)

#endif /* MAIN_H */
+272 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Event-index trigger check (virtio 0.9 style), in free-running
 * (unwrapped) index space:
 *   next  - where the next entry will be written
 *   prev  - value of "next" when the event last triggered
 *   event - peer requested an event after writing this entry
 * Returns true when writing entry "next - 1" stepped past "event",
 * i.e. the peer's requested point lies inside (prev, next].
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	unsigned short past_event = next - event - 1;
	unsigned short progressed = next - prev;

	return past_event < progressed;
}

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

/* One ring entry; the DESC_HW flag transfers ownership between sides. */
struct desc {
	unsigned short flags;
	unsigned short index;	/* identifies the data[] slot for this buffer */
	unsigned len;
	unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;	/* guest kicks after crossing this */
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;	/* host calls after crossing this */
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

/* Guest-private per-buffer bookkeeping, indexed by desc.index. */
struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

/* Guest-side state, padded to avoid false sharing with the host. */
struct guest {
	unsigned avail_idx;		/* next free-running slot to publish */
	unsigned last_used_idx;		/* next free-running slot to reclaim */
	unsigned num_free;		/* descriptors available to publish */
	unsigned kicked_avail_idx;	/* avail_idx at the last kick */
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

/* Host-side state, padded to avoid false sharing with the guest. */
struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;		/* next free-running slot to complete */
	unsigned called_used_idx;	/* used_idx at the last call */
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
/* Allocate and initialize the ring, the event page and the guest-side
 * bookkeeping.  Exits with status 3 on any allocation failure.
 */
void alloc_ring(void)
{
	int ret;
	unsigned i;	/* unsigned: compared against unsigned ring_size */

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		/* BUGFIX: posix_memalign reports failure via its return
		 * value and does not set errno, so perror() here printed
		 * a stale/irrelevant error; report the code explicitly.
		 */
		fprintf(stderr, "Unable to allocate ring buffer: %s\n",
			strerror(ret));
		exit(3);
	}
	/* calloc zeroes the event page, replacing malloc + memset */
	event = calloc(1, sizeof *event);
	if (!event) {
		perror("Unable to allocate event buffer");
		exit(3);
	}
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	/* give every descriptor a unique index; DESC_HW starts clear */
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof *data);
	if (!data) {
		perror("Unable to allocate data buffer");
		exit(3);
	}
}

/* guest side */

/* Publish one buffer to the host.  @len/@buf describe the buffer;
 * @datap is an opaque cookie handed back by get_buf on completion.
 * Returns 0 on success, -1 when no descriptors are free.
 */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	/* free-running avail_idx masked down to a ring slot */
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with consumer that is polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void*)buf;
	ring[head].len = len;
	/* read below might bypass write above. That is OK because it's just an
	 * optimization. If this happens, we will get the cache line in a
	 * shared state which is unfortunate, but probably not worth it to
	 * add an explicit full barrier to avoid this.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	/* hand the descriptor to the host; this store must come last */
	ring[head].flags = DESC_HW;

	return 0;
}

/* Reclaim the next completed buffer, if any.  On success stores the
 * completed length in *lenp, the original buffer in *bufp, and returns
 * the datap cookie passed to add_inbuf; returns NULL when the next
 * descriptor is still owned by the host.
 */
void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	/* host still owns this slot: nothing completed */
	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}

/* Guest: spin until the host completes the descriptor at the current
 * reclaim position (DESC_HW cleared).
 */
void poll_used(void)
{
	unsigned slot = guest.last_used_idx & (ring_size - 1);

	for (;;) {
		if (!(ring[slot].flags & DESC_HW))
			break;
		busy_wait();
	}
}

/* Guest: disable completion notifications - intentionally a no-op. */
void disable_call()
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

/* Guest: re-arm completion notifications by publishing call_index.
 * Returns true when the next descriptor is still owned by the host
 * (safe to sleep and wait for a call); false means a completion has
 * already arrived and the caller should process it instead.
 */
bool enable_call()
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return ring[head].flags & DESC_HW;
}

/* Guest: notify the host of newly published descriptors, but only when
 * the host's kick_index shows it wants an event in the range published
 * since the last kick (event-index suppression).
 */
void kick_available(void)
{
	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	if (!need_event(event->kick_index,
			guest.avail_idx,
			guest.kicked_avail_idx))
		return;

	guest.kicked_avail_idx = guest.avail_idx;
	kick();
}

/* host side */
/* Host: disable kick notifications - intentionally a no-op. */
void disable_kick()
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

/* Host: re-arm kick notifications by publishing kick_index.
 * Returns true when no descriptor is pending at the consume position
 * (safe to sleep and wait for a kick); false means work is already
 * available and the caller should process it instead.
 */
bool enable_kick()
{
	unsigned head = (ring_size - 1) & host.used_idx;

	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return !(ring[head].flags & DESC_HW);
}

/* Host: spin until the guest publishes a descriptor (DESC_HW set) at
 * the current consume position.
 */
void poll_avail(void)
{
	unsigned slot = host.used_idx & (ring_size - 1);

	for (;;) {
		if (ring[slot].flags & DESC_HW)
			break;
		busy_wait();
	}
}

/* Host: consume and complete the next available descriptor in place.
 * Stores (unused here) length/buffer results through lenp/bufp is not
 * done - completion is simulated by decrementing len.  Returns false
 * when no descriptor is available.
 */
bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	/* guest has not published this slot yet */
	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: alternative is to write len and flags in one access -
	 * possible on 64 bit architectures but wmb is free on Intel anyway
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	/* return the descriptor to the guest; must be the last store */
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

/* Host: notify the guest of newly completed descriptors, but only when
 * the guest's call_index shows it wants an event in the range completed
 * since the last call (event-index suppression).
 */
void call_used(void)
{
	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();
	if (!need_event(event->call_index,
			host.used_idx,
			host.called_used_idx))
		return;

	host.called_used_idx = host.used_idx;
	call();
}
Loading