Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6e06c0e2 authored by David S. Miller
Browse files


Michael S. Tsirkin says:

--------------------
These are mostly bugfixes.
I hope to merge some more patches by 3.5: in particular,
the vlan support fixes are waiting for Eric's ack,
and a version of the tracepoint patch might be
ready in time. But let's merge what's ready so it's testable.

This includes a ton of zerocopy fixes by Jason -
good stuff but too intrusive for 3.4 and zerocopy is experimental
anyway.

virtio has supported delayed interrupts for a while now,
so adding support for them to the virtio test tool makes sense.
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3a084ddb c70aa540
Loading
Loading
Loading
Loading
+40 −17
Original line number Diff line number Diff line
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
		if (copy > size) {
			++from;
			--count;
		}
			offset = 0;
		} else
			offset += size;
		copy -= size;
		offset1 += size;
		offset = 0;
	}

	if (len == offset1)
@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
		struct page *page[MAX_SKB_FRAGS];
		int num_pages;
		unsigned long base;
		unsigned long truesize;

		len = from->iov_len - offset1;
		len = from->iov_len - offset;
		if (!len) {
			offset1 = 0;
			offset = 0;
			++from;
			continue;
		}
		base = (unsigned long)from->iov_base + offset1;
		base = (unsigned long)from->iov_base + offset;
		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
		if (i + size > MAX_SKB_FRAGS)
			return -EMSGSIZE;
		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
		if ((num_pages != size) ||
		    (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
			/* put_page is in skb free */
		if (num_pages != size) {
			for (i = 0; i < num_pages; i++)
				put_page(page[i]);
			return -EFAULT;
		}
		truesize = size * PAGE_SIZE;
		skb->data_len += len;
		skb->len += len;
		skb->truesize += len;
		atomic_add(len, &skb->sk->sk_wmem_alloc);
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (len) {
			int off = base & ~PAGE_MASK;
			int size = min_t(int, len, PAGE_SIZE - off);
@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
			len -= size;
			i++;
		}
		offset1 = 0;
		offset = 0;
		++from;
	}
	return 0;
@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
	int err;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int vnet_hdr_len = 0;
	int copylen;
	int copylen = 0;
	bool zerocopy = false;

	if (q->flags & IFF_VNET_HDR) {
@@ -675,14 +681,30 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
	if (unlikely(len < ETH_HLEN))
		goto err;

	err = -EMSGSIZE;
	if (unlikely(count > UIO_MAXIOV))
		goto err;

	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
		zerocopy = true;

	if (zerocopy) {
		/* Userspace may produce vectors with count greater than
		 * MAX_SKB_FRAGS, so we need to linearize parts of the skb
		 * to let the rest of data to be fit in the frags.
		 */
		if (count > MAX_SKB_FRAGS) {
			copylen = iov_length(iv, count - MAX_SKB_FRAGS);
			if (copylen < vnet_hdr_len)
				copylen = 0;
			else
				copylen -= vnet_hdr_len;
		}
		/* There are 256 bytes to be copied in skb, so there is enough
		 * room for skb expand head in case it is used.
		 * The rest buffer is mapped from userspace.
		 */
		if (copylen < vnet_hdr.hdr_len)
			copylen = vnet_hdr.hdr_len;
		if (!copylen)
			copylen = GOODCOPY_LEN;
@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
	if (!skb)
		goto err;

	if (zerocopy) {
	if (zerocopy)
		err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	} else
	else
		err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
						   len);
	if (err)
@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
	rcu_read_lock_bh();
	vlan = rcu_dereference_bh(q->vlan);
	/* copy skb_ubuf_info for callback when skb has no error */
	if (zerocopy)
	if (zerocopy) {
		skb_shinfo(skb)->destructor_arg = m->msg_control;
		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
	}
	if (vlan)
		macvlan_start_xmit(skb, vlan->dev);
	else
+5 −2
Original line number Diff line number Diff line
@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net)
	if (wmem < sock->sk->sk_sndbuf / 2)
		tx_poll_stop(net);
	hdr_size = vq->vhost_hlen;
	zcopy = vhost_sock_zcopy(sock);
	zcopy = vq->ubufs;

	for (;;) {
		/* Release DMAs done buffers first */
@@ -257,6 +257,7 @@ static void handle_tx(struct vhost_net *net)
					UIO_MAXIOV;
			}
			vhost_discard_vq_desc(vq, 1);
			if (err == -EAGAIN || err == -ENOBUFS)
				tx_poll_start(net, sock);
			break;
		}
@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net)
				 " len %d != %zd\n", err, len);
		if (!zcopy)
			vhost_add_used_and_signal(&net->dev, vq, head, 0);
		else
			vhost_zerocopy_signal_used(vq);
		total_len += len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
+1 −0
Original line number Diff line number Diff line
@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf)
	struct vhost_ubuf_ref *ubufs = ubuf->ctx;
	struct vhost_virtqueue *vq = ubufs->vq;

	vhost_poll_queue(&vq->poll);
	/* set len = 1 to mark this desc buffers done DMA */
	vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+1 −0
Original line number Diff line number Diff line
@@ -203,6 +203,7 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
void virtqueue_disable_cb(struct virtqueue *vq);

bool virtqueue_enable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);

void *virtqueue_detach_unused_buf(struct virtqueue *vq);
struct virtqueue *vring_new_virtqueue(unsigned int num,
+22 −4
Original line number Diff line number Diff line
@@ -144,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev)
		}
}

static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
static void run_test(struct vdev_info *dev, struct vq_info *vq,
		     bool delayed, int bufs)
{
	struct scatterlist sl;
	long started = 0, completed = 0;
@@ -183,7 +184,11 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
		assert(started <= bufs);
		if (completed == bufs)
			break;
		if (virtqueue_enable_cb(vq->vq)) {
		if (delayed) {
			if (virtqueue_enable_cb_delayed(vq->vq))
				wait_for_interrupt(dev);
		} else {
			if (virtqueue_enable_cb(vq->vq))
				wait_for_interrupt(dev);
		}
	}
@@ -215,6 +220,14 @@ const struct option longopts[] = {
		.name = "no-indirect",
		.val = 'i',
	},
	{
		.name = "delayed-interrupt",
		.val = 'D',
	},
	{
		.name = "no-delayed-interrupt",
		.val = 'd',
	},
	{
	}
};
@@ -224,6 +237,7 @@ static void help()
	fprintf(stderr, "Usage: virtio_test [--help]"
		" [--no-indirect]"
		" [--no-event-idx]"
		" [--delayed-interrupt]"
		"\n");
}

@@ -233,6 +247,7 @@ int main(int argc, char **argv)
	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
		(1ULL << VIRTIO_RING_F_EVENT_IDX);
	int o;
	bool delayed = false;

	for (;;) {
		o = getopt_long(argc, argv, optstring, longopts, NULL);
@@ -251,6 +266,9 @@ int main(int argc, char **argv)
		case 'i':
			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
			break;
		case 'D':
			delayed = true;
			break;
		default:
			assert(0);
			break;
@@ -260,6 +278,6 @@ int main(int argc, char **argv)
done:
	vdev_info_init(&dev, features);
	vq_info_add(&dev, 256);
	run_test(&dev, &dev.vqs[0], 0x100000);
	run_test(&dev, &dev.vqs[0], delayed, 0x100000);
	return 0;
}