Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b5b1de35 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull virtio/vhost updates from Michael Tsirkin:
 "Fixes and tweaks:

   - virtio balloon page hinting support

   - vhost scsi control queue

   - misc fixes"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  MAINTAINERS: remove reference to bogus vsock file
  vhost/scsi: Use common handling code in request queue handler
  vhost/scsi: Extract common handling code from control queue handler
  vhost/scsi: Respond to control queue operations
  vhost/scsi: truncate T10 PI iov_iter to prot_bytes
  virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON
  mm/page_poison: expose page_poisoning_enabled to kernel modules
  virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT
  kvm_config: add CONFIG_VIRTIO_MENU
parents 90de1fb8 79f800b2
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -15858,7 +15858,6 @@ F: net/vmw_vsock/virtio_transport_common.c
F:	net/vmw_vsock/virtio_transport.c
F:	drivers/net/vsockmon.c
F:	drivers/vhost/vsock.c
F:	drivers/vhost/vsock.h
F:	tools/testing/vsock/

VIRTIO CONSOLE DRIVER
+329 −97
Original line number Diff line number Diff line
@@ -203,6 +203,19 @@ struct vhost_scsi {
	int vs_events_nr; /* num of pending events, protected by vq->mutex */
};

/*
 * Context for processing request and control queue operations.
 */
struct vhost_scsi_ctx {
	int head;
	unsigned int out, in;
	size_t req_size, rsp_size;
	size_t out_size, in_size;
	u8 *target, *lunp;
	void *req;
	struct iov_iter out_iter;
};

static struct workqueue_struct *vhost_scsi_workqueue;

/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -800,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
		pr_err("Faulted on virtio_scsi_cmd_resp\n");
}

static int
vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
		    struct vhost_scsi_ctx *vc)
{
	int ret = -ENXIO;

	vc->head = vhost_get_vq_desc(vq, vq->iov,
				     ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
				     NULL, NULL);

	pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
		 vc->head, vc->out, vc->in);

	/* On error, stop handling until the next kick. */
	if (unlikely(vc->head < 0))
		goto done;

	/* Nothing new?  Wait for eventfd to tell us they refilled. */
	if (vc->head == vq->num) {
		if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
			vhost_disable_notify(&vs->dev, vq);
			ret = -EAGAIN;
		}
		goto done;
	}

	/*
	 * Get the size of request and response buffers.
	 * FIXME: Not correct for BIDI operation
	 */
	vc->out_size = iov_length(vq->iov, vc->out);
	vc->in_size = iov_length(&vq->iov[vc->out], vc->in);

	/*
	 * Copy over the virtio-scsi request header, which for a
	 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
	 * single iovec may contain both the header + outgoing
	 * WRITE payloads.
	 *
	 * copy_from_iter() will advance out_iter, so that it will
	 * point at the start of the outgoing WRITE payload, if
	 * DMA_TO_DEVICE is set.
	 */
	iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
	ret = 0;

done:
	return ret;
}

static int
vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
{
	if (unlikely(vc->in_size < vc->rsp_size)) {
		vq_err(vq,
		       "Response buf too small, need min %zu bytes got %zu",
		       vc->rsp_size, vc->in_size);
		return -EINVAL;
	} else if (unlikely(vc->out_size < vc->req_size)) {
		vq_err(vq,
		       "Request buf too small, need min %zu bytes got %zu",
		       vc->req_size, vc->out_size);
		return -EIO;
	}

	return 0;
}

static int
vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
		   struct vhost_scsi_tpg **tpgp)
{
	int ret = -EIO;

	if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
					  &vc->out_iter))) {
		vq_err(vq, "Faulted on copy_from_iter\n");
	} else if (unlikely(*vc->lunp != 1)) {
		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
		vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
	} else {
		struct vhost_scsi_tpg **vs_tpg, *tpg;

		vs_tpg = vq->private_data;	/* validated at handler entry */

		tpg = READ_ONCE(vs_tpg[*vc->target]);
		if (unlikely(!tpg)) {
			vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
		} else {
			if (tpgp)
				*tpgp = tpg;
			ret = 0;
		}
	}

	return ret;
}

static void
vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
{
	struct vhost_scsi_tpg **vs_tpg, *tpg;
	struct virtio_scsi_cmd_req v_req;
	struct virtio_scsi_cmd_req_pi v_req_pi;
	struct vhost_scsi_ctx vc;
	struct vhost_scsi_cmd *cmd;
	struct iov_iter out_iter, in_iter, prot_iter, data_iter;
	struct iov_iter in_iter, prot_iter, data_iter;
	u64 tag;
	u32 exp_data_len, data_direction;
	unsigned int out = 0, in = 0;
	int head, ret, prot_bytes;
	size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
	size_t out_size, in_size;
	int ret, prot_bytes;
	u16 lun;
	u8 *target, *lunp, task_attr;
	u8 task_attr;
	bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
	void *req, *cdb;
	void *cdb;

	mutex_lock(&vq->mutex);
	/*
@@ -828,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
	if (!vs_tpg)
		goto out;

	memset(&vc, 0, sizeof(vc));
	vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp);

	vhost_disable_notify(&vs->dev, vq);

	for (;;) {
		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov), &out, &in,
					 NULL, NULL);
		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
			 head, out, in);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* Nothing new?  Wait for eventfd to tell us they refilled. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
				vhost_disable_notify(&vs->dev, vq);
				continue;
			}
			break;
		}
		/*
		 * Check for a sane response buffer so we can report early
		 * errors back to the guest.
		 */
		if (unlikely(vq->iov[out].iov_len < rsp_size)) {
			vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
				" size, got %zu bytes\n", vq->iov[out].iov_len);
			break;
		}
		ret = vhost_scsi_get_desc(vs, vq, &vc);
		if (ret)
			goto err;

		/*
		 * Setup pointers and values based upon different virtio-scsi
		 * request header if T10_PI is enabled in KVM guest.
		 */
		if (t10_pi) {
			req = &v_req_pi;
			req_size = sizeof(v_req_pi);
			lunp = &v_req_pi.lun[0];
			target = &v_req_pi.lun[1];
			vc.req = &v_req_pi;
			vc.req_size = sizeof(v_req_pi);
			vc.lunp = &v_req_pi.lun[0];
			vc.target = &v_req_pi.lun[1];
		} else {
			req = &v_req;
			req_size = sizeof(v_req);
			lunp = &v_req.lun[0];
			target = &v_req.lun[1];
			vc.req = &v_req;
			vc.req_size = sizeof(v_req);
			vc.lunp = &v_req.lun[0];
			vc.target = &v_req.lun[1];
		}
		/*
		 * FIXME: Not correct for BIDI operation
		 */
		out_size = iov_length(vq->iov, out);
		in_size = iov_length(&vq->iov[out], in);

		/*
		 * Copy over the virtio-scsi request header, which for a
		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
		 * single iovec may contain both the header + outgoing
		 * WRITE payloads.
		 *
		 * copy_from_iter() will advance out_iter, so that it will
		 * point at the start of the outgoing WRITE payload, if
		 * DMA_TO_DEVICE is set.
		 * Validate the size of request and response buffers.
		 * Check for a sane response buffer so we can report
		 * early errors back to the guest.
		 */
		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
		ret = vhost_scsi_chk_size(vq, &vc);
		if (ret)
			goto err;

		if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
			vq_err(vq, "Faulted on copy_from_iter\n");
			vhost_scsi_send_bad_target(vs, vq, head, out);
			continue;
		}
		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
		if (unlikely(*lunp != 1)) {
			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
			vhost_scsi_send_bad_target(vs, vq, head, out);
			continue;
		}
		ret = vhost_scsi_get_req(vq, &vc, &tpg);
		if (ret)
			goto err;

		ret = -EIO;	/* bad target on any error from here on */

		tpg = READ_ONCE(vs_tpg[*target]);
		if (unlikely(!tpg)) {
			/* Target does not exist, fail the request */
			vhost_scsi_send_bad_target(vs, vq, head, out);
			continue;
		}
		/*
		 * Determine data_direction by calculating the total outgoing
		 * iovec sizes + incoming iovec sizes vs. virtio-scsi request +
@@ -924,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
		 */
		prot_bytes = 0;

		if (out_size > req_size) {
		if (vc.out_size > vc.req_size) {
			data_direction = DMA_TO_DEVICE;
			exp_data_len = out_size - req_size;
			data_iter = out_iter;
		} else if (in_size > rsp_size) {
			exp_data_len = vc.out_size - vc.req_size;
			data_iter = vc.out_iter;
		} else if (vc.in_size > vc.rsp_size) {
			data_direction = DMA_FROM_DEVICE;
			exp_data_len = in_size - rsp_size;
			exp_data_len = vc.in_size - vc.rsp_size;

			iov_iter_init(&in_iter, READ, &vq->iov[out], in,
				      rsp_size + exp_data_len);
			iov_iter_advance(&in_iter, rsp_size);
			iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in,
				      vc.rsp_size + exp_data_len);
			iov_iter_advance(&in_iter, vc.rsp_size);
			data_iter = in_iter;
		} else {
			data_direction = DMA_NONE;
@@ -950,21 +1021,20 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
				if (data_direction != DMA_TO_DEVICE) {
					vq_err(vq, "Received non zero pi_bytesout,"
						" but wrong data_direction\n");
					vhost_scsi_send_bad_target(vs, vq, head, out);
					continue;
					goto err;
				}
				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
			} else if (v_req_pi.pi_bytesin) {
				if (data_direction != DMA_FROM_DEVICE) {
					vq_err(vq, "Received non zero pi_bytesin,"
						" but wrong data_direction\n");
					vhost_scsi_send_bad_target(vs, vq, head, out);
					continue;
					goto err;
				}
				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
			}
			/*
			 * Set prot_iter to data_iter, and advance past any
			 * Set prot_iter to data_iter and truncate it to
			 * prot_bytes, and advance data_iter past any
			 * preceeding prot_bytes that may be present.
			 *
			 * Also fix up the exp_data_len to reflect only the
@@ -973,6 +1043,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
			if (prot_bytes) {
				exp_data_len -= prot_bytes;
				prot_iter = data_iter;
				iov_iter_truncate(&prot_iter, prot_bytes);
				iov_iter_advance(&data_iter, prot_bytes);
			}
			tag = vhost64_to_cpu(vq, v_req_pi.tag);
@@ -996,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
			vq_err(vq, "Received SCSI CDB with command_size: %d that"
				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
				scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
			vhost_scsi_send_bad_target(vs, vq, head, out);
			continue;
				goto err;
		}
		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
					 exp_data_len + prot_bytes,
@@ -1005,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
		if (IS_ERR(cmd)) {
			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
			       PTR_ERR(cmd));
			vhost_scsi_send_bad_target(vs, vq, head, out);
			continue;
			goto err;
		}
		cmd->tvc_vhost = vs;
		cmd->tvc_vq = vq;
		cmd->tvc_resp_iov = vq->iov[out];
		cmd->tvc_in_iovs = in;
		cmd->tvc_resp_iov = vq->iov[vc.out];
		cmd->tvc_in_iovs = vc.in;

		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
			 cmd->tvc_cdb[0], cmd->tvc_lun);
@@ -1019,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
			 " %d\n", cmd, exp_data_len, prot_bytes, data_direction);

		if (data_direction != DMA_NONE) {
			ret = vhost_scsi_mapal(cmd,
					       prot_bytes, &prot_iter,
					       exp_data_len, &data_iter);
			if (unlikely(ret)) {
			if (unlikely(vhost_scsi_mapal(cmd, prot_bytes,
						      &prot_iter, exp_data_len,
						      &data_iter))) {
				vq_err(vq, "Failed to map iov to sgl\n");
				vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
				vhost_scsi_send_bad_target(vs, vq, head, out);
				continue;
				goto err;
			}
		}
		/*
@@ -1034,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
		 * complete the virtio-scsi request in TCM callback context via
		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
		 */
		cmd->tvc_vq_desc = head;
		cmd->tvc_vq_desc = vc.head;
		/*
		 * Dispatch cmd descriptor for cmwq execution in process
		 * context provided by vhost_scsi_workqueue.  This also ensures
@@ -1043,6 +1110,166 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
		 */
		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
		queue_work(vhost_scsi_workqueue, &cmd->work);
		ret = 0;
err:
		/*
		 * ENXIO:  No more requests, or read error, wait for next kick
		 * EINVAL: Invalid response buffer, drop the request
		 * EIO:    Respond with bad target
		 * EAGAIN: Pending request
		 */
		if (ret == -ENXIO)
			break;
		else if (ret == -EIO)
			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
	}
out:
	mutex_unlock(&vq->mutex);
}

static void
vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
			   struct vhost_virtqueue *vq,
			   struct vhost_scsi_ctx *vc)
{
	struct virtio_scsi_ctrl_tmf_resp __user *resp;
	struct virtio_scsi_ctrl_tmf_resp rsp;
	int ret;

	pr_debug("%s\n", __func__);
	memset(&rsp, 0, sizeof(rsp));
	rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
	resp = vq->iov[vc->out].iov_base;
	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
	if (!ret)
		vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
	else
		pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
}

static void
vhost_scsi_send_an_resp(struct vhost_scsi *vs,
			struct vhost_virtqueue *vq,
			struct vhost_scsi_ctx *vc)
{
	struct virtio_scsi_ctrl_an_resp __user *resp;
	struct virtio_scsi_ctrl_an_resp rsp;
	int ret;

	pr_debug("%s\n", __func__);
	memset(&rsp, 0, sizeof(rsp));	/* event_actual = 0 */
	rsp.response = VIRTIO_SCSI_S_OK;
	resp = vq->iov[vc->out].iov_base;
	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
	if (!ret)
		vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
	else
		pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
}

static void
vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
{
	union {
		__virtio32 type;
		struct virtio_scsi_ctrl_an_req an;
		struct virtio_scsi_ctrl_tmf_req tmf;
	} v_req;
	struct vhost_scsi_ctx vc;
	size_t typ_size;
	int ret;

	mutex_lock(&vq->mutex);
	/*
	 * We can handle the vq only after the endpoint is setup by calling the
	 * VHOST_SCSI_SET_ENDPOINT ioctl.
	 */
	if (!vq->private_data)
		goto out;

	memset(&vc, 0, sizeof(vc));

	vhost_disable_notify(&vs->dev, vq);

	for (;;) {
		ret = vhost_scsi_get_desc(vs, vq, &vc);
		if (ret)
			goto err;

		/*
		 * Get the request type first in order to setup
		 * other parameters dependent on the type.
		 */
		vc.req = &v_req.type;
		typ_size = sizeof(v_req.type);

		if (unlikely(!copy_from_iter_full(vc.req, typ_size,
						  &vc.out_iter))) {
			vq_err(vq, "Faulted on copy_from_iter tmf type\n");
			/*
			 * The size of the response buffer depends on the
			 * request type and must be validated against it.
			 * Since the request type is not known, don't send
			 * a response.
			 */
			continue;
		}

		switch (v_req.type) {
		case VIRTIO_SCSI_T_TMF:
			vc.req = &v_req.tmf;
			vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
			vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
			vc.lunp = &v_req.tmf.lun[0];
			vc.target = &v_req.tmf.lun[1];
			break;
		case VIRTIO_SCSI_T_AN_QUERY:
		case VIRTIO_SCSI_T_AN_SUBSCRIBE:
			vc.req = &v_req.an;
			vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req);
			vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
			vc.lunp = &v_req.an.lun[0];
			vc.target = NULL;
			break;
		default:
			vq_err(vq, "Unknown control request %d", v_req.type);
			continue;
		}

		/*
		 * Validate the size of request and response buffers.
		 * Check for a sane response buffer so we can report
		 * early errors back to the guest.
		 */
		ret = vhost_scsi_chk_size(vq, &vc);
		if (ret)
			goto err;

		/*
		 * Get the rest of the request now that its size is known.
		 */
		vc.req += typ_size;
		vc.req_size -= typ_size;

		ret = vhost_scsi_get_req(vq, &vc, NULL);
		if (ret)
			goto err;

		if (v_req.type == VIRTIO_SCSI_T_TMF)
			vhost_scsi_send_tmf_reject(vs, vq, &vc);
		else
			vhost_scsi_send_an_resp(vs, vq, &vc);
err:
		/*
		 * ENXIO:  No more requests, or read error, wait for next kick
		 * EINVAL: Invalid response buffer, drop the request
		 * EIO:    Respond with bad target
		 * EAGAIN: Pending request
		 */
		if (ret == -ENXIO)
			break;
		else if (ret == -EIO)
			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
	}
out:
	mutex_unlock(&vq->mutex);
@@ -1050,7 +1277,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)

static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						poll.work);
	struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);

	pr_debug("%s: The handling func for control queue.\n", __func__);
	vhost_scsi_ctl_handle_vq(vs, vq);
}

static void
+341 −33

File changed.

Preview size limit exceeded, changes collapsed.

+8 −0
Original line number Diff line number Diff line
@@ -34,15 +34,23 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
#define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
#define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */

/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12

#define VIRTIO_BALLOON_CMD_ID_STOP	0
#define VIRTIO_BALLOON_CMD_ID_DONE	1
struct virtio_balloon_config {
	/* Number of pages host wants Guest to give up. */
	__u32 num_pages;
	/* Number of pages we've actually got in balloon. */
	__u32 actual;
	/* Free page report command id, readonly by guest */
	__u32 free_page_report_cmd_id;
	/* Stores PAGE_POISON if page poisoning is in use */
	__u32 poison_val;
};

#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ CONFIG_PARAVIRT=y
CONFIG_KVM_GUEST=y
CONFIG_S390_GUEST=y
CONFIG_VIRTIO=y
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_CONSOLE=y
Loading