Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b8d8bdfe authored by Jens Axboe
Browse files

Merge branch 'stable/for-jens-3.2' of git://oss.oracle.com/git/kwilk/xen into for-3.2/drivers

parents 4c823cc3 6927d920
Loading
Loading
Loading
Loading
+109 −21
Original line number Diff line number Diff line
@@ -39,6 +39,9 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/loop.h>
#include <linux/falloc.h>
#include <linux/fs.h>

#include <xen/events.h>
#include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)

static void print_stats(struct xen_blkif *blkif)
{
	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d\n",
	pr_info("xen-blkback (%s): oo %3d  |  rd %4d  |  wr %4d  |  f %4d"
		 "  |  ds %4d\n",
		 current->comm, blkif->st_oo_req,
		 blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
		 blkif->st_rd_req, blkif->st_wr_req,
		 blkif->st_f_req, blkif->st_ds_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
	blkif->st_ds_req = 0;
}

int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
	return ret;
}

/*
 * Handle a BLKIF_OP_DISCARD request and send the response to the frontend.
 *
 * For a physical backend the discard is forwarded to the block layer;
 * for a file (loop) backend a hole is punched in the backing file.
 * Any other backend type, or a missing fallocate op, yields
 * BLKIF_RSP_EOPNOTSUPP so the frontend can stop issuing discards.
 */
static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
{
	struct block_device *bdev = blkif->vbd.bdev;
	int status = BLKIF_RSP_OKAY;
	int err = 0;

	switch (blkif->blk_backend_type) {
	case BLKIF_BACKEND_PHY:
		/* just forward the discard request */
		err = blkdev_issue_discard(bdev,
				req->u.discard.sector_number,
				req->u.discard.nr_sectors,
				GFP_KERNEL, 0);
		break;
	case BLKIF_BACKEND_FILE: {
		/* punch a hole in the backing file */
		struct loop_device *lo = bdev->bd_disk->private_data;
		struct file *file = lo->lo_backing_file;

		if (file->f_op->fallocate)
			/* sectors are 512 bytes, hence the << 9 */
			err = file->f_op->fallocate(file,
				FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
				req->u.discard.sector_number << 9,
				req->u.discard.nr_sectors << 9);
		else
			err = -EOPNOTSUPP;
		break;
	}
	default:
		err = -EOPNOTSUPP;
		break;
	}

	if (err == -EOPNOTSUPP) {
		pr_debug(DRV_PFX "discard op failed, not supported\n");
		status = BLKIF_RSP_EOPNOTSUPP;
	} else if (err)
		status = BLKIF_RSP_ERROR;

	make_response(blkif, req->id, req->operation, status);
}

/*
 * Block until all I/O outstanding on this blkif has completed.
 *
 * Sets blkif->drain so that __end_block_io_op signals drain_complete
 * once the last pending request finishes, then waits (with a 1 second
 * timeout per iteration, so the wait is re-checked periodically) until
 * the refcount shows no requests in flight or the kthread is asked to
 * stop.  Called before issuing a WRITE_FLUSH for a barrier request.
 */
static void xen_blk_drain_io(struct xen_blkif *blkif)
{
	atomic_set(&blkif->drain, 1);
	do {
		/* The initial value is one, and one refcnt taken at the
		 * start of the xen_blkif_schedule thread. */
		if (atomic_read(&blkif->refcnt) <= 2)
			break;
		/* Woken by __end_block_io_op; HZ timeout re-checks the
		 * refcount even if the completion is missed. */
		wait_for_completion_interruptible_timeout(
				&blkif->drain_complete, HZ);

		if (!atomic_read(&blkif->drain))
			break;
	} while (!kthread_should_stop());
	atomic_set(&blkif->drain, 0);
}

/*
 * Completion callback on the bio's. Called as bh->b_end_io()
 */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
		    (error == -EOPNOTSUPP)) {
		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
	} else if (error) {
		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
			 " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
		make_response(pending_req->blkif, pending_req->id,
			      pending_req->operation, pending_req->status);
		xen_blkif_put(pending_req->blkif);
		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
			if (atomic_read(&pending_req->blkif->drain))
				complete(&pending_req->blkif->drain_complete);
		}
		free_req(pending_req);
	}
}
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)

	return more_to_do;
}

/*
 * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
 * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
	int i, nbio = 0;
	int operation;
	struct blk_plug plug;
	bool drain = false;

	switch (req->operation) {
	case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
		blkif->st_wr_req++;
		operation = WRITE_ODIRECT;
		break;
	case BLKIF_OP_WRITE_BARRIER:
		drain = true;
	case BLKIF_OP_FLUSH_DISKCACHE:
		blkif->st_f_req++;
		operation = WRITE_FLUSH;
		break;
	case BLKIF_OP_WRITE_BARRIER:
	case BLKIF_OP_DISCARD:
		blkif->st_ds_req++;
		operation = REQ_DISCARD;
		break;
	default:
		operation = 0; /* make gcc happy */
		goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,

	/* Check that the number of segments is sane. */
	nseg = req->nr_segments;
	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
	if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
				operation != REQ_DISCARD) ||
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
			 nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
		}
	}

	/* Wait on all outstanding I/O's and once that has been completed
	 * issue the WRITE_FLUSH.
	 */
	if (drain)
		xen_blk_drain_io(pending_req->blkif);

	/*
	 * If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
	if (xen_blkbk_map(req, pending_req, seg))
	if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
		goto fail_flush;

	/* This corresponding xen_blkif_put is done in __end_block_io_op */
	/*
	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
	 */
	xen_blkif_get(blkif);

	for (i = 0; i < nseg; i++) {
@@ -654,10 +737,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
		preq.sector_number += seg[i].nsec;
	}

	/* This will be hit if the operation was a flush. */
	/* This will be hit if the operation was a flush or discard. */
	if (!bio) {
		BUG_ON(operation != WRITE_FLUSH);
		BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);

		if (operation == WRITE_FLUSH) {
			bio = bio_alloc(GFP_KERNEL, 0);
			if (unlikely(bio == NULL))
				goto fail_put_bio;
@@ -666,6 +750,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
			bio->bi_bdev    = preq.bdev;
			bio->bi_private = pending_req;
			bio->bi_end_io  = end_block_io_op;
		} else if (operation == REQ_DISCARD) {
			xen_blk_discard(blkif, req);
			xen_blkif_put(blkif);
			free_req(pending_req);
			return 0;
		}
	}

	/*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,

	if (operation == READ)
		blkif->st_rd_sect += preq.nr_sects;
	else if (operation == WRITE || operation == WRITE_FLUSH)
	else if (operation & WRITE)
		blkif->st_wr_sect += preq.nr_sects;

	return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)

	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
					xen_blkif_reqs, GFP_KERNEL);
	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
					mmap_pages, GFP_KERNEL);
	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
					mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
	if (rc)
		goto failed_init;

	memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));

	INIT_LIST_HEAD(&blkbk->pending_free);
	spin_lock_init(&blkbk->pending_free_lock);
	init_waitqueue_head(&blkbk->pending_free_wq);
+82 −18
Original line number Diff line number Diff line
@@ -63,13 +63,26 @@ struct blkif_common_response {

/* i386 protocol version */
#pragma pack(push, 4)

/* Payload of a read/write request on the i386 (packed, 4-byte aligned) ring. */
struct blkif_x86_32_request_rw {
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

/* Payload of a BLKIF_OP_DISCARD request on the i386 ring. */
struct blkif_x86_32_request_discard {
	blkif_sector_t sector_number;/* start sector idx to discard          */
	uint64_t nr_sectors;         /* number of sectors to discard         */
};

struct blkif_x86_32_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	union {
		struct blkif_x86_32_request_rw rw;
		struct blkif_x86_32_request_discard discard;
	} u;
};
struct blkif_x86_32_response {
	uint64_t        id;              /* copied from request */
@@ -79,13 +92,26 @@ struct blkif_x86_32_response {
#pragma pack(pop)

/* x86_64 protocol version */

/* Payload of a read/write request on the x86_64 ring. */
struct blkif_x86_64_request_rw {
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

/* Payload of a BLKIF_OP_DISCARD request on the x86_64 ring. */
struct blkif_x86_64_request_discard {
	blkif_sector_t sector_number;/* start sector idx to discard          */
	uint64_t nr_sectors;         /* number of sectors to discard         */
};

struct blkif_x86_64_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint64_t       __attribute__((__aligned__(8))) id;
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	union {
		struct blkif_x86_64_request_rw rw;
		struct blkif_x86_64_request_discard discard;
	} u;
};
struct blkif_x86_64_response {
	uint64_t       __attribute__((__aligned__(8))) id;
@@ -113,6 +139,11 @@ enum blkif_protocol {
	BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * How the backing storage is provided, as derived from the xenstore
 * "type" node; determines how BLKIF_OP_DISCARD is implemented.
 */
enum blkif_backend_type {
	BLKIF_BACKEND_PHY  = 1,		/* raw block device */
	BLKIF_BACKEND_FILE = 2,		/* loop-mounted backing file */
};

struct xen_vbd {
	/* What the domain refers to this vbd as. */
	blkif_vdev_t		handle;
@@ -138,6 +169,7 @@ struct xen_blkif {
	unsigned int		irq;
	/* Comms information. */
	enum blkif_protocol	blk_protocol;
	enum blkif_backend_type blk_backend_type;
	union blkif_back_rings	blk_rings;
	struct vm_struct	*blk_ring_area;
	/* The VBD attached to this interface. */
@@ -149,6 +181,9 @@ struct xen_blkif {
	atomic_t		refcnt;

	wait_queue_head_t	wq;
	/* for barrier (drain) requests */
	struct completion	drain_complete;
	atomic_t		drain;
	/* One thread per one blkif. */
	struct task_struct	*xenblkd;
	unsigned int		waiting_reqs;
@@ -159,6 +194,7 @@ struct xen_blkif {
	int			st_wr_req;
	int			st_oo_req;
	int			st_f_req;
	int			st_ds_req;
	int			st_rd_sect;
	int			st_wr_sect;

@@ -182,7 +218,7 @@ struct xen_blkif {

struct phys_req {
	unsigned short		dev;
	unsigned short		nr_sects;
	blkif_sector_t		nr_sects;
	struct block_device	*bdev;
	blkif_sector_t		sector_number;
};
@@ -196,6 +232,8 @@ int xen_blkif_schedule(void *arg);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -206,12 +244,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	switch (src->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
	case BLKIF_OP_WRITE_BARRIER:
	case BLKIF_OP_FLUSH_DISKCACHE:
		dst->u.rw.sector_number = src->u.rw.sector_number;
		barrier();
		if (n > dst->nr_segments)
			n = dst->nr_segments;
		for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
			dst->u.rw.seg[i] = src->u.rw.seg[i];
		break;
	case BLKIF_OP_DISCARD:
		dst->u.discard.sector_number = src->u.discard.sector_number;
		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
		break;
	default:
		break;
	}
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -222,12 +273,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->u.rw.sector_number = src->sector_number;
	switch (src->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
	case BLKIF_OP_WRITE_BARRIER:
	case BLKIF_OP_FLUSH_DISKCACHE:
		dst->u.rw.sector_number = src->u.rw.sector_number;
		barrier();
		if (n > dst->nr_segments)
			n = dst->nr_segments;
		for (i = 0; i < n; i++)
		dst->u.rw.seg[i] = src->seg[i];
			dst->u.rw.seg[i] = src->u.rw.seg[i];
		break;
	case BLKIF_OP_DISCARD:
		dst->u.discard.sector_number = src->u.discard.sector_number;
		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
		break;
	default:
		break;
	}
}

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
+78 −2
Original line number Diff line number Diff line
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);

@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%d\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%d\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);

@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
	return err;
}

/*
 * Advertise discard support for this vbd via the "feature-discard"
 * xenstore node.
 *
 * A "file" backend always advertises discard (implemented by punching
 * holes in the backing file).  A "phy" backend advertises it only when
 * the underlying queue supports discard, in which case the queue's
 * limits are exported as "discard-granularity"/"discard-alignment".
 * Returns 0 on success or a xenbus error code.
 */
int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	char *type;
	int err;
	int state = 0;

	type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
	if (IS_ERR(type)) {
		err = PTR_ERR(type);
		xenbus_dev_fatal(dev, err, "reading type");
		goto out;
	}

	/* The prefixes are mutually exclusive, so test them else-if. */
	if (strncmp(type, "file", 4) == 0) {
		state = 1;
		blkif->blk_backend_type = BLKIF_BACKEND_FILE;
	} else if (strncmp(type, "phy", 3) == 0) {
		struct block_device *bdev = be->blkif->vbd.bdev;
		struct request_queue *q = bdev_get_queue(bdev);

		if (blk_queue_discard(q)) {
			err = xenbus_printf(xbt, dev->nodename,
				"discard-granularity", "%u",
				q->limits.discard_granularity);
			if (err) {
				xenbus_dev_fatal(dev, err,
					"writing discard-granularity");
				goto out_free;
			}
			err = xenbus_printf(xbt, dev->nodename,
				"discard-alignment", "%u",
				q->limits.discard_alignment);
			if (err) {
				xenbus_dev_fatal(dev, err,
					"writing discard-alignment");
				goto out_free;
			}
			state = 1;
			blkif->blk_backend_type = BLKIF_BACKEND_PHY;
		}
	}

	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		xenbus_dev_fatal(dev, err, "writing feature-discard");
/* Label renamed from "kfree" so it no longer shadows the function name. */
out_free:
	kfree(type);
out:
	return err;
}

/*
 * Publish whether write-barrier requests are supported by writing the
 * "feature-barrier" xenstore node.  Returns 0 or a xenbus error code.
 */
int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
				"%d", state);

	if (err)
		xenbus_dev_fatal(dev, err, "writing feature-barrier");
	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -590,7 +661,7 @@ static void frontend_changed(struct xenbus_device *dev,

		/*
		 * Enforce precondition before potential leak point.
		 * blkif_disconnect() is idempotent.
		 * xen_blkif_disconnect() is idempotent.
		 */
		xen_blkif_disconnect(be->blkif);

@@ -611,7 +682,7 @@ static void frontend_changed(struct xenbus_device *dev,
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies blkif_disconnect() via blkback_remove() */
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

@@ -650,6 +721,11 @@ static void connect(struct backend_info *be)
	if (err)
		goto abort;

	err = xen_blkbk_discard(xbt, be);

	/* If we can't advertise it is OK. */
	err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
+98 −25
Original line number Diff line number Diff line
@@ -98,6 +98,9 @@ struct blkfront_info
	unsigned long shadow_free;
	unsigned int feature_flush;
	unsigned int flush_op;
	unsigned int feature_discard;
	unsigned int discard_granularity;
	unsigned int discard_alignment;
	int is_ready;
};

@@ -302,6 +305,12 @@ static int blkif_queue_request(struct request *req)
		ring_req->operation = info->flush_op;
	}

	if (unlikely(req->cmd_flags & REQ_DISCARD)) {
		/* id, sector_number and handle are set above. */
		ring_req->operation = BLKIF_OP_DISCARD;
		ring_req->nr_segments = 0;
		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
	} else {
		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);

@@ -326,6 +335,7 @@ static int blkif_queue_request(struct request *req)
						.first_sect = fsect,
						.last_sect  = lsect };
		}
	}

	info->ring.req_prod_pvt++;

@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)

		blk_start_request(req);

		if (req->cmd_type != REQ_TYPE_FS) {
		if ((req->cmd_type != REQ_TYPE_FS) ||
		    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
		    !info->flush_op)) {
			__blk_end_request_all(req, -EIO);
			continue;
		}
@@ -399,6 +411,7 @@ static void do_blkif_request(struct request_queue *rq)
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
	struct request_queue *rq;
	struct blkfront_info *info = gd->private_data;

	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
	if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)

	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);

	if (info->feature_discard) {
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
		blk_queue_max_discard_sectors(rq, get_capacity(gd));
		rq->limits.discard_granularity = info->discard_granularity;
		rq->limits.discard_alignment = info->discard_alignment;
	}

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_logical_block_size(rq, sector_size);
	blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)

		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
		switch (bret->operation) {
		case BLKIF_OP_DISCARD:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
				struct request_queue *rq = info->rq;
				printk(KERN_WARNING "blkfront: %s: discard op failed\n",
					   info->gd->disk_name);
				error = -EOPNOTSUPP;
				info->feature_discard = 0;
				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
			}
			__blk_end_request_all(req, error);
			break;
		case BLKIF_OP_FLUSH_DISKCACHE:
		case BLKIF_OP_WRITE_BARRIER:
			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
	bdput(bdev);
}

/*
 * Read the backend's discard parameters and record them in *info.
 *
 * A "file" backend supports discard unconditionally; a "phy" backend
 * must additionally publish discard-granularity/discard-alignment for
 * the feature to be enabled.  Silently does nothing if the backend
 * "type" node cannot be read.
 */
static void blkfront_setup_discard(struct blkfront_info *info)
{
	char *type;
	unsigned int granularity;
	unsigned int alignment;

	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
	if (IS_ERR(type))
		return;

	if (strncmp(type, "file", 4) == 0) {
		info->feature_discard = 1;
	} else if (strncmp(type, "phy", 3) == 0) {
		int err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
				"discard-granularity", "%u", &granularity,
				"discard-alignment", "%u", &alignment,
				NULL);
		if (!err) {
			info->feature_discard = 1;
			info->discard_granularity = granularity;
			info->discard_alignment = alignment;
		}
	}

	kfree(type);
}

/*
 * Invoked when the backend is finally 'ready' (and has told produced
 * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
	unsigned long sector_size;
	unsigned int binfo;
	int err;
	int barrier, flush;
	int barrier, flush, discard;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
@@ -1179,6 +1237,13 @@ static void blkfront_connect(struct blkfront_info *info)
		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
	}

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-discard", "%d", &discard,
			    NULL);

	if (!err && discard)
		blkfront_setup_discard(info);

	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {

static int __init xlblk_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
		return -ENODEV;
	}

	return xenbus_register_frontend(&blkfront);
	ret = xenbus_register_frontend(&blkfront);
	if (ret) {
		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
		return ret;
	}

	return 0;
}
module_init(xlblk_init);

+36 −0
Original line number Diff line number Diff line
@@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t;
 * "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE   3

/*
 * Recognised only if "feature-discard" is present in backend xenbus info.
 * The "feature-discard" node contains a boolean indicating whether trim
 * (ATA) or unmap (SCSI) - conveniently called discard requests are likely
 * to succeed or fail. Either way, a discard request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt discard requests.
 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
 * create the "feature-discard" node!
 *
 * Discard operation is a request for the underlying block device to mark
 * extents to be erased. However, discard does not guarantee that the blocks
 * will be erased from the device - it is just a hint to the device
 * controller that these blocks are no longer in use. What the device
 * controller does with that information is left to the controller.
 * Discard operations are passed with sector_number as the
 * sector index to begin discard operations at and nr_sectors as the number of
 * sectors to be discarded. The specified sectors should be discarded if the
 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
 * or a BLKIF_RSP_EOPNOTSUPP should be returned.
 * More information about trim/unmap operations at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 */
#define BLKIF_OP_DISCARD           5

/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -74,6 +104,11 @@ struct blkif_request_rw {
	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

/* Payload of a BLKIF_OP_DISCARD request (see the comment above the opcode). */
struct blkif_request_discard {
	blkif_sector_t sector_number; /* first sector to discard */
	uint64_t nr_sectors;          /* number of sectors to discard */
};

struct blkif_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	uint8_t        nr_segments;  /* number of segments                   */
@@ -81,6 +116,7 @@ struct blkif_request {
	uint64_t       id;           /* private guest value, echoed in resp  */
	union {
		struct blkif_request_rw rw;
		struct blkif_request_discard discard;
	} u;
};