Loading drivers/block/xen-blkback/blkback.c +109 −21 Original line number Diff line number Diff line Loading @@ -39,6 +39,9 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/freezer.h> #include <linux/loop.h> #include <linux/falloc.h> #include <linux/fs.h> #include <xen/events.h> #include <xen/page.h> Loading Loading @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" " | ds %4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; blkif->st_oo_req = 0; blkif->st_ds_req = 0; } int xen_blkif_schedule(void *arg) Loading Loading @@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) { int err = 0; int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, 0); else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { /* punch a hole in the backing file */ struct loop_device *lo = bdev->bd_disk->private_data; struct file *file = lo->lo_backing_file; if (file->f_op->fallocate) err = file->f_op->fallocate(file, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, req->u.discard.sector_number << 9, req->u.discard.nr_sectors << 9); else err = -EOPNOTSUPP; } else err = -EOPNOTSUPP; if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); status = BLKIF_RSP_EOPNOTSUPP; } else if (err) status = BLKIF_RSP_ERROR; make_response(blkif, 
req->id, req->operation, status); } static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { /* The initial value is one, and one refcnt taken at the * start of the xen_blkif_schedule thread. */ if (atomic_read(&blkif->refcnt) <= 2) break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); if (!atomic_read(&blkif->drain)) break; } while (!kthread_should_stop()); atomic_set(&blkif->drain, 0); } /* * Completion callback on the bio's. Called as bh->b_end_io() */ Loading @@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && (error == -EOPNOTSUPP)) { pr_debug(DRV_PFX "write barrier op failed, not supported\n"); xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); Loading @@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); if (atomic_read(&pending_req->blkif->refcnt) <= 2) { if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } free_req(pending_req); } } Loading Loading @@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif) return more_to_do; } /* * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. 
Loading @@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int i, nbio = 0; int operation; struct blk_plug plug; bool drain = false; switch (req->operation) { case BLKIF_OP_READ: Loading @@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_wr_req++; operation = WRITE_ODIRECT; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; break; case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_DISCARD: blkif->st_ds_req++; operation = REQ_DISCARD; break; default: operation = 0; /* make gcc happy */ goto fail_response; Loading @@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); Loading Loading @@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } } /* Wait on all outstanding I/O's and once that has been completed * issue the WRITE_FLUSH. */ if (drain) xen_blk_drain_io(pending_req->blkif); /* * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ if (xen_blkbk_map(req, pending_req, seg)) if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* This corresponding xen_blkif_put is done in __end_block_io_op */ /* * This corresponding xen_blkif_put is done in __end_block_io_op, or * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. 
*/ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { Loading @@ -654,10 +737,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, preq.sector_number += seg[i].nsec; } /* This will be hit if the operation was a flush. */ /* This will be hit if the operation was a flush or discard. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); if (operation == WRITE_FLUSH) { bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; Loading @@ -666,6 +750,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; } else if (operation == REQ_DISCARD) { xen_blk_discard(blkif, req); xen_blkif_put(blkif); free_req(pending_req); return 0; } } /* Loading @@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_FLUSH) else if (operation & WRITE) blkif->st_wr_sect += preq.nr_sects; return 0; Loading Loading @@ -765,9 +855,9 @@ static int __init xen_blkif_init(void) mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * xen_blkif_reqs, GFP_KERNEL); blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); Loading @@ -790,8 +880,6 @@ static int __init xen_blkif_init(void) if (rc) goto failed_init; memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); INIT_LIST_HEAD(&blkbk->pending_free); spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); Loading drivers/block/xen-blkback/common.h +82 −18 
Original line number Diff line number Diff line Loading @@ -63,13 +63,26 @@ struct blkif_common_response { /* i386 protocol version */ #pragma pack(push, 4) struct blkif_x86_32_request_rw { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_x86_32_request_discard { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ uint64_t nr_sectors; }; struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; } u; }; struct blkif_x86_32_response { uint64_t id; /* copied from request */ Loading @@ -79,13 +92,26 @@ struct blkif_x86_32_response { #pragma pack(pop) /* x86_64 protocol version */ struct blkif_x86_64_request_rw { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_x86_64_request_discard { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ uint64_t nr_sectors; }; struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? 
*/ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t __attribute__((__aligned__(8))) id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; } u; }; struct blkif_x86_64_response { uint64_t __attribute__((__aligned__(8))) id; Loading Loading @@ -113,6 +139,11 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; enum blkif_backend_type { BLKIF_BACKEND_PHY = 1, BLKIF_BACKEND_FILE = 2, }; struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; Loading @@ -138,6 +169,7 @@ struct xen_blkif { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; enum blkif_backend_type blk_backend_type; union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ Loading @@ -149,6 +181,9 @@ struct xen_blkif { atomic_t refcnt; wait_queue_head_t wq; /* for barrier (drain) requests */ struct completion drain_complete; atomic_t drain; /* One thread per one blkif. 
*/ struct task_struct *xenblkd; unsigned int waiting_reqs; Loading @@ -159,6 +194,7 @@ struct xen_blkif { int st_wr_req; int st_oo_req; int st_f_req; int st_ds_req; int st_rd_sect; int st_wr_sect; Loading @@ -182,7 +218,7 @@ struct xen_blkif { struct phys_req { unsigned short dev; unsigned short nr_sects; blkif_sector_t nr_sects; struct block_device *bdev; blkif_sector_t sector_number; }; Loading @@ -196,6 +232,8 @@ int xen_blkif_schedule(void *arg); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); static inline void blkif_get_x86_32_req(struct blkif_request *dst, Loading @@ -206,12 +244,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->u.rw.sector_number = src->sector_number; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->seg[i]; dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: break; } } static inline void blkif_get_x86_64_req(struct blkif_request *dst, Loading @@ -222,12 +273,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->u.rw.sector_number = src->sector_number; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); if (n > 
dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->seg[i]; dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: break; } } #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ drivers/block/xen-blkback/xenbus.c +78 −2 Original line number Diff line number Diff line Loading @@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); init_waitqueue_head(&blkif->wq); init_completion(&blkif->drain_complete); atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); Loading Loading @@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); Loading @@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, &dev_attr_f_req.attr, &dev_attr_ds_req.attr, &dev_attr_rd_sect.attr, &dev_attr_wr_sect.attr, NULL Loading Loading @@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, return err; } int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) { struct xenbus_device *dev = be->dev; struct xen_blkif *blkif = be->blkif; char *type; int err; int state = 0; type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); if (!IS_ERR(type)) { if (strncmp(type, "file", 4) == 0) { state = 1; blkif->blk_backend_type = BLKIF_BACKEND_FILE; } if (strncmp(type, "phy", 3) == 0) { struct block_device *bdev = be->blkif->vbd.bdev; struct request_queue *q = bdev_get_queue(bdev); if 
(blk_queue_discard(q)) { err = xenbus_printf(xbt, dev->nodename, "discard-granularity", "%u", q->limits.discard_granularity); if (err) { xenbus_dev_fatal(dev, err, "writing discard-granularity"); goto kfree; } err = xenbus_printf(xbt, dev->nodename, "discard-alignment", "%u", q->limits.discard_alignment); if (err) { xenbus_dev_fatal(dev, err, "writing discard-alignment"); goto kfree; } state = 1; blkif->blk_backend_type = BLKIF_BACKEND_PHY; } } } else { err = PTR_ERR(type); xenbus_dev_fatal(dev, err, "reading type"); goto out; } err = xenbus_printf(xbt, dev->nodename, "feature-discard", "%d", state); if (err) xenbus_dev_fatal(dev, err, "writing feature-discard"); kfree: kfree(type); out: return err; } int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; err = xenbus_printf(xbt, dev->nodename, "feature-barrier", "%d", state); if (err) xenbus_dev_fatal(dev, err, "writing feature-barrier"); return err; } /* * Entry point to this code when a new device is created. Allocate the basic * structures, and watch the store waiting for the hotplug scripts to tell us Loading Loading @@ -590,7 +661,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); Loading @@ -611,7 +682,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: /* implies blkif_disconnect() via blkback_remove() */ /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; Loading Loading @@ -650,6 +721,11 @@ static void connect(struct backend_info *be) if (err) goto abort; err = xen_blkbk_discard(xbt, be); /* If we can't advertise it is OK. 
*/ err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { Loading drivers/block/xen-blkfront.c +98 −25 Original line number Diff line number Diff line Loading @@ -98,6 +98,9 @@ struct blkfront_info unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; unsigned int feature_discard; unsigned int discard_granularity; unsigned int discard_alignment; int is_ready; }; Loading Loading @@ -302,6 +305,12 @@ static int blkif_queue_request(struct request *req) ring_req->operation = info->flush_op; } if (unlikely(req->cmd_flags & REQ_DISCARD)) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; ring_req->nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); } else { ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); Loading @@ -326,6 +335,7 @@ static int blkif_queue_request(struct request *req) .first_sect = fsect, .last_sect = lsect }; } } info->ring.req_prod_pvt++; Loading Loading @@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq) blk_start_request(req); if (req->cmd_type != REQ_TYPE_FS) { if ((req->cmd_type != REQ_TYPE_FS) || ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && !info->flush_op)) { __blk_end_request_all(req, -EIO); continue; } Loading Loading @@ -399,6 +411,7 @@ static void do_blkif_request(struct request_queue *rq) static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) Loading @@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); 
blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; } /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); blk_queue_max_hw_sectors(rq, 512); Loading Loading @@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: discard op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; info->feature_discard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); } __blk_end_request_all(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { Loading Loading @@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info) bdput(bdev); } static void blkfront_setup_discard(struct blkfront_info *info) { int err; char *type; unsigned int discard_granularity; unsigned int discard_alignment; type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); if (IS_ERR(type)) return; if (strncmp(type, "phy", 3) == 0) { err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "discard-granularity", "%u", &discard_granularity, "discard-alignment", "%u", &discard_alignment, NULL); if (!err) { info->feature_discard = 1; info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } } else if (strncmp(type, "file", 4) == 0) info->feature_discard = 1; kfree(type); } /* * Invoked when the backend is finally 'ready' (and has produced * the details about the physical device - #sectors, size, etc). 
Loading @@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; int barrier, flush; int barrier, flush, discard; switch (info->connected) { case BLKIF_STATE_CONNECTED: Loading Loading @@ -1179,6 +1237,13 @@ static void blkfront_connect(struct blkfront_info *info) info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; } err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "feature-discard", "%d", &discard, NULL); if (!err && discard) blkfront_setup_discard(info); err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", Loading Loading @@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = { static int __init xlblk_init(void) { int ret; if (!xen_domain()) return -ENODEV; Loading @@ -1394,7 +1461,13 @@ static int __init xlblk_init(void) return -ENODEV; } return xenbus_register_frontend(&blkfront); ret = xenbus_register_frontend(&blkfront); if (ret) { unregister_blkdev(XENVBD_MAJOR, DEV_NAME); return ret; } return 0; } module_init(xlblk_init); Loading include/xen/interface/io/blkif.h +36 −0 Original line number Diff line number Diff line Loading @@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t; * "feature-flush-cache" node! */ #define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Recognised only if "feature-discard" is present in backend xenbus info. * The "feature-discard" node contains a boolean indicating whether trim * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely * to succeed or fail. Either way, a discard request * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by * the underlying block-device hardware. The boolean simply indicates whether * or not it is worthwhile for the frontend to attempt discard requests. * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* * create the "feature-discard" node! 
* * Discard operation is a request for the underlying block device to mark * extents to be erased. However, discard does not guarantee that the blocks * will be erased from the device - it is just a hint to the device * controller that these blocks are no longer in use. What the device * controller does with that information is left to the controller. * Discard operations are passed with sector_number as the * sector index to begin discard operations at and nr_sectors as the number of * sectors to be discarded. The specified sectors should be discarded if the * underlying block device supports trim (ATA) or unmap (SCSI) operations, * or a BLKIF_RSP_EOPNOTSUPP should be returned. * More information about trim/unmap operations at: * http://t13.org/Documents/UploadedDocuments/docs2008/ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf */ #define BLKIF_OP_DISCARD 5 /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. Loading @@ -74,6 +104,11 @@ struct blkif_request_rw { } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_request_discard { blkif_sector_t sector_number; uint64_t nr_sectors; }; struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ Loading @@ -81,6 +116,7 @@ struct blkif_request { uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; } u; }; Loading Loading
drivers/block/xen-blkback/blkback.c +109 −21 Original line number Diff line number Diff line Loading @@ -39,6 +39,9 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/freezer.h> #include <linux/loop.h> #include <linux/falloc.h> #include <linux/fs.h> #include <xen/events.h> #include <xen/page.h> Loading Loading @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" " | ds %4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; blkif->st_oo_req = 0; blkif->st_ds_req = 0; } int xen_blkif_schedule(void *arg) Loading Loading @@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) { int err = 0; int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, 0); else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { /* punch a hole in the backing file */ struct loop_device *lo = bdev->bd_disk->private_data; struct file *file = lo->lo_backing_file; if (file->f_op->fallocate) err = file->f_op->fallocate(file, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, req->u.discard.sector_number << 9, req->u.discard.nr_sectors << 9); else err = -EOPNOTSUPP; } else err = -EOPNOTSUPP; if (err == -EOPNOTSUPP) { pr_debug(DRV_PFX "discard op failed, not supported\n"); status = BLKIF_RSP_EOPNOTSUPP; } else if (err) status = BLKIF_RSP_ERROR; make_response(blkif, 
req->id, req->operation, status); } static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { /* The initial value is one, and one refcnt taken at the * start of the xen_blkif_schedule thread. */ if (atomic_read(&blkif->refcnt) <= 2) break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); if (!atomic_read(&blkif->drain)) break; } while (!kthread_should_stop()); atomic_set(&blkif->drain, 0); } /* * Completion callback on the bio's. Called as bh->b_end_io() */ Loading @@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && (error == -EOPNOTSUPP)) { pr_debug(DRV_PFX "write barrier op failed, not supported\n"); xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); Loading @@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); if (atomic_read(&pending_req->blkif->refcnt) <= 2) { if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } free_req(pending_req); } } Loading Loading @@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif) return more_to_do; } /* * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. 
Loading @@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int i, nbio = 0; int operation; struct blk_plug plug; bool drain = false; switch (req->operation) { case BLKIF_OP_READ: Loading @@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_wr_req++; operation = WRITE_ODIRECT; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; break; case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_DISCARD: blkif->st_ds_req++; operation = REQ_DISCARD; break; default: operation = 0; /* make gcc happy */ goto fail_response; Loading @@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); Loading Loading @@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } } /* Wait on all outstanding I/O's and once that has been completed * issue the WRITE_FLUSH. */ if (drain) xen_blk_drain_io(pending_req->blkif); /* * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ if (xen_blkbk_map(req, pending_req, seg)) if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* This corresponding xen_blkif_put is done in __end_block_io_op */ /* * This corresponding xen_blkif_put is done in __end_block_io_op, or * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. 
*/ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { Loading @@ -654,10 +737,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, preq.sector_number += seg[i].nsec; } /* This will be hit if the operation was a flush. */ /* This will be hit if the operation was a flush or discard. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); if (operation == WRITE_FLUSH) { bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; Loading @@ -666,6 +750,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; } else if (operation == REQ_DISCARD) { xen_blk_discard(blkif, req); xen_blkif_put(blkif); free_req(pending_req); return 0; } } /* Loading @@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_FLUSH) else if (operation & WRITE) blkif->st_wr_sect += preq.nr_sects; return 0; Loading Loading @@ -765,9 +855,9 @@ static int __init xen_blkif_init(void) mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * xen_blkif_reqs, GFP_KERNEL); blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); Loading @@ -790,8 +880,6 @@ static int __init xen_blkif_init(void) if (rc) goto failed_init; memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); INIT_LIST_HEAD(&blkbk->pending_free); spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); Loading
drivers/block/xen-blkback/common.h +82 −18 Original line number Diff line number Diff line Loading @@ -63,13 +63,26 @@ struct blkif_common_response { /* i386 protocol version */ #pragma pack(push, 4) struct blkif_x86_32_request_rw { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_x86_32_request_discard { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ uint64_t nr_sectors; }; struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; } u; }; struct blkif_x86_32_response { uint64_t id; /* copied from request */ Loading @@ -79,13 +92,26 @@ struct blkif_x86_32_response { #pragma pack(pop) /* x86_64 protocol version */ struct blkif_x86_64_request_rw { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_x86_64_request_discard { blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ uint64_t nr_sectors; }; struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? 
*/ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t __attribute__((__aligned__(8))) id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; } u; }; struct blkif_x86_64_response { uint64_t __attribute__((__aligned__(8))) id; Loading Loading @@ -113,6 +139,11 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; enum blkif_backend_type { BLKIF_BACKEND_PHY = 1, BLKIF_BACKEND_FILE = 2, }; struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; Loading @@ -138,6 +169,7 @@ struct xen_blkif { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; enum blkif_backend_type blk_backend_type; union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ Loading @@ -149,6 +181,9 @@ struct xen_blkif { atomic_t refcnt; wait_queue_head_t wq; /* for barrier (drain) requests */ struct completion drain_complete; atomic_t drain; /* One thread per one blkif. 
*/ struct task_struct *xenblkd; unsigned int waiting_reqs; Loading @@ -159,6 +194,7 @@ struct xen_blkif { int st_wr_req; int st_oo_req; int st_f_req; int st_ds_req; int st_rd_sect; int st_wr_sect; Loading @@ -182,7 +218,7 @@ struct xen_blkif { struct phys_req { unsigned short dev; unsigned short nr_sects; blkif_sector_t nr_sects; struct block_device *bdev; blkif_sector_t sector_number; }; Loading @@ -196,6 +232,8 @@ int xen_blkif_schedule(void *arg); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); static inline void blkif_get_x86_32_req(struct blkif_request *dst, Loading @@ -206,12 +244,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->u.rw.sector_number = src->sector_number; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->seg[i]; dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: break; } } static inline void blkif_get_x86_64_req(struct blkif_request *dst, Loading @@ -222,12 +273,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; dst->u.rw.sector_number = src->sector_number; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); if (n > 
dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->seg[i]; dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: break; } } #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
drivers/block/xen-blkback/xenbus.c +78 −2 Original line number Diff line number Diff line Loading @@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); init_waitqueue_head(&blkif->wq); init_completion(&blkif->drain_complete); atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); Loading Loading @@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); Loading @@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, &dev_attr_f_req.attr, &dev_attr_ds_req.attr, &dev_attr_rd_sect.attr, &dev_attr_wr_sect.attr, NULL Loading Loading @@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, return err; } int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) { struct xenbus_device *dev = be->dev; struct xen_blkif *blkif = be->blkif; char *type; int err; int state = 0; type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); if (!IS_ERR(type)) { if (strncmp(type, "file", 4) == 0) { state = 1; blkif->blk_backend_type = BLKIF_BACKEND_FILE; } if (strncmp(type, "phy", 3) == 0) { struct block_device *bdev = be->blkif->vbd.bdev; struct request_queue *q = bdev_get_queue(bdev); if (blk_queue_discard(q)) { err = xenbus_printf(xbt, dev->nodename, "discard-granularity", "%u", q->limits.discard_granularity); if (err) { xenbus_dev_fatal(dev, err, "writing discard-granularity"); goto kfree; } err = xenbus_printf(xbt, dev->nodename, "discard-alignment", "%u", q->limits.discard_alignment); if (err) { xenbus_dev_fatal(dev, err, "writing 
discard-alignment"); goto kfree; } state = 1; blkif->blk_backend_type = BLKIF_BACKEND_PHY; } } } else { err = PTR_ERR(type); xenbus_dev_fatal(dev, err, "reading type"); goto out; } err = xenbus_printf(xbt, dev->nodename, "feature-discard", "%d", state); if (err) xenbus_dev_fatal(dev, err, "writing feature-discard"); kfree: kfree(type); out: return err; } int xen_blkbk_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; err = xenbus_printf(xbt, dev->nodename, "feature-barrier", "%d", state); if (err) xenbus_dev_fatal(dev, err, "writing feature-barrier"); return err; } /* * Entry point to this code when a new device is created. Allocate the basic * structures, and watch the store waiting for the hotplug scripts to tell us Loading Loading @@ -590,7 +661,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); Loading @@ -611,7 +682,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: /* implies blkif_disconnect() via blkback_remove() */ /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; Loading Loading @@ -650,6 +721,11 @@ static void connect(struct backend_info *be) if (err) goto abort; err = xen_blkbk_discard(xbt, be); /* If we can't advertise it is OK. */ err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { Loading
drivers/block/xen-blkfront.c +98 −25 Original line number Diff line number Diff line Loading @@ -98,6 +98,9 @@ struct blkfront_info unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; unsigned int feature_discard; unsigned int discard_granularity; unsigned int discard_alignment; int is_ready; }; Loading Loading @@ -302,6 +305,12 @@ static int blkif_queue_request(struct request *req) ring_req->operation = info->flush_op; } if (unlikely(req->cmd_flags & REQ_DISCARD)) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; ring_req->nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); } else { ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); Loading @@ -326,6 +335,7 @@ static int blkif_queue_request(struct request *req) .first_sect = fsect, .last_sect = lsect }; } } info->ring.req_prod_pvt++; Loading Loading @@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq) blk_start_request(req); if (req->cmd_type != REQ_TYPE_FS) { if ((req->cmd_type != REQ_TYPE_FS) || ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && !info->flush_op)) { __blk_end_request_all(req, -EIO); continue; } Loading Loading @@ -399,6 +411,7 @@ static void do_blkif_request(struct request_queue *rq) static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) Loading @@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; } /* Hard sector size and max sectors 
impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); blk_queue_max_hw_sectors(rq, 512); Loading Loading @@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: discard op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; info->feature_discard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); } __blk_end_request_all(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { Loading Loading @@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info) bdput(bdev); } static void blkfront_setup_discard(struct blkfront_info *info) { int err; char *type; unsigned int discard_granularity; unsigned int discard_alignment; type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); if (IS_ERR(type)) return; if (strncmp(type, "phy", 3) == 0) { err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "discard-granularity", "%u", &discard_granularity, "discard-alignment", "%u", &discard_alignment, NULL); if (!err) { info->feature_discard = 1; info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } } else if (strncmp(type, "file", 4) == 0) info->feature_discard = 1; kfree(type); } /* * Invoked when the backend is finally 'ready' (and has told produced * the details about the physical device - #sectors, size, etc). 
Loading @@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; int barrier, flush; int barrier, flush, discard; switch (info->connected) { case BLKIF_STATE_CONNECTED: Loading Loading @@ -1179,6 +1237,13 @@ static void blkfront_connect(struct blkfront_info *info) info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; } err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "feature-discard", "%d", &discard, NULL); if (!err && discard) blkfront_setup_discard(info); err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", Loading Loading @@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = { static int __init xlblk_init(void) { int ret; if (!xen_domain()) return -ENODEV; Loading @@ -1394,7 +1461,13 @@ static int __init xlblk_init(void) return -ENODEV; } return xenbus_register_frontend(&blkfront); ret = xenbus_register_frontend(&blkfront); if (ret) { unregister_blkdev(XENVBD_MAJOR, DEV_NAME); return ret; } return 0; } module_init(xlblk_init); Loading
include/xen/interface/io/blkif.h +36 −0 Original line number Diff line number Diff line Loading @@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t; * "feature-flush-cache" node! */ #define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Recognised only if "feature-discard" is present in backend xenbus info. * The "feature-discard" node contains a boolean indicating whether trim * (ATA) or unmap (SCSI) - conveniently called discard requests - are likely * to succeed or fail. Either way, a discard request * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by * the underlying block-device hardware. The boolean simply indicates whether * or not it is worthwhile for the frontend to attempt discard requests. * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* * create the "feature-discard" node! * * Discard operation is a request for the underlying block device to mark * extents to be erased. However, discard does not guarantee that the blocks * will be erased from the device - it is just a hint to the device * controller that these blocks are no longer in use. What the device * controller does with that information is left to the controller. * Discard operations are passed with sector_number as the * sector index to begin discard operations at and nr_sectors as the number of * sectors to be discarded. The specified sectors should be discarded if the * underlying block device supports trim (ATA) or unmap (SCSI) operations, * or a BLKIF_RSP_EOPNOTSUPP should be returned. * More information about trim/unmap operations at: * http://t13.org/Documents/UploadedDocuments/docs2008/ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf */ #define BLKIF_OP_DISCARD 5 /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. 
Loading @@ -74,6 +104,11 @@ struct blkif_request_rw { } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; struct blkif_request_discard { blkif_sector_t sector_number; uint64_t nr_sectors; }; struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ Loading @@ -81,6 +116,7 @@ struct blkif_request { uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; } u; }; Loading