Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8d0ff392 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'stable/for-jens-3.8' of...

Merge branch 'stable/for-jens-3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-3.8/drivers
parents f1d6a328 cb5bd4d1
Loading
Loading
Loading
Loading
+270 −25
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/bitmap.h>

#include <xen/events.h>
#include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
	unsigned short		operation;
	int			status;
	struct list_head	free_list;
	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
};

#define BLKBACK_INVALID_HANDLE (~0)
@@ -98,6 +100,36 @@ struct xen_blkbk {

static struct xen_blkbk *blkbk;

/*
 * Maximum number of grant pages that can be mapped in blkback.
 * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
 * pages that blkback will persistently map.
 * Currently, this is:
 * RING_SIZE = 32 (for all known ring types)
 * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
 * sizeof(struct persistent_gnt) = 48
 * So the maximum memory used to store the grants is:
 * 32 * 11 * 48 = 16896 bytes
 */
/*
 * Upper bound on persistently-mapped grant pages for one ring:
 * ring slots * segments per request, per the active ring protocol.
 */
static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
{
	unsigned int ring_size;

	switch (protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		ring_size = __CONST_RING_SIZE(blkif, PAGE_SIZE);
		break;
	case BLKIF_PROTOCOL_X86_32:
		ring_size = __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE);
		break;
	case BLKIF_PROTOCOL_X86_64:
		ring_size = __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE);
		break;
	default:
		BUG();
		return 0;
	}
	return ring_size * BLKIF_MAX_SEGMENTS_PER_REQUEST;
}


/*
 * Little helpful macro to figure out the index and virtual address of the
 * pending_pages[..]. For each 'pending_req' we have up to
@@ -129,6 +161,57 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
static void make_response(struct xen_blkif *blkif, u64 id,
			  unsigned short op, int st);

/*
 * Iterate over every entry of an rbtree of persistent grants in
 * ascending key order.  'pos' is a pointer to the containing struct,
 * 'node' the name of its embedded struct rb_node member.  On an empty
 * tree rb_first() yields NULL and &(pos)->node evaluates back to NULL,
 * terminating the loop without dereferencing anything.
 *
 * NOTE(review): the step expression reads &(pos)->node AFTER the loop
 * body runs, so the body must not free 'pos' — the teardown loop in
 * xen_blkif_schedule appears to kfree() the entry before the next
 * step; confirm this use-after-free and consider a *_safe variant.
 */
#define foreach_grant(pos, rbtree, node) \
	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
	     &(pos)->node != NULL; \
	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))


/*
 * Insert a persistent grant into the per-blkif rbtree, keyed by its
 * grant reference.  Inserting a gref that is already present is a bug.
 */
static void add_persistent_gnt(struct rb_root *root,
			       struct persistent_gnt *persistent_gnt)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct persistent_gnt *entry;

	/* Descend to the leaf where the new node belongs. */
	while (*link) {
		parent = *link;
		entry = container_of(parent, struct persistent_gnt, node);
		if (persistent_gnt->gnt < entry->gnt) {
			link = &parent->rb_left;
		} else if (persistent_gnt->gnt > entry->gnt) {
			link = &parent->rb_right;
		} else {
			/* Duplicate grefs must never be inserted. */
			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
			BUG();
		}
	}

	/* Link the new node in and restore the red-black invariants. */
	rb_link_node(&persistent_gnt->node, parent, link);
	rb_insert_color(&persistent_gnt->node, root);
}

/*
 * Look up a persistent grant by grant reference.  Returns the entry,
 * or NULL when no grant with that gref is in the tree.
 */
static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
						 grant_ref_t gref)
{
	struct rb_node *n;

	/* Standard binary search down the rbtree, keyed by gref. */
	for (n = root->rb_node; n != NULL;) {
		struct persistent_gnt *entry =
			container_of(n, struct persistent_gnt, node);

		if (gref < entry->gnt)
			n = n->rb_left;
		else if (gref > entry->gnt)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
@@ -275,6 +358,11 @@ int xen_blkif_schedule(void *arg)
{
	struct xen_blkif *blkif = arg;
	struct xen_vbd *vbd = &blkif->vbd;
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnt;
	int ret = 0;
	int segs_to_unmap = 0;

	xen_blkif_get(blkif);

@@ -302,6 +390,36 @@ int xen_blkif_schedule(void *arg)
			print_stats(blkif);
	}

	/* Free all persistent grant pages */
	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) {
		foreach_grant(persistent_gnt, &blkif->persistent_gnts, node) {
			BUG_ON(persistent_gnt->handle ==
				BLKBACK_INVALID_HANDLE);
			gnttab_set_unmap_op(&unmap[segs_to_unmap],
			    (unsigned long) pfn_to_kaddr(page_to_pfn(
				persistent_gnt->page)),
			    GNTMAP_host_map,
			    persistent_gnt->handle);

			pages[segs_to_unmap] = persistent_gnt->page;
			rb_erase(&persistent_gnt->node,
				&blkif->persistent_gnts);
			kfree(persistent_gnt);
			blkif->persistent_gnt_c--;

			if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
				!rb_next(&persistent_gnt->node)) {
				ret = gnttab_unmap_refs(unmap, NULL, pages,
							segs_to_unmap);
				BUG_ON(ret);
				segs_to_unmap = 0;
			}
		}
	}

	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	if (log_stats)
		print_stats(blkif);

@@ -328,6 +446,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		if (!test_bit(i, req->unmap_seg))
			continue;
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
@@ -344,12 +464,26 @@ static void xen_blkbk_unmap(struct pending_req *req)

static int xen_blkbk_map(struct blkif_request *req,
			 struct pending_req *pending_req,
			 struct seg_buf seg[])
			 struct seg_buf seg[],
			 struct page *pages[])
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int i;
	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnt = NULL;
	struct xen_blkif *blkif = pending_req->blkif;
	phys_addr_t addr = 0;
	int i, j;
	bool new_map;
	int nseg = req->u.rw.nr_segments;
	int segs_to_map = 0;
	int ret = 0;
	int use_persistent_gnts;

	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);

	BUG_ON(blkif->persistent_gnt_c >
		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));

	/*
	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,37 +493,147 @@ static int xen_blkbk_map(struct blkif_request *req,
	for (i = 0; i < nseg; i++) {
		uint32_t flags;

		if (use_persistent_gnts)
			persistent_gnt = get_persistent_gnt(
				&blkif->persistent_gnts,
				req->u.rw.seg[i].gref);

		if (persistent_gnt) {
			/*
			 * We are using persistent grants and
			 * the grant is already mapped
			 */
			new_map = false;
		} else if (use_persistent_gnts &&
			   blkif->persistent_gnt_c <
			   max_mapped_grant_pages(blkif->blk_protocol)) {
			/*
			 * We are using persistent grants, the grant is
			 * not mapped but we have room for it
			 */
			new_map = true;
			persistent_gnt = kmalloc(
				sizeof(struct persistent_gnt),
				GFP_KERNEL);
			if (!persistent_gnt)
				return -ENOMEM;
			persistent_gnt->page = alloc_page(GFP_KERNEL);
			if (!persistent_gnt->page) {
				kfree(persistent_gnt);
				return -ENOMEM;
			}
			persistent_gnt->gnt = req->u.rw.seg[i].gref;
			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;

			pages_to_gnt[segs_to_map] =
				persistent_gnt->page;
			addr = (unsigned long) pfn_to_kaddr(
				page_to_pfn(persistent_gnt->page));

			add_persistent_gnt(&blkif->persistent_gnts,
				persistent_gnt);
			blkif->persistent_gnt_c++;
			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
				 persistent_gnt->gnt, blkif->persistent_gnt_c,
				 max_mapped_grant_pages(blkif->blk_protocol));
		} else {
			/*
			 * We are either using persistent grants and
			 * hit the maximum limit of grants mapped,
			 * or we are not using persistent grants.
			 */
			if (use_persistent_gnts &&
				!blkif->vbd.overflow_max_grants) {
				blkif->vbd.overflow_max_grants = 1;
				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
					 blkif->domid, blkif->vbd.handle);
			}
			new_map = true;
			pages[i] = blkbk->pending_page(pending_req, i);
			addr = vaddr(pending_req, i);
			pages_to_gnt[segs_to_map] =
				blkbk->pending_page(pending_req, i);
		}

		if (persistent_gnt) {
			pages[i] = persistent_gnt->page;
			persistent_gnts[i] = persistent_gnt;
		} else {
			persistent_gnts[i] = NULL;
		}

		if (new_map) {
			flags = GNTMAP_host_map;
		if (pending_req->operation != BLKIF_OP_READ)
			if (!persistent_gnt &&
			    (pending_req->operation != BLKIF_OP_READ))
				flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
				  req->u.rw.seg[i].gref,
				  pending_req->blkif->domid);
			gnttab_set_map_op(&map[segs_to_map++], addr,
					  flags, req->u.rw.seg[i].gref,
					  blkif->domid);
		}
	}

	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
	if (segs_to_map) {
		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
		BUG_ON(ret);
	}

	/*
	 * Now swizzle the MFN in our domain with the MFN from the other domain
	 * so that when we access vaddr(pending_req,i) it has the contents of
	 * the page from the other domain.
	 */
	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	for (i = 0, j = 0; i < nseg; i++) {
		if (!persistent_gnts[i] ||
		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
			/* This is a newly mapped grant */
			BUG_ON(j >= segs_to_map);
			if (unlikely(map[j].status != 0)) {
				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
			map[i].handle = BLKBACK_INVALID_HANDLE;
				map[j].handle = BLKBACK_INVALID_HANDLE;
				ret |= 1;
				if (persistent_gnts[i]) {
					rb_erase(&persistent_gnts[i]->node,
						 &blkif->persistent_gnts);
					blkif->persistent_gnt_c--;
					kfree(persistent_gnts[i]);
					persistent_gnts[i] = NULL;
				}

		pending_handle(pending_req, i) = map[i].handle;
			}
		}
		if (persistent_gnts[i]) {
			if (persistent_gnts[i]->handle ==
			    BLKBACK_INVALID_HANDLE) {
				/*
				 * If this is a new persistent grant
				 * save the handle
				 */
				persistent_gnts[i]->handle = map[j].handle;
				persistent_gnts[i]->dev_bus_addr =
					map[j++].dev_bus_addr;
			}
			pending_handle(pending_req, i) =
				persistent_gnts[i]->handle;

			if (ret)
				continue;

		seg[i].buf  = map[i].dev_bus_addr |
			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
				(req->u.rw.seg[i].first_sect << 9);
		} else {
			pending_handle(pending_req, i) = map[j].handle;
			bitmap_set(pending_req->unmap_seg, i, 1);

			if (ret) {
				j++;
				continue;
			}

			seg[i].buf = map[j++].dev_bus_addr |
				(req->u.rw.seg[i].first_sect << 9);
		}
	}
	return ret;
}

@@ -591,6 +835,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
	int operation;
	struct blk_plug plug;
	bool drain = false;
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];

	switch (req->operation) {
	case BLKIF_OP_READ:
@@ -677,7 +922,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
	if (xen_blkbk_map(req, pending_req, seg))
	if (xen_blkbk_map(req, pending_req, seg, pages))
		goto fail_flush;

	/*
@@ -689,7 +934,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
	for (i = 0; i < nseg; i++) {
		while ((bio == NULL) ||
		       (bio_add_page(bio,
				     blkbk->pending_page(pending_req, i),
				     pages[i],
				     seg[i].nsec << 9,
				     seg[i].buf & ~PAGE_MASK) == 0)) {

+17 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
@@ -160,10 +161,22 @@ struct xen_vbd {
	sector_t		size;
	bool			flush_support;
	bool			discard_secure;

	unsigned int		feature_gnt_persistent:1;
	unsigned int		overflow_max_grants:1;
};

struct backend_info;


/*
 * One persistently-mapped grant on the backend side.  Entries live in
 * xen_blkif.persistent_gnts (an rbtree keyed by 'gnt') and stay mapped
 * until the blkif is torn down.
 */
struct persistent_gnt {
	struct page *page;		/* local page the grant is mapped into */
	grant_ref_t gnt;		/* frontend grant reference (tree key) */
	grant_handle_t handle;		/* handle returned by the map op; BLKBACK_INVALID_HANDLE until mapped */
	uint64_t dev_bus_addr;		/* bus address from the map op, used to build seg[i].buf */
	struct rb_node node;		/* linkage in xen_blkif.persistent_gnts */
};

struct xen_blkif {
	/* Unique identifier for this interface. */
	domid_t			domid;
@@ -190,6 +203,10 @@ struct xen_blkif {
	struct task_struct	*xenblkd;
	unsigned int		waiting_reqs;

	/* tree to store persistent grants */
	struct rb_root		persistent_gnts;
	unsigned int		persistent_gnt_c;

	/* statistics */
	unsigned long		st_print;
	int			st_rd_req;
+21 −2
Original line number Diff line number Diff line
@@ -118,6 +118,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);
	blkif->persistent_gnts.rb_node = NULL;

	return blkif;
}
@@ -673,6 +674,13 @@ again:

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
@@ -721,6 +729,7 @@ static int connect_ring(struct backend_info *be)
	struct xenbus_device *dev = be->dev;
	unsigned long ring_ref;
	unsigned int evtchn;
	unsigned int pers_grants;
	char protocol[64] = "";
	int err;

@@ -750,8 +759,18 @@ static int connect_ring(struct backend_info *be)
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;

	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+170 −28
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/llist.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
	BLKIF_STATE_SUSPENDED,
};

/*
 * Frontend-side record of one persistent grant.  Free entries are kept
 * on blkfront_info.persistent_gnts (a lock-less llist) and reused by
 * blkif_queue_request instead of granting a fresh page.
 */
struct grant {
	grant_ref_t gref;	/* grant reference handed to the backend */
	unsigned long pfn;	/* pfn of the page backing the grant */
	struct llist_node node;	/* linkage in blkfront_info.persistent_gnts */
};

/*
 * Per-ring-slot shadow copy of an in-flight request, kept so requests
 * can be reissued after recovery and completed data copied back.
 */
struct blk_shadow {
	struct blkif_request req;	/* private copy of the ring request */
	struct request *request;	/* the block-layer request it maps to */
	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];	/* per-segment pfn (mfn_to_pfn of the granted frame) */
	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];	/* persistent grant used by each segment */
};

static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
	struct work_struct work;
	struct gnttab_free_callback callback;
	struct blk_shadow shadow[BLK_RING_SIZE];
	struct llist_head persistent_gnts;
	unsigned int persistent_gnts_c;
	unsigned long shadow_free;
	unsigned int feature_flush;
	unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
	unsigned int feature_secdiscard:1;
	unsigned int discard_granularity;
	unsigned int discard_alignment;
	unsigned int feature_persistent:1;
	int is_ready;
};

@@ -287,14 +298,27 @@ static int blkif_queue_request(struct request *req)
	unsigned long id;
	unsigned int fsect, lsect;
	int i, ref;

	/*
	 * Used to store if we are able to queue the request by just using
	 * existing persistent grants, or if we have to get new grants,
	 * as there are not sufficiently many free.
	 */
	bool new_persistent_gnts;
	grant_ref_t gref_head;
	struct page *granted_page;
	struct grant *gnt_list_entry = NULL;
	struct scatterlist *sg;

	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	/* Check if we have enough grants to allocate a request */
	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
		new_persistent_gnts = 1;
		if (gnttab_alloc_grant_references(
		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
		    &gref_head) < 0) {
			gnttab_request_free_callback(
				&info->callback,
				blkif_restart_queue_callback,
@@ -302,6 +326,8 @@ static int blkif_queue_request(struct request *req)
				BLKIF_MAX_SEGMENTS_PER_REQUEST);
			return 1;
		}
	} else
		new_persistent_gnts = 0;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
		       BLKIF_MAX_SEGMENTS_PER_REQUEST);

		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
			fsect = sg->offset >> 9;
			lsect = fsect + (sg->length >> 9) - 1;
			/* install a grant reference. */

			if (info->persistent_gnts_c) {
				BUG_ON(llist_empty(&info->persistent_gnts));
				gnt_list_entry = llist_entry(
					llist_del_first(&info->persistent_gnts),
					struct grant, node);

				ref = gnt_list_entry->gref;
				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
				info->persistent_gnts_c--;
			} else {
				ref = gnttab_claim_grant_reference(&gref_head);
				BUG_ON(ref == -ENOSPC);

			gnttab_grant_foreign_access_ref(
					ref,
				gnt_list_entry =
					kmalloc(sizeof(struct grant),
							 GFP_ATOMIC);
				if (!gnt_list_entry)
					return -ENOMEM;

				granted_page = alloc_page(GFP_ATOMIC);
				if (!granted_page) {
					kfree(gnt_list_entry);
					return -ENOMEM;
				}

				gnt_list_entry->pfn =
					page_to_pfn(granted_page);
				gnt_list_entry->gref = ref;

				buffer_mfn = pfn_to_mfn(page_to_pfn(
								granted_page));
				gnttab_grant_foreign_access_ref(ref,
					info->xbdev->otherend_id,
					buffer_mfn,
					rq_data_dir(req));
					buffer_mfn, 0);
			}

			info->shadow[id].grants_used[i] = gnt_list_entry;

			if (rq_data_dir(req)) {
				char *bvec_data;
				void *shared_data;

				BUG_ON(sg->offset + sg->length > PAGE_SIZE);

				shared_data = kmap_atomic(
					pfn_to_page(gnt_list_entry->pfn));
				bvec_data = kmap_atomic(sg_page(sg));

				/*
				 * this does not wipe data stored outside the
				 * range sg->offset..sg->offset+sg->length.
				 * Therefore, blkback *could* see data from
				 * previous requests. This is OK as long as
				 * persistent grants are shared with just one
				 * domain. It may need refactoring if this
				 * changes
				 */
				memcpy(shared_data + sg->offset,
				       bvec_data   + sg->offset,
				       sg->length);

				kunmap_atomic(bvec_data);
				kunmap_atomic(shared_data);
			}

			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
			ring_req->u.rw.seg[i] =
@@ -368,6 +449,7 @@ static int blkif_queue_request(struct request *req)
	/* Keep a private copy so we can reissue requests when recovering. */
	info->shadow[id].req = *ring_req;

	if (new_persistent_gnts)
		gnttab_free_grant_references(gref_head);

	return 0;
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
static void xlvbd_flush(struct blkfront_info *info)
{
	blk_queue_flush(info->rq, info->feature_flush);
	printk(KERN_INFO "blkfront: %s: %s: %s\n",
	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
	       info->gd->disk_name,
	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
		"flush diskcache" : "barrier or flush"),
	       info->feature_flush ? "enabled" : "disabled");
	       info->feature_flush ? "enabled" : "disabled",
	       info->feature_persistent ? "using persistent grants" : "");
}

static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)

static void blkif_free(struct blkfront_info *info, int suspend)
{
	struct llist_node *all_gnts;
	struct grant *persistent_gnt;

	/* Prevent new requests being issued until we fix things up. */
	spin_lock_irq(&info->io_lock);
	info->connected = suspend ?
@@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
	/* No more blkif_request(). */
	if (info->rq)
		blk_stop_queue(info->rq);

	/* Remove all persistent grants */
	if (info->persistent_gnts_c) {
		all_gnts = llist_del_all(&info->persistent_gnts);
		llist_for_each_entry(persistent_gnt, all_gnts, node) {
			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
			kfree(persistent_gnt);
		}
		info->persistent_gnts_c = 0;
	}

	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&info->callback);
	spin_unlock_irq(&info->io_lock);
@@ -734,13 +831,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)

}

static void blkif_completion(struct blk_shadow *s)
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
			     struct blkif_response *bret)
{
	int i;
	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
	 * flag. */
	for (i = 0; i < s->req.u.rw.nr_segments; i++)
		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
	struct bio_vec *bvec;
	struct req_iterator iter;
	unsigned long flags;
	char *bvec_data;
	void *shared_data;
	unsigned int offset = 0;

	if (bret->operation == BLKIF_OP_READ) {
		/*
		 * Copy the data received from the backend into the bvec.
		 * Since bv_offset can be different than 0, and bv_len different
		 * than PAGE_SIZE, we have to keep track of the current offset,
		 * to be sure we are copying the data from the right shared page.
		 */
		rq_for_each_segment(bvec, s->request, iter) {
			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
			i = offset >> PAGE_SHIFT;
			BUG_ON(i >= s->req.u.rw.nr_segments);
			shared_data = kmap_atomic(
				pfn_to_page(s->grants_used[i]->pfn));
			bvec_data = bvec_kmap_irq(bvec, &flags);
			memcpy(bvec_data, shared_data + bvec->bv_offset,
				bvec->bv_len);
			bvec_kunmap_irq(bvec_data, &flags);
			kunmap_atomic(shared_data);
			offset += bvec->bv_len;
		}
	}
	/* Add the persistent grant into the list of free grants */
	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
		info->persistent_gnts_c++;
	}
}

static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +910,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
		req  = info->shadow[id].request;

		if (bret->operation != BLKIF_OP_DISCARD)
			blkif_completion(&info->shadow[id]);
			blkif_completion(&info->shadow[id], info, bret);

		if (add_id_to_freelist(info, id)) {
			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1069,11 @@ again:
		message = "writing protocol";
		goto abort_transaction;
	}
	err = xenbus_printf(xbt, dev->nodename,
			    "feature-persistent", "%u", 1);
	if (err)
		dev_warn(&dev->dev,
			 "writing persistent grants feature to xenbus");

	err = xenbus_transaction_end(xbt, 0);
	if (err) {
@@ -1029,6 +1161,8 @@ static int blkfront_probe(struct xenbus_device *dev,
	spin_lock_init(&info->io_lock);
	info->xbdev = dev;
	info->vdevice = vdevice;
	init_llist_head(&info->persistent_gnts);
	info->persistent_gnts_c = 0;
	info->connected = BLKIF_STATE_DISCONNECTED;
	INIT_WORK(&info->work, blkif_restart_queue);

@@ -1093,7 +1227,7 @@ static int blkif_recover(struct blkfront_info *info)
					req->u.rw.seg[j].gref,
					info->xbdev->otherend_id,
					pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
					rq_data_dir(info->shadow[req->u.rw.id].request));
					0);
		}
		info->shadow[req->u.rw.id].req = *req;

@@ -1225,7 +1359,7 @@ static void blkfront_connect(struct blkfront_info *info)
	unsigned long sector_size;
	unsigned int binfo;
	int err;
	int barrier, flush, discard;
	int barrier, flush, discard, persistent;

	switch (info->connected) {
	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1437,14 @@ static void blkfront_connect(struct blkfront_info *info)
	if (!err && discard)
		blkfront_setup_discard(info);

	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
			    "feature-persistent", "%u", &persistent,
			    NULL);
	if (err)
		info->feature_persistent = 0;
	else
		info->feature_persistent = persistent;

	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
	if (err) {
		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",